diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index 03756c208acdc5..2b324674c81f26 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -14,8 +14,11 @@ if (NOT DEFINED CLANGD_BUILD_XPC) unset(CLANGD_BUILD_XPC_DEFAULT) endif () -llvm_canonicalize_cmake_booleans(CLANGD_BUILD_XPC) -llvm_canonicalize_cmake_booleans(CLANGD_ENABLE_REMOTE) +llvm_canonicalize_cmake_booleans( + CLANGD_BUILD_XPC + CLANGD_ENABLE_REMOTE + LLVM_ENABLE_ZLIB +) configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/Features.inc.in diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index f35a49b681ccc6..78ce77043b6f58 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -738,6 +738,8 @@ MMA_BUILTIN(pmxvbf16ger2pp, "vW512*VVi15i15i3", true) MMA_BUILTIN(pmxvbf16ger2pn, "vW512*VVi15i15i3", true) MMA_BUILTIN(pmxvbf16ger2np, "vW512*VVi15i15i3", true) MMA_BUILTIN(pmxvbf16ger2nn, "vW512*VVi15i15i3", true) +MMA_BUILTIN(lxvp, "W256SLLiW256C*", false) +MMA_BUILTIN(stxvp, "vW256SLLiW256C*", false) // FIXME: Obviously incomplete. diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 29c4f15e57b095..94491e45b55b74 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -1450,8 +1450,6 @@ class ASTReader void Error(StringRef Msg) const; void Error(unsigned DiagID, StringRef Arg1 = StringRef(), StringRef Arg2 = StringRef(), StringRef Arg3 = StringRef()) const; - void Error(unsigned DiagID, StringRef Arg1, StringRef Arg2, - unsigned Select) const; void Error(llvm::Error &&Err) const; public: diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 0b8259221d8f53..0ea149e0cbde77 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -14776,6 +14776,19 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, break; #include "clang/Basic/BuiltinsPPC.def" } + if (BuiltinID == PPC::BI__builtin_mma_lxvp || + BuiltinID == PPC::BI__builtin_mma_stxvp) { + if (BuiltinID == PPC::BI__builtin_mma_lxvp) { + Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); + Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]); + } else { + Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); + Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]); + } + Ops.pop_back(); + llvm::Function *F = CGM.getIntrinsic(ID); + return Builder.CreateCall(F, Ops, ""); + } SmallVector CallOps; if (Accumulate) { Address Addr = EmitPointerWithAlignment(E->getArg(0)); diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 68ee29fd988bb9..0a19c0bc243d69 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -1593,7 +1593,8 @@ void Parser::ProhibitCXX11Attributes(ParsedAttributesWithRange &Attrs, if (!AL.isCXX11Attribute() && !AL.isC2xAttribute()) continue; if (AL.getKind() == ParsedAttr::UnknownAttribute) - Diag(AL.getLoc(), diag::warn_unknown_attribute_ignored) << AL; + Diag(AL.getLoc(), diag::warn_unknown_attribute_ignored) + << AL << AL.getRange(); else { Diag(AL.getLoc(), DiagID) << AL; AL.setInvalid(); diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index a2df339151fb43..a2b7e8dbf57c27 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -2055,7 +2055,8 @@ bool Sema::CheckAttrNoArgs(const ParsedAttr &Attrs) { bool Sema::CheckAttrTarget(const ParsedAttr &AL) { // Check whether the attribute is valid on the current target. if (!AL.existsInTarget(Context.getTargetInfo())) { - Diag(AL.getLoc(), diag::warn_unknown_attribute_ignored) << AL; + Diag(AL.getLoc(), diag::warn_unknown_attribute_ignored) + << AL << AL.getRange(); AL.setInvalid(); return true; } @@ -7362,7 +7363,7 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, AL.isDeclspecAttribute() ? (unsigned)diag::warn_unhandled_ms_attribute_ignored : (unsigned)diag::warn_unknown_attribute_ignored) - << AL; + << AL << AL.getRange(); return; } diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index c49e9cab6d63c8..c90a71626ea7be 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -2612,7 +2612,7 @@ Sema::ActOnBaseSpecifier(Decl *classdecl, SourceRange SpecifierRange, Diag(AL.getLoc(), AL.getKind() == ParsedAttr::UnknownAttribute ? (unsigned)diag::warn_unknown_attribute_ignored : (unsigned)diag::err_base_specifier_attribute) - << AL; + << AL << AL.getRange(); } TypeSourceInfo *TInfo = nullptr; diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp index 214952e914ace0..58f4d0dc1944fb 100644 --- a/clang/lib/Sema/SemaStmtAttr.cpp +++ b/clang/lib/Sema/SemaStmtAttr.cpp @@ -409,7 +409,7 @@ static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const ParsedAttr &A, S.Diag(A.getLoc(), A.isDeclspecAttribute() ? (unsigned)diag::warn_unhandled_ms_attribute_ignored : (unsigned)diag::warn_unknown_attribute_ignored) - << A; + << A << A.getRange(); return nullptr; case ParsedAttr::AT_FallThrough: return handleFallThroughAttr(S, St, A, Range); diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 3203e1365ee1c9..7f18dc77762fd0 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -8091,7 +8091,7 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type, if (attr.isCXX11Attribute() && TAL == TAL_DeclChunk) state.getSema().Diag(attr.getLoc(), diag::warn_unknown_attribute_ignored) - << attr; + << attr << attr.getRange(); break; case ParsedAttr::IgnoredAttribute: diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index f3ecb1e51368bf..6e09fa464940a1 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1246,12 +1246,6 @@ void ASTReader::Error(unsigned DiagID, StringRef Arg1, StringRef Arg2, Diag(DiagID) << Arg1 << Arg2 << Arg3; } -void ASTReader::Error(unsigned DiagID, StringRef Arg1, StringRef Arg2, - unsigned Select) const { - if (!Diags.isDiagnosticInFlight()) - Diag(DiagID) << Arg1 << Arg2 << Select; -} - void ASTReader::Error(llvm::Error &&Err) const { Error(toString(std::move(Err))); } @@ -2395,7 +2389,7 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) { auto FileChange = HasInputFileChanged(); // For an overridden file, there is nothing to validate. if (!Overridden && FileChange != ModificationType::None) { - if (Complain) { + if (Complain && !Diags.isDiagnosticInFlight()) { // Build a list of the PCH imports that got us here (in reverse). SmallVector ImportStack(1, &F); while (!ImportStack.back()->ImportedBy.empty()) @@ -2406,17 +2400,17 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) { unsigned DiagnosticKind = moduleKindForDiagnostic(ImportStack.back()->Kind); if (DiagnosticKind == 0) - Error(diag::err_fe_pch_file_modified, Filename, TopLevelPCHName, - (unsigned)FileChange); + Diag(diag::err_fe_pch_file_modified) + << Filename << TopLevelPCHName << FileChange; else if (DiagnosticKind == 1) - Error(diag::err_fe_module_file_modified, Filename, TopLevelPCHName, - (unsigned)FileChange); + Diag(diag::err_fe_module_file_modified) + << Filename << TopLevelPCHName << FileChange; else - Error(diag::err_fe_ast_file_modified, Filename, TopLevelPCHName, - (unsigned)FileChange); + Diag(diag::err_fe_ast_file_modified) + << Filename << TopLevelPCHName << FileChange; // Print the import stack. - if (ImportStack.size() > 1 && !Diags.isDiagnosticInFlight()) { + if (ImportStack.size() > 1) { Diag(diag::note_pch_required_by) << Filename << ImportStack[0]->FileName; for (unsigned I = 1; I < ImportStack.size(); ++I) @@ -2424,8 +2418,7 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) { << ImportStack[I-1]->FileName << ImportStack[I]->FileName; } - if (!Diags.isDiagnosticInFlight()) - Diag(diag::note_pch_rebuild_required) << TopLevelPCHName; + Diag(diag::note_pch_rebuild_required) << TopLevelPCHName; } IsOutOfDate = true; diff --git a/clang/test/CodeGen/builtins-ppc-mma.c b/clang/test/CodeGen/builtins-ppc-mma.c index 820f72653876b7..88ca36aa67141d 100644 --- a/clang/test/CodeGen/builtins-ppc-mma.c +++ b/clang/test/CodeGen/builtins-ppc-mma.c @@ -1036,3 +1036,162 @@ void test65(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns __builtin_mma_pmxvbf16ger2nn(&vq, vc, vc, 0, 0, 0); *((__vector_quad *)resp) = vq; } + +// CHECK-LABEL: @test66( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8* +// CHECK-NEXT: tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP1]], i8* [[TMP2]]) +// CHECK-NEXT: ret void +// +void test66(const __vector_pair *vpp, const __vector_pair *vp2) { + __vector_pair vp = __builtin_mma_lxvp(0LL, vpp); + __builtin_mma_stxvp(vp, 0LL, vp2); +} + +// CHECK-LABEL: @test67( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 [[OFFSET:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8* +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[OFFSET]] +// CHECK-NEXT: tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) +// CHECK-NEXT: ret void +// +void test67(const __vector_pair *vpp, signed long long offset, const __vector_pair *vp2) { + __vector_pair vp = __builtin_mma_lxvp(offset, vpp); + __builtin_mma_stxvp(vp, offset, vp2); +} + +// CHECK-LABEL: @test68( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 18 +// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8* +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 18 +// CHECK-NEXT: tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) +// CHECK-NEXT: ret void +// +void test68(const __vector_pair *vpp, const __vector_pair *vp2) { + __vector_pair vp = __builtin_mma_lxvp(18LL, vpp); + __builtin_mma_stxvp(vp, 18LL, vp2); +} + +// CHECK-LABEL: @test69( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 1 +// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8* +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 1 +// CHECK-NEXT: tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) +// CHECK-NEXT: ret void +// +void test69(const __vector_pair *vpp, const __vector_pair *vp2) { + __vector_pair vp = __builtin_mma_lxvp(1LL, vpp); + __builtin_mma_stxvp(vp, 1LL, vp2); +} + +// CHECK-LABEL: @test70( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 42 +// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8* +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 42 +// CHECK-NEXT: tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) +// CHECK-NEXT: ret void +// +void test70(const __vector_pair *vpp, const __vector_pair *vp2) { + __vector_pair vp = __builtin_mma_lxvp(42LL, vpp); + __builtin_mma_stxvp(vp, 42LL, vp2); +} + +// CHECK-LABEL: @test71( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr <256 x i1>, <256 x i1>* [[VPP:%.*]], i64 128 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <256 x i1>* [[TMP0]] to i8* +// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr <256 x i1>, <256 x i1>* [[VP2:%.*]], i64 128 +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <256 x i1>* [[TMP3]] to i8* +// CHECK-NEXT: tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) +// CHECK-NEXT: ret void +// +void test71(const __vector_pair *vpp, const __vector_pair *vp2) { + __vector_pair vp = __builtin_mma_lxvp(32768LL, vpp); + __builtin_mma_stxvp(vp, 32768LL, vp2); +} + +// CHECK-LABEL: @test72( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 32799 +// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8* +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 32799 +// CHECK-NEXT: tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) +// CHECK-NEXT: ret void +// +void test72(const __vector_pair *vpp, const __vector_pair *vp2) { + __vector_pair vp = __builtin_mma_lxvp(32799LL, vpp); + __builtin_mma_stxvp(vp, 32799LL, vp2); +} + +// CHECK-LABEL: @test73( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, [[TBAA2:!tbaa !.*]] +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 8 +// CHECK-NEXT: [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]], i32 0, i32 0) +// CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, [[TBAA2]] +// CHECK-NEXT: ret void +// +void test73(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) { + __vector_quad vq = *((__vector_quad *)vqp); + __vector_pair vp = __builtin_mma_lxvp(8LL, vpp); + __builtin_mma_pmxvf64gernn(&vq, vp, vc, 0, 0); + *((__vector_quad *)resp) = vq; +} + +// CHECK-LABEL: @test74( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-NEXT: [[TMP3:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, [[TBAA2]] +// CHECK-NEXT: ret void +// +void test74(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) { + __vector_quad vq = *((__vector_quad *)vqp); + __vector_pair vp = __builtin_mma_lxvp(0LL, vpp); + __builtin_mma_xvf64gernp(&vq, vp, vc); + *((__vector_quad *)resp) = vq; +} + +// CHECK-LABEL: @test75( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, [[TBAA2:!tbaa !.*]] +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[OFFS:%.*]] +// CHECK-NEXT: [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]]) +// CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, [[TBAA2]] +// CHECK-NEXT: ret void +// +void test75(unsigned char *vqp, signed long long offs, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) { + __vector_quad vq = *((__vector_quad *)vqp); + __vector_pair vp = __builtin_mma_lxvp(offs, vpp); + __builtin_mma_xvf64gernp(&vq, vp, vc); + *((__vector_quad *)resp) = vq; +} diff --git a/clang/test/Sema/ppc-mma-types.c b/clang/test/Sema/ppc-mma-types.c index 96644a4d9bbd68..840e34845f58b1 100644 --- a/clang/test/Sema/ppc-mma-types.c +++ b/clang/test/Sema/ppc-mma-types.c @@ -319,3 +319,17 @@ void testVPOperators4(int v, void *ptr) { __vector_pair vp2 = (__vector_pair)vpp; // expected-error {{used type '__vector_pair' where arithmetic or pointer type is required}} } +void testBuiltinTypes1(const __vector_pair *vpp, const __vector_pair *vp2, float f) { + __vector_pair vp = __builtin_mma_lxvp(f, vpp); // expected-error {{passing 'float' to parameter of incompatible type 'long long'}} + __builtin_mma_stxvp(vp, 32799, vp2); // expected-error {{passing 'int' to parameter of incompatible type 'long long'}} +} + +void testBuiltinTypes2(__vector_pair *vpp, const __vector_pair *vp2, unsigned char c) { + __vector_pair vp = __builtin_mma_lxvp(6LL, vpp); // expected-error {{passing '__vector_pair *' to parameter of incompatible type 'const __vector_pair *'}} + __builtin_mma_stxvp(vp, c, vp2); // expected-error {{passing 'unsigned char' to parameter of incompatible type 'long long'}} +} + +void testBuiltinTypes3(vector int v, __vector_pair *vp2, signed long long ll, unsigned short s) { + __vector_pair vp = __builtin_mma_lxvp(ll, v); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type 'const __vector_pair *'}} + __builtin_mma_stxvp(vp, ll, s); // expected-error {{passing 'unsigned short' to parameter of incompatible type 'const __vector_pair *'}} +} diff --git a/libcxx/utils/ci/Dockerfile b/libcxx/utils/ci/Dockerfile index 9085573086471e..d9cd5d9b206a05 100644 --- a/libcxx/utils/ci/Dockerfile +++ b/libcxx/utils/ci/Dockerfile @@ -45,7 +45,7 @@ ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y bash curl # Install various tools used by the build or the test suite -RUN apt-get update && apt-get install -y ninja-build python3 python3-sphinx git +RUN apt-get update && apt-get install -y ninja-build python3 python3-sphinx git gdb # Install the most recently released LLVM RUN apt-get update && apt-get install -y lsb-release wget software-properties-common diff --git a/lld/wasm/InputChunks.cpp b/lld/wasm/InputChunks.cpp index f33178f29b3176..06a734b69a9706 100644 --- a/lld/wasm/InputChunks.cpp +++ b/lld/wasm/InputChunks.cpp @@ -97,6 +97,7 @@ void InputChunk::verifyRelocTargets() const { break; case R_WASM_TABLE_INDEX_I64: case R_WASM_MEMORY_ADDR_I64: + case R_WASM_FUNCTION_OFFSET_I64: existingValue = read64le(loc); break; default: @@ -176,6 +177,7 @@ void InputChunk::writeTo(uint8_t *buf) const { break; case R_WASM_TABLE_INDEX_I64: case R_WASM_MEMORY_ADDR_I64: + case R_WASM_FUNCTION_OFFSET_I64: write64le(loc, value); break; default: diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp index 3cf177532d84c0..57db39dd76fd96 100644 --- a/lld/wasm/InputFiles.cpp +++ b/lld/wasm/InputFiles.cpp @@ -124,6 +124,7 @@ uint64_t ObjFile::calcNewAddend(const WasmRelocation &reloc) const { case R_WASM_MEMORY_ADDR_I32: case R_WASM_MEMORY_ADDR_I64: case R_WASM_FUNCTION_OFFSET_I32: + case R_WASM_FUNCTION_OFFSET_I64: return reloc.Addend; case R_WASM_SECTION_OFFSET_I32: return getSectionSymbol(reloc.Index)->section->outputOffset + reloc.Addend; @@ -171,7 +172,8 @@ uint64_t ObjFile::calcExpectedValue(const WasmRelocation &reloc) const { else llvm_unreachable("unknown init expr opcode"); } - case R_WASM_FUNCTION_OFFSET_I32: { + case R_WASM_FUNCTION_OFFSET_I32: + case R_WASM_FUNCTION_OFFSET_I64: { const WasmSymbol &sym = wasmObj->syms()[reloc.Index]; InputFunction *f = functions[sym.Info.ElementIndex - wasmObj->getNumImportedFunctions()]; @@ -258,7 +260,8 @@ uint64_t ObjFile::calcNewValue(const WasmRelocation &reloc) const { return sym->getGOTIndex(); case R_WASM_EVENT_INDEX_LEB: return getEventSymbol(reloc.Index)->getEventIndex(); - case R_WASM_FUNCTION_OFFSET_I32: { + case R_WASM_FUNCTION_OFFSET_I32: + case R_WASM_FUNCTION_OFFSET_I64: { auto *f = cast(sym); return f->function->outputOffset + (f->function->getFunctionCodeOffset() + reloc.Addend); diff --git a/lldb/test/Shell/SymbolFile/PDB/udt-layout.test b/lldb/test/Shell/SymbolFile/PDB/udt-layout.test index 726f633efe5b3c..0ee9dcf6771bd5 100644 --- a/lldb/test/Shell/SymbolFile/PDB/udt-layout.test +++ b/lldb/test/Shell/SymbolFile/PDB/udt-layout.test @@ -5,8 +5,8 @@ RUN: %lldb -b -s %S/Inputs/UdtLayoutTest.script -- %t.exe | FileCheck %s CHECK:(int) int C::abc = 123 CHECK:(List [16]) ls = { CHECK: [15] = { -CHECK: Prev = 0x00000000 -CHECK: Next = 0x00000000 +CHECK: Prev = nullptr +CHECK: Next = nullptr CHECK: Value = { CHECK: B<0> = { CHECK: A = { diff --git a/llvm/include/llvm/Analysis/Utils/Local.h b/llvm/include/llvm/Analysis/Utils/Local.h index 9da0c6586dac81..bd82b34165d665 100644 --- a/llvm/include/llvm/Analysis/Utils/Local.h +++ b/llvm/include/llvm/Analysis/Utils/Local.h @@ -30,7 +30,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP, bool NoAssumptions = false) { GEPOperator *GEPOp = cast(GEP); Type *IntIdxTy = DL.getIndexType(GEP->getType()); - Value *Result = Constant::getNullValue(IntIdxTy); + Value *Result = nullptr; // If the GEP is inbounds, we know that none of the addressing operations will // overflow in a signed sense. @@ -46,6 +46,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP, ++i, ++GTI) { Value *Op = *i; uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask; + Value *Offset; if (Constant *OpC = dyn_cast(Op)) { if (OpC->isZeroValue()) continue; @@ -54,46 +55,47 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP, if (StructType *STy = GTI.getStructTypeOrNull()) { uint64_t OpValue = OpC->getUniqueInteger().getZExtValue(); Size = DL.getStructLayout(STy)->getElementOffset(OpValue); - - if (Size) - Result = Builder->CreateAdd(Result, ConstantInt::get(IntIdxTy, Size), - GEP->getName().str()+".offs"); - continue; + if (!Size) + continue; + + Offset = ConstantInt::get(IntIdxTy, Size); + } else { + // Splat the constant if needed. + if (IntIdxTy->isVectorTy() && !OpC->getType()->isVectorTy()) + OpC = ConstantVector::getSplat( + cast(IntIdxTy)->getElementCount(), OpC); + + Constant *Scale = ConstantInt::get(IntIdxTy, Size); + Constant *OC = + ConstantExpr::getIntegerCast(OpC, IntIdxTy, true /*SExt*/); + Offset = + ConstantExpr::getMul(OC, Scale, false /*NUW*/, isInBounds /*NSW*/); } - - // Splat the constant if needed. - if (IntIdxTy->isVectorTy() && !OpC->getType()->isVectorTy()) - OpC = ConstantVector::getSplat( - cast(IntIdxTy)->getElementCount(), OpC); - - Constant *Scale = ConstantInt::get(IntIdxTy, Size); - Constant *OC = ConstantExpr::getIntegerCast(OpC, IntIdxTy, true /*SExt*/); - Scale = - ConstantExpr::getMul(OC, Scale, false /*NUW*/, isInBounds /*NSW*/); - // Emit an add instruction. - Result = Builder->CreateAdd(Result, Scale, GEP->getName().str()+".offs"); - continue; - } - - // Splat the index if needed. - if (IntIdxTy->isVectorTy() && !Op->getType()->isVectorTy()) - Op = Builder->CreateVectorSplat( - cast(IntIdxTy)->getNumElements(), Op); - - // Convert to correct type. - if (Op->getType() != IntIdxTy) - Op = Builder->CreateIntCast(Op, IntIdxTy, true, Op->getName().str()+".c"); - if (Size != 1) { - // We'll let instcombine(mul) convert this to a shl if possible. - Op = Builder->CreateMul(Op, ConstantInt::get(IntIdxTy, Size), - GEP->getName().str() + ".idx", false /*NUW*/, - isInBounds /*NSW*/); + } else { + // Splat the index if needed. + if (IntIdxTy->isVectorTy() && !Op->getType()->isVectorTy()) + Op = Builder->CreateVectorSplat( + cast(IntIdxTy)->getNumElements(), Op); + + // Convert to correct type. + if (Op->getType() != IntIdxTy) + Op = Builder->CreateIntCast(Op, IntIdxTy, true, Op->getName().str()+".c"); + if (Size != 1) { + // We'll let instcombine(mul) convert this to a shl if possible. + Op = Builder->CreateMul(Op, ConstantInt::get(IntIdxTy, Size), + GEP->getName().str() + ".idx", false /*NUW*/, + isInBounds /*NSW*/); + } + Offset = Op; } - // Emit an add instruction. - Result = Builder->CreateAdd(Op, Result, GEP->getName().str()+".offs"); + if (Result) + Result = Builder->CreateAdd(Result, Offset, GEP->getName().str()+".offs", + false /*NUW*/, isInBounds /*NSW*/); + else + Result = Offset; } - return Result; + return Result ? Result : Constant::getNullValue(IntIdxTy); } } diff --git a/llvm/include/llvm/BinaryFormat/WasmRelocs.def b/llvm/include/llvm/BinaryFormat/WasmRelocs.def index 4eb12684eaa6bb..dca63eca945530 100644 --- a/llvm/include/llvm/BinaryFormat/WasmRelocs.def +++ b/llvm/include/llvm/BinaryFormat/WasmRelocs.def @@ -24,3 +24,4 @@ WASM_RELOC(R_WASM_TABLE_INDEX_SLEB64, 18) WASM_RELOC(R_WASM_TABLE_INDEX_I64, 19) WASM_RELOC(R_WASM_TABLE_NUMBER_LEB, 20) WASM_RELOC(R_WASM_MEMORY_ADDR_TLS_SLEB, 21) +WASM_RELOC(R_WASM_FUNCTION_OFFSET_I64, 22) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h index e337a835b4fab3..8f1cd1a1408a89 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h @@ -53,6 +53,24 @@ struct ConstantMatch { inline ConstantMatch m_ICst(int64_t &Cst) { return ConstantMatch(Cst); } +/// Matcher for a specific constant value. +struct SpecificConstantMatch { + int64_t RequestedVal; + SpecificConstantMatch(int64_t RequestedVal) : RequestedVal(RequestedVal) {} + bool match(const MachineRegisterInfo &MRI, Register Reg) { + int64_t MatchedVal; + return mi_match(Reg, MRI, m_ICst(MatchedVal)) && MatchedVal == RequestedVal; + } +}; + +/// Matches a constant equal to \p RequestedValue. +inline SpecificConstantMatch m_SpecificICst(int64_t RequestedValue) { + return SpecificConstantMatch(RequestedValue); +} + +/// Matches an integer 0. +inline SpecificConstantMatch m_ZeroInt() { return SpecificConstantMatch(0); } + // TODO: Rework this for different kinds of MachineOperand. // Currently assumes the Src for a match is a register. // We might want to support taking in some MachineOperands and call getReg on @@ -425,6 +443,14 @@ m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2) { TargetOpcode::G_INSERT_VECTOR_ELT>(Src0, Src1, Src2); } +/// Matches a register negated by a G_SUB. +/// G_SUB 0, %negated_reg +template +inline BinaryOp_match +m_Neg(const SrcTy &&Src) { + return m_GSub(m_ZeroInt(), Src); +} + } // namespace GMIPatternMatch } // namespace llvm diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index ac994548c506c9..fa5000d4248284 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1422,6 +1422,14 @@ let TargetPrefix = "ppc" in { def int_ppc_mma_xxsetaccz : Intrinsic<[llvm_v512i1_ty], [], [IntrNoMem]>; + def int_ppc_mma_lxvp : + Intrinsic<[llvm_v256i1_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; + + def int_ppc_mma_stxvp : + Intrinsic<[], [llvm_v256i1_ty, llvm_ptr_ty], + [IntrWriteMem, IntrArgMemOnly]>; + // MMA Reduced-Precision: Outer Product Intrinsic Definitions. defm int_ppc_mma_xvi4ger8 : PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>; diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 6f2b1f71faee4e..7a8f54bd0c6e9c 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -3437,12 +3437,12 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, // flow and the no-overflow bits may not be valid for the expression in any // context. This can be fixed similarly to how these flags are handled for // adds. - SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW - : SCEV::FlagAnyWrap; + SCEV::NoWrapFlags OffsetWrap = + GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap; Type *CurTy = GEP->getType(); bool FirstIter = true; - SmallVector AddOps{BaseExpr}; + SmallVector Offsets; for (const SCEV *IndexExpr : IndexExprs) { // Compute the (potentially symbolic) offset in bytes for this index. if (StructType *STy = dyn_cast(CurTy)) { @@ -3450,7 +3450,7 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, ConstantInt *Index = cast(IndexExpr)->getValue(); unsigned FieldNo = Index->getZExtValue(); const SCEV *FieldOffset = getOffsetOfExpr(IntIdxTy, STy, FieldNo); - AddOps.push_back(FieldOffset); + Offsets.push_back(FieldOffset); // Update CurTy to the type of the field at Index. CurTy = STy->getTypeAtIndex(Index); @@ -3470,13 +3470,23 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, IndexExpr = getTruncateOrSignExtend(IndexExpr, IntIdxTy); // Multiply the index by the element size to compute the element offset. - const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, Wrap); - AddOps.push_back(LocalOffset); + const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, OffsetWrap); + Offsets.push_back(LocalOffset); } } - // Add the base and all the offsets together. - return getAddExpr(AddOps, Wrap); + // Handle degenerate case of GEP without offsets. + if (Offsets.empty()) + return BaseExpr; + + // Add the offsets together, assuming nsw if inbounds. + const SCEV *Offset = getAddExpr(Offsets, OffsetWrap); + // Add the base address and the offset. We cannot use the nsw flag, as the + // base address is unsigned. However, if we know that the offset is + // non-negative, we can use nuw. + SCEV::NoWrapFlags BaseWrap = GEP->isInBounds() && isKnownNonNegative(Offset) + ? SCEV::FlagNUW : SCEV::FlagAnyWrap; + return getAddExpr(BaseExpr, Offset, BaseWrap); } std::tuple diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 0a18718fd9b54d..e6d3727a239081 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1289,9 +1289,6 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true); gep_type_iterator GTI = gep_type_begin(I); - // If the inbounds keyword is not present, the offsets are added to the - // base address with silently-wrapping two’s complement arithmetic. - bool IsInBounds = cast(I)->isInBounds(); for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) { // TrailZ can only become smaller, short-circuit if we hit zero. if (Known.isUnknown()) @@ -1356,17 +1353,17 @@ static void computeKnownBitsFromOperator(const Operator *I, // to the width of the pointer. IndexBits = IndexBits.sextOrTrunc(BitWidth); + // Note that inbounds does *not* guarantee nsw for the addition, as only + // the offset is signed, while the base address is unsigned. Known = KnownBits::computeForAddSub( - /*Add=*/true, - /*NSW=*/IsInBounds, Known, IndexBits); + /*Add=*/true, /*NSW=*/false, Known, IndexBits); } if (!Known.isUnknown() && !AccConstIndices.isNullValue()) { KnownBits Index(BitWidth); Index.Zero = ~AccConstIndices; Index.One = AccConstIndices; Known = KnownBits::computeForAddSub( - /*Add=*/true, - /*NSW=*/IsInBounds, Known, Index); + /*Add=*/true, /*NSW=*/false, Known, Index); } break; } diff --git a/llvm/lib/BinaryFormat/Wasm.cpp b/llvm/lib/BinaryFormat/Wasm.cpp index a22ab5890922f7..126680ac41c241 100644 --- a/llvm/lib/BinaryFormat/Wasm.cpp +++ b/llvm/lib/BinaryFormat/Wasm.cpp @@ -50,6 +50,7 @@ bool llvm::wasm::relocTypeHasAddend(uint32_t Type) { case R_WASM_MEMORY_ADDR_I64: case R_WASM_MEMORY_ADDR_TLS_SLEB: case R_WASM_FUNCTION_OFFSET_I32: + case R_WASM_FUNCTION_OFFSET_I64: case R_WASM_SECTION_OFFSET_I32: return true; default: diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 32bdba442b56ea..79f74a47d83c84 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -2614,9 +2614,7 @@ bool CombinerHelper::matchSimplifyAddToSub( // ((0-A) + B) -> B - A // (A + (0-B)) -> A - B auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) { - int64_t Cst; - if (!mi_match(MaybeSub, MRI, m_GSub(m_ICst(Cst), m_Reg(NewRHS))) || - Cst != 0) + if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS)))) return false; NewLHS = MaybeNewLHS; return true; diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index a72f9f66560cbe..4d9580de3b7bb6 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -682,9 +682,7 @@ static bool isBuildVectorConstantSplat(const MachineInstr &MI, const unsigned NumOps = MI.getNumOperands(); for (unsigned I = 1; I != NumOps; ++I) { Register Element = MI.getOperand(I).getReg(); - int64_t ElementValue; - if (!mi_match(Element, MRI, m_ICst(ElementValue)) || - ElementValue != SplatValue) + if (!mi_match(Element, MRI, m_SpecificICst(SplatValue))) return false; } diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index f69b96a295bfc7..6f9268fbb4ecc3 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -345,7 +345,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, if (!MDO) return nullptr; auto V = cast(MDO); - const Function *F = cast(V->getValue()); + const Function *F = cast(V->getValue()->stripPointerCasts()); return TM->getSymbol(F); }; diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 907993c400d9c0..6c57369f8d8007 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -1578,8 +1578,8 @@ void Verifier::visitModuleFlagCGProfileEntry(const MDOperand &MDO) { if (!FuncMDO) return; auto F = dyn_cast(FuncMDO); - Assert(F && isa(F->getValue()), "expected a Function or null", - FuncMDO); + Assert(F && isa(F->getValue()->stripPointerCasts()), + "expected a Function or null", FuncMDO); }; auto Node = dyn_cast_or_null(MDO); Assert(Node && Node->getNumOperands() == 3, "expected a MDNode triple", MDO); diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp index 6bcec5f4c37c6b..0cf6e310ae3224 100644 --- a/llvm/lib/MC/WasmObjectWriter.cpp +++ b/llvm/lib/MC/WasmObjectWriter.cpp @@ -478,6 +478,7 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, // Currently only supported for for metadata sections. // See: test/MC/WebAssembly/blockaddress.ll if (Type == wasm::R_WASM_FUNCTION_OFFSET_I32 || + Type == wasm::R_WASM_FUNCTION_OFFSET_I64 || Type == wasm::R_WASM_SECTION_OFFSET_I32) { if (!FixupSection.getKind().isMetadata()) report_fatal_error("relocations for function or section offsets are " @@ -564,6 +565,7 @@ WasmObjectWriter::getProvisionalValue(const WasmRelocationEntry &RelEntry, assert(WasmIndices.count(RelEntry.Symbol) > 0 && "symbol not found in wasm index space"); return WasmIndices[RelEntry.Symbol]; case wasm::R_WASM_FUNCTION_OFFSET_I32: + case wasm::R_WASM_FUNCTION_OFFSET_I64: case wasm::R_WASM_SECTION_OFFSET_I32: { const auto &Section = static_cast(RelEntry.Symbol->getSection()); @@ -680,6 +682,7 @@ void WasmObjectWriter::applyRelocations( break; case wasm::R_WASM_TABLE_INDEX_I64: case wasm::R_WASM_MEMORY_ADDR_I64: + case wasm::R_WASM_FUNCTION_OFFSET_I64: patchI64(Stream, Value, Offset); break; case wasm::R_WASM_TABLE_INDEX_SLEB: diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp index d56e7be7043cde..f39262bc906102 100644 --- a/llvm/lib/Object/RelocationResolver.cpp +++ b/llvm/lib/Object/RelocationResolver.cpp @@ -566,6 +566,7 @@ static bool supportsWasm64(uint64_t Type) { case wasm::R_WASM_MEMORY_ADDR_I64: case wasm::R_WASM_TABLE_INDEX_SLEB64: case wasm::R_WASM_TABLE_INDEX_I64: + case wasm::R_WASM_FUNCTION_OFFSET_I64: return true; default: return supportsWasm32(Type); @@ -601,6 +602,7 @@ static uint64_t resolveWasm64(RelocationRef R, uint64_t S, uint64_t A) { case wasm::R_WASM_MEMORY_ADDR_I64: case wasm::R_WASM_TABLE_INDEX_SLEB64: case wasm::R_WASM_TABLE_INDEX_I64: + case wasm::R_WASM_FUNCTION_OFFSET_I64: // For wasm section, its offset at 0 -- ignoring Value return A; default: diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp index 63f2742e9215ff..82aa1f527fb873 100644 --- a/llvm/lib/Object/WasmObjectFile.cpp +++ b/llvm/lib/Object/WasmObjectFile.cpp @@ -876,6 +876,12 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) { object_error::parse_failed); Reloc.Addend = readVarint32(Ctx); break; + case wasm::R_WASM_FUNCTION_OFFSET_I64: + if (!isValidFunctionSymbol(Reloc.Index)) + return make_error("Bad relocation function index", + object_error::parse_failed); + Reloc.Addend = readVarint64(Ctx); + break; case wasm::R_WASM_SECTION_OFFSET_I32: if (!isValidSectionSymbol(Reloc.Index)) return make_error("Bad relocation section index", @@ -903,7 +909,8 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) { Reloc.Type == wasm::R_WASM_GLOBAL_INDEX_I32) Size = 4; if (Reloc.Type == wasm::R_WASM_TABLE_INDEX_I64 || - Reloc.Type == wasm::R_WASM_MEMORY_ADDR_I64) + Reloc.Type == wasm::R_WASM_MEMORY_ADDR_I64 || + Reloc.Type == wasm::R_WASM_FUNCTION_OFFSET_I64) Size = 8; if (Reloc.Offset + Size > EndOffset) return make_error("Bad relocation offset", diff --git a/llvm/lib/ObjectYAML/WasmEmitter.cpp b/llvm/lib/ObjectYAML/WasmEmitter.cpp index 81e24bfdaf8454..7b21d56be5030b 100644 --- a/llvm/lib/ObjectYAML/WasmEmitter.cpp +++ b/llvm/lib/ObjectYAML/WasmEmitter.cpp @@ -547,6 +547,7 @@ void WasmWriter::writeRelocSection(raw_ostream &OS, WasmYAML::Section &Sec, case wasm::R_WASM_MEMORY_ADDR_I32: case wasm::R_WASM_MEMORY_ADDR_I64: case wasm::R_WASM_FUNCTION_OFFSET_I32: + case wasm::R_WASM_FUNCTION_OFFSET_I64: case wasm::R_WASM_SECTION_OFFSET_I32: encodeULEB128(Reloc.Addend, OS); } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 218aa19d548eb6..1d5791399014c7 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -1010,12 +1010,29 @@ AArch64InstructionSelector::emitSelect(Register Dst, Register True, // By default, we'll try and emit a CSEL. unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr; + bool Optimized = false; + auto TryOptNegIntoSelect = [&Opc, &False, Is32Bit, &MRI]() { + // Attempt to fold: + // + // sub = G_SUB 0, x + // select = G_SELECT cc, true, sub + // + // Into: + // select = CSNEG true, x, cc + if (!mi_match(False, MRI, m_Neg(m_Reg(False)))) + return false; + Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr; + return true; + }; // Helper lambda which tries to use CSINC/CSINV for the instruction when its // true/false values are constants. // FIXME: All of these patterns already exist in tablegen. We should be // able to import these. - auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI]() { + auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI, + &Optimized]() { + if (Optimized) + return false; auto TrueCst = getConstantVRegValWithLookThrough(True, MRI); auto FalseCst = getConstantVRegValWithLookThrough(False, MRI); if (!TrueCst && !FalseCst) @@ -1083,25 +1100,13 @@ AArch64InstructionSelector::emitSelect(Register Dst, Register True, return false; }; - TryOptSelectCst(); + Optimized |= TryOptNegIntoSelect(); + Optimized |= TryOptSelectCst(); auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI); return &*SelectInst; } -/// Returns true if \p P is an unsigned integer comparison predicate. -static bool isUnsignedICMPPred(const CmpInst::Predicate P) { - switch (P) { - default: - return false; - case CmpInst::ICMP_UGT: - case CmpInst::ICMP_UGE: - case CmpInst::ICMP_ULT: - case CmpInst::ICMP_ULE: - return true; - } -} - static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) { switch (P) { default: @@ -1837,10 +1842,7 @@ bool AArch64InstructionSelector::convertPtrAddToAdd( // Also take the opportunity here to try to do some optimization. // Try to convert this into a G_SUB if the offset is a 0-x negate idiom. Register NegatedReg; - int64_t Cst; - if (!mi_match(I.getOperand(2).getReg(), MRI, - m_GSub(m_ICst(Cst), m_Reg(NegatedReg))) || - Cst != 0) + if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg)))) return true; I.getOperand(2).setReg(NegatedReg); I.setDesc(TII.get(TargetOpcode::G_SUB)); @@ -4382,7 +4384,7 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare( // Produce this if the compare is signed: // // tst x, y - if (!isUnsignedICMPPred(P) && LHSDef && + if (!CmpInst::isUnsigned(P) && LHSDef && LHSDef->getOpcode() == TargetOpcode::G_AND) { // Make sure that the RHS is 0. auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp index 645e85388490e0..43f28729baa1fc 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp @@ -291,8 +291,7 @@ static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI, return false; // Match the index constant 0. - int64_t Index = 0; - if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index) + if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt())) return false; MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 638ec0e7ce0607..e5cbcb3ccdb76e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -633,7 +633,6 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC( Register ShiftSrc0; Register ShiftSrc1; - int64_t ShiftAmt; // With multiple uses of the shift, this will duplicate the shift and // increase register pressure. @@ -645,14 +644,11 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC( // (build_vector_trunc $src0, $src1) // => (S_PACK_LL_B32_B16 $src0, $src1) - // FIXME: This is an inconvenient way to check a specific value bool Shift0 = mi_match( - Src0, *MRI, m_OneUse(m_GLShr(m_Reg(ShiftSrc0), m_ICst(ShiftAmt)))) && - ShiftAmt == 16; + Src0, *MRI, m_OneUse(m_GLShr(m_Reg(ShiftSrc0), m_SpecificICst(16)))); bool Shift1 = mi_match( - Src1, *MRI, m_OneUse(m_GLShr(m_Reg(ShiftSrc1), m_ICst(ShiftAmt)))) && - ShiftAmt == 16; + Src1, *MRI, m_OneUse(m_GLShr(m_Reg(ShiftSrc1), m_SpecificICst(16)))); unsigned Opc = AMDGPU::S_PACK_LL_B32_B16; if (Shift0 && Shift1) { @@ -3474,9 +3470,7 @@ static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) { if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES) return false; - int64_t MergeRHS; - if (mi_match(Def->getOperand(2).getReg(), MRI, m_ICst(MergeRHS)) && - MergeRHS == 0) { + if (mi_match(Def->getOperand(2).getReg(), MRI, m_ZeroInt())) { return Def->getOperand(1).getReg(); } diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index c67cf897c397c2..a66a015ac2efe2 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -293,6 +293,13 @@ namespace { Align(16)); } + /// SelectAddrImmX34 - Returns true if the address N can be represented by + /// a base register plus a signed 34-bit displacement. Suitable for use by + /// PSTXVP and friends. + bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) { + return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG); + } + // Select an address into a single register. bool SelectAddr(SDValue N, SDValue &Base) { Base = N; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 5b5504a458ed93..2a77d53a78177d 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2399,6 +2399,20 @@ bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base, return false; } +/// isIntS34Immediate - This method tests if value of node given can be +/// accurately represented as a sign extension from a 34-bit value. If so, +/// this returns true and the immediate. +bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) { + if (!isa(N)) + return false; + + Imm = (int64_t)cast(N)->getZExtValue(); + return isInt<34>(Imm); +} +bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) { + return isIntS34Immediate(Op.getNode(), Imm); +} + /// SelectAddressRegReg - Given the specified addressed, check to see if it /// can be represented as an indexed [r+r] operation. Returns false if it /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is @@ -2599,6 +2613,55 @@ bool PPCTargetLowering::SelectAddressRegImm( return true; // [r+0] } +/// Similar to the 16-bit case but for instructions that take a 34-bit +/// displacement field (prefixed loads/stores). +bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp, + SDValue &Base, + SelectionDAG &DAG) const { + // Only on 64-bit targets. + if (N.getValueType() != MVT::i64) + return false; + + SDLoc dl(N); + int64_t Imm = 0; + + if (N.getOpcode() == ISD::ADD) { + if (!isIntS34Immediate(N.getOperand(1), Imm)) + return false; + Disp = DAG.getTargetConstant(Imm, dl, N.getValueType()); + if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) + Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); + else + Base = N.getOperand(0); + return true; + } + + if (N.getOpcode() == ISD::OR) { + if (!isIntS34Immediate(N.getOperand(1), Imm)) + return false; + // If this is an or of disjoint bitfields, we can codegen this as an add + // (for better address arithmetic) if the LHS and RHS of the OR are + // provably disjoint. + KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0)); + if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL) + return false; + if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) + Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); + else + Base = N.getOperand(0); + Disp = DAG.getTargetConstant(Imm, dl, N.getValueType()); + return true; + } + + if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const. + Disp = DAG.getTargetConstant(Imm, dl, N.getValueType()); + Base = DAG.getRegister(PPC::ZERO8, N.getValueType()); + return true; + } + + return false; +} + /// SelectAddressRegRegOnly - Given the specified addressed, force it to be /// represented as an indexed [r+r] operation. bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 70dcef4658ff91..ca7c68624c68aa 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -770,6 +770,8 @@ namespace llvm { bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const; + bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, + SelectionDAG &DAG) const; /// SelectAddressRegRegOnly - Given the specified addressed, force it to be /// represented as an indexed [r+r] operation. @@ -1325,6 +1327,8 @@ namespace llvm { bool isIntS16Immediate(SDNode *N, int16_t &Imm); bool isIntS16Immediate(SDValue Op, int16_t &Imm); + bool isIntS34Immediate(SDNode *N, int64_t &Imm); + bool isIntS34Immediate(SDValue Op, int64_t &Imm); bool convertToNonDenormSingle(APInt &ArgAPInt); bool convertToNonDenormSingle(APFloat &ArgAPFloat); diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 1a128f1ddf0d83..2e77d04d4a79e2 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1031,11 +1031,13 @@ def pred : Operand { // Define PowerPC specific addressing mode. // d-form -def iaddr : ComplexPattern; // "stb" +def iaddr : ComplexPattern; // "stb" // ds-form -def iaddrX4 : ComplexPattern; // "std" +def iaddrX4 : ComplexPattern; // "std" // dq-form -def iaddrX16 : ComplexPattern; // "stxv" +def iaddrX16 : ComplexPattern; // "stxv" +// 8LS:d-form +def iaddrX34 : ComplexPattern; // "pstxvp" // Below forms are all x-form addressing mode, use three different ones so we // can make a accurate check for x-form instructions in ISEL. diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td index 9f20bfcebe3c90..e1b76bb3bd0016 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -1654,6 +1654,24 @@ let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops, PrefixInstrs] i "pstxvp $XTp, $D_RA", IIC_LdStLFD>; } +let Predicates = [PairedVectorMemops] in { + // Intrinsics for Paired Vector Loads. + def : Pat<(v256i1 (int_ppc_mma_lxvp iaddrX16:$src)), (LXVP memrix16:$src)>; + def : Pat<(v256i1 (int_ppc_mma_lxvp xaddrX16:$src)), (LXVPX xaddrX16:$src)>; + let Predicates = [PairedVectorMemops, PrefixInstrs] in { + def : Pat<(v256i1 (int_ppc_mma_lxvp iaddrX34:$src)), (PLXVP memri34:$src)>; + } + // Intrinsics for Paired Vector Stores. + def : Pat<(int_ppc_mma_stxvp v256i1:$XSp, iaddrX16:$dst), + (STXVP $XSp, memrix16:$dst)>; + def : Pat<(int_ppc_mma_stxvp v256i1:$XSp, xaddrX16:$dst), + (STXVPX $XSp, xaddrX16:$dst)>; + let Predicates = [PairedVectorMemops, PrefixInstrs] in { + def : Pat<(int_ppc_mma_stxvp v256i1:$XSp, iaddrX34:$dst), + (PSTXVP $XSp, memri34:$dst)>; + } +} + // TODO: We have an added complexity of 500 here. This is only a temporary // solution to have tablegen consider these patterns first. The way we do // addressing for PowerPC is complex depending on available D form, X form, or diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp index 4f1825bfc1c50f..ccbaea88d2f1d3 100644 --- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp +++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp @@ -60,6 +60,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IntrinsicsPowerPC.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" @@ -277,8 +278,11 @@ static Value *GetPointerOperand(Value *MemI) { } else if (StoreInst *SMemI = dyn_cast(MemI)) { return SMemI->getPointerOperand(); } else if (IntrinsicInst *IMemI = dyn_cast(MemI)) { - if (IMemI->getIntrinsicID() == Intrinsic::prefetch) + if (IMemI->getIntrinsicID() == Intrinsic::prefetch || + IMemI->getIntrinsicID() == Intrinsic::ppc_mma_lxvp) return IMemI->getArgOperand(0); + if (IMemI->getIntrinsicID() == Intrinsic::ppc_mma_stxvp) + return IMemI->getArgOperand(1); } return nullptr; @@ -345,9 +349,13 @@ SmallVector PPCLoopInstrFormPrep::collectCandidates( MemI = SMemI; PtrValue = SMemI->getPointerOperand(); } else if (IntrinsicInst *IMemI = dyn_cast(&J)) { - if (IMemI->getIntrinsicID() == Intrinsic::prefetch) { + if (IMemI->getIntrinsicID() == Intrinsic::prefetch || + IMemI->getIntrinsicID() == Intrinsic::ppc_mma_lxvp) { MemI = IMemI; PtrValue = IMemI->getArgOperand(0); + } else if (IMemI->getIntrinsicID() == Intrinsic::ppc_mma_stxvp) { + MemI = IMemI; + PtrValue = IMemI->getArgOperand(1); } else continue; } else continue; @@ -827,6 +835,11 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) { if (ST && ST->hasAltivec() && PtrValue->getType()->getPointerElementType()->isVectorTy()) return false; + // There are no update forms for P10 lxvp/stxvp intrinsic. + auto *II = dyn_cast(I); + if (II && ((II->getIntrinsicID() == Intrinsic::ppc_mma_lxvp) || + II->getIntrinsicID() == Intrinsic::ppc_mma_stxvp)) + return false; // See getPreIndexedAddressParts, the displacement for LDU/STDU has to // be 4's multiple (DS-form). For i64 loads/stores when the displacement // fits in a 16-bit signed field but isn't a multiple of 4, it will be @@ -864,7 +877,13 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) { // Check if a load/store has DQ form. auto isDQFormCandidate = [&] (const Instruction *I, const Value *PtrValue) { assert((PtrValue && I) && "Invalid parameter!"); - return !isa(I) && ST && ST->hasP9Vector() && + // Check if it is a P10 lxvp/stxvp intrinsic. + auto *II = dyn_cast(I); + if (II) + return II->getIntrinsicID() == Intrinsic::ppc_mma_lxvp || + II->getIntrinsicID() == Intrinsic::ppc_mma_stxvp; + // Check if it is a P9 vector load/store. + return ST && ST->hasP9Vector() && (PtrValue->getType()->getPointerElementType()->isVectorTy()); }; diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index f3134460514cb4..be4f3354ede4b9 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -1223,7 +1223,8 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, case Intrinsic::ppc_vsx_lxvd2x_be: case Intrinsic::ppc_vsx_lxvw4x_be: case Intrinsic::ppc_vsx_lxvl: - case Intrinsic::ppc_vsx_lxvll: { + case Intrinsic::ppc_vsx_lxvll: + case Intrinsic::ppc_mma_lxvp: { Info.PtrVal = Inst->getArgOperand(0); Info.ReadMem = true; Info.WriteMem = false; @@ -1239,7 +1240,8 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, case Intrinsic::ppc_vsx_stxvd2x_be: case Intrinsic::ppc_vsx_stxvw4x_be: case Intrinsic::ppc_vsx_stxvl: - case Intrinsic::ppc_vsx_stxvll: { + case Intrinsic::ppc_vsx_stxvll: + case Intrinsic::ppc_mma_stxvp: { Info.PtrVal = Inst->getArgOperand(1); Info.ReadMem = false; Info.WriteMem = true; diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp index 7cbe9e38fb47fb..c3d259e6ff2055 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp @@ -23,9 +23,7 @@ WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() = default; // anchor. WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T, const MCTargetOptions &Options) { - CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4; - // So far this is used for DWARF DW_AT_low_pc which is always 32-bit in Wasm. - CodePointerSize = 4; + CodePointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4; // TODO: What should MaxInstLength be? diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp index f0f5d9834771ee..aa7e2311d24086 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp @@ -131,7 +131,7 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target, if (auto Section = static_cast( getFixupSection(Fixup.getValue()))) { if (Section->getKind().isText()) - llvm_unreachable("unimplemented R_WASM_FUNCTION_OFFSET_I64"); + return wasm::R_WASM_FUNCTION_OFFSET_I64; else if (!Section->isWasmData()) llvm_unreachable("unimplemented R_WASM_SECTION_OFFSET_I64"); } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index abeddf45f22c69..b8431a5a453212 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1672,13 +1672,11 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS, Value *Result = EmitGEPOffset(GEP1); // If this is a single inbounds GEP and the original sub was nuw, - // then the final multiplication is also nuw. We match an extra add zero - // here, because that's what EmitGEPOffset() generates. - Instruction *I; - if (IsNUW && !GEP2 && !Swapped && GEP1->isInBounds() && - match(Result, m_Add(m_Instruction(I), m_Zero())) && - I->getOpcode() == Instruction::Mul) - I->setHasNoUnsignedWrap(); + // then the final multiplication is also nuw. + if (auto *I = dyn_cast(Result)) + if (IsNUW && !GEP2 && !Swapped && GEP1->isInBounds() && + I->getOpcode() == Instruction::Mul) + I->setHasNoUnsignedWrap(); // If we had a constant expression GEP on the other side offsetting the // pointer, subtract it from the offset we have. diff --git a/llvm/test/Analysis/CostModel/X86/arith-fix.ll b/llvm/test/Analysis/CostModel/X86/arith-fix.ll index 57648fead67ea0..6c89c14eed97fb 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fix.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fix.ll @@ -1,15 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSSE3 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512F -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512DQ +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX1 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512DQ ; -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,BTVER2 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SLM +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=GLM +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2 declare i64 @llvm.smul.fix.i64(i64, i64, i32) declare <2 x i64> @llvm.smul.fix.v2i64(<2 x i64>, <2 x i64>, i32) @@ -31,8 +31,6 @@ declare <16 x i8> @llvm.smul.fix.v16i8(<16 x i8>, <16 x i8>, i32) declare <32 x i8> @llvm.smul.fix.v32i8(<32 x i8>, <32 x i8>, i32) declare <64 x i8> @llvm.smul.fix.v64i8(<64 x i8>, <64 x i8>, i32) -; CHECK: {{^}} - define i32 @smul(i32 %arg) { ; SSSE3-LABEL: 'smul' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) diff --git a/llvm/test/Analysis/CostModel/X86/arith-fma.ll b/llvm/test/Analysis/CostModel/X86/arith-fma.ll index 2fb72c1bc0a919..99864faa037b78 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fma.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fma.ll @@ -1,11 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+xop | FileCheck %s --check-prefixes=CHECK,XOP -; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+fma,+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 --allow-unused-prefixes -; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+fma,+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 --allow-unused-prefixes -; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+fma,+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F --allow-unused-prefixes -; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+fma,+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW --allow-unused-prefixes - -; CHECK: {{^}} +; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+xop | FileCheck %s --check-prefixes=XOP +; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+fma,+avx | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+fma,+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+fma,+avx512f | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+fma,+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 define i32 @fma(i32 %arg) { ; XOP-LABEL: 'fma' diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp.ll b/llvm/test/Analysis/CostModel/X86/arith-fp.ll index fe1e48a993c133..d5d47a99e91996 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp.ll @@ -1,17 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,SSE1 -; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2 -; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42 -; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 -; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 -; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F --allow-unused-prefixes -; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW --allow-unused-prefixes -; -; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM -; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM -; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX,BTVER2 +; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=-sse2 | FileCheck %s --check-prefixes=SSE1 +; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 +; +; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SLM +; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=GLM +; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 -; CHECK: {{^}} target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" @@ -103,17 +102,6 @@ define i32 @fadd(i32 %arg) { ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fadd <4 x double> undef, undef ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fadd <8 x double> undef, undef ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; BTVER2-LABEL: 'fadd' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fadd <8 x float> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fadd <16 x float> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fadd <4 x double> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fadd <8 x double> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %F32 = fadd float undef, undef %V4F32 = fadd <4 x float> undef, undef @@ -216,17 +204,6 @@ define i32 @fsub(i32 %arg) { ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> undef, undef ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> undef, undef ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; BTVER2-LABEL: 'fsub' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %F32 = fsub float undef, undef %V4F32 = fsub <4 x float> undef, undef @@ -329,17 +306,6 @@ define i32 @fneg_idiom(i32 %arg) { ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> , undef ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> , undef ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; BTVER2-LABEL: 'fneg_idiom' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> , undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> , undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %F32 = fsub float -0.0, undef %V4F32 = fsub <4 x float> , undef @@ -533,17 +499,6 @@ define i32 @fmul(i32 %arg) { ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fmul <4 x double> undef, undef ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fmul <8 x double> undef, undef ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; BTVER2-LABEL: 'fmul' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fmul <8 x float> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fmul <16 x float> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fmul <4 x double> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fmul <8 x double> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %F32 = fmul float undef, undef %V4F32 = fmul <4 x float> undef, undef @@ -646,17 +601,6 @@ define i32 @fdiv(i32 %arg) { ; GLM-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V4F64 = fdiv <4 x double> undef, undef ; GLM-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V8F64 = fdiv <8 x double> undef, undef ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; BTVER2-LABEL: 'fdiv' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = fdiv float undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = fdiv <4 x float> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = fdiv <8 x float> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = fdiv <16 x float> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %F64 = fdiv double undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = fdiv <2 x double> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F64 = fdiv <4 x double> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F64 = fdiv <8 x double> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %F32 = fdiv float undef, undef %V4F32 = fdiv <4 x float> undef, undef @@ -850,17 +794,6 @@ define i32 @fsqrt(i32 %arg) { ; GLM-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; BTVER2-LABEL: 'fsqrt' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %F32 = call float @llvm.sqrt.f32(float undef) %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll index cfb52d3c4c9ffa..fac508f592bb27 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll @@ -1,17 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSSE3 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512F -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512DQ -; -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,BTVER2 - -; CHECK: {{^}} +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX1 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512DQ +; +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SLM +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=GLM +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2 ; ; sadd.with.overflow diff --git a/llvm/test/Analysis/CostModel/X86/arith-sminmax.ll b/llvm/test/Analysis/CostModel/X86/arith-sminmax.ll index 0eca3160c726aa..8e9ef5bba0bb52 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-sminmax.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-sminmax.ll @@ -1,14 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=CHECK,SSE2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+ssse3 | FileCheck %s -check-prefixes=CHECK,SSSE3 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s -check-prefixes=CHECK,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=CHECK,AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=CHECK,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s -check-prefixes=CHECK,AVX512F -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512dq | FileCheck %s -check-prefixes=CHECK,AVX512DQ -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512bw | FileCheck %s -check-prefixes=CHECK,AVX512BW - -; CHECK: {{^}} +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+ssse3 | FileCheck %s -check-prefixes=SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s -check-prefixes=AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512dq | FileCheck %s -check-prefixes=AVX512DQ +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512bw | FileCheck %s -check-prefixes=AVX512BW declare i64 @llvm.smax.i64(i64, i64) declare <2 x i64> @llvm.smax.v2i64(<2 x i64>, <2 x i64>) diff --git a/llvm/test/Analysis/CostModel/X86/arith-ssat.ll b/llvm/test/Analysis/CostModel/X86/arith-ssat.ll index 15b48e4ad62bda..783add32f6c136 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-ssat.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-ssat.ll @@ -1,17 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSSE3 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512F -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512DQ +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX1 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512DQ ; -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,BTVER2 - -; CHECK: {{^}} +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SLM +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=GLM +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/llvm/test/Analysis/CostModel/X86/arith-uminmax.ll b/llvm/test/Analysis/CostModel/X86/arith-uminmax.ll index 909b624457bb34..b8e748d3fda014 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-uminmax.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-uminmax.ll @@ -1,14 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=CHECK,SSE2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+ssse3 | FileCheck %s -check-prefixes=CHECK,SSSE3 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s -check-prefixes=CHECK,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=CHECK,AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=CHECK,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s -check-prefixes=CHECK,AVX512F -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512dq | FileCheck %s -check-prefixes=CHECK,AVX512DQ -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512bw | FileCheck %s -check-prefixes=CHECK,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+ssse3 | FileCheck %s -check-prefixes=SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s -check-prefixes=AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512dq | FileCheck %s -check-prefixes=AVX512DQ +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512bw | FileCheck %s -check-prefixes=AVX512BW -; CHECK: {{^}} declare i64 @llvm.umax.i64(i64, i64) declare <2 x i64> @llvm.umax.v2i64(<2 x i64>, <2 x i64>) declare <4 x i64> @llvm.umax.v4i64(<4 x i64>, <4 x i64>) diff --git a/llvm/test/Analysis/CostModel/X86/arith-usat.ll b/llvm/test/Analysis/CostModel/X86/arith-usat.ll index e7b2b413aa1ced..8c0e818c20c233 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-usat.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-usat.ll @@ -1,17 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSSE3 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512F -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512DQ +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX1 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512DQ ; -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,BTVER2 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SLM +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=GLM +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2 -; CHECK: {{^}} target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/llvm/test/Analysis/CostModel/X86/arith.ll b/llvm/test/Analysis/CostModel/X86/arith.ll index fdc3a519e1d6fb..f0bf501f605e71 100644 --- a/llvm/test/Analysis/CostModel/X86/arith.ll +++ b/llvm/test/Analysis/CostModel/X86/arith.ll @@ -1,17 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512F -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512DQ +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512DQ ; -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SLM -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,GLM -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX,BTVER2 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SSE,SLM +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,GLM +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,BTVER2 -; CHECK: {{^}} target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/llvm/test/Analysis/CostModel/X86/bitreverse.ll b/llvm/test/Analysis/CostModel/X86/bitreverse.ll index ddbcadb19fdbe6..2497c4bc4fa14c 100644 --- a/llvm/test/Analysis/CostModel/X86/bitreverse.ll +++ b/llvm/test/Analysis/CostModel/X86/bitreverse.ll @@ -1,20 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=CHECK,X86,SSE2 -; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s -check-prefixes=CHECK,X86,SSE42 -; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=CHECK,X86,AVX,AVX1 -; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=CHECK,X86,AVX,AVX2 -; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s -check-prefixes=CHECK,X86,AVX512,AVX512F -; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,avx512bw,avx512dq | FileCheck %s -check-prefixes=CHECK,X86,AVX512,AVX512BW -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=CHECK,X64,SSE2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s -check-prefixes=CHECK,X64,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=CHECK,X64,AVX,AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=CHECK,X64,AVX,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s -check-prefixes=CHECK,X64,AVX512,AVX512F -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=CHECK,X64,AVX512,AVX512BW -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop | FileCheck %s -check-prefixes=CHECK,XOP -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop,+avx2 | FileCheck %s -check-prefixes=CHECK,XOP +; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=X86,SSE2 +; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s -check-prefixes=X86,SSE42 +; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=X86,AVX,AVX1 +; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=X86,AVX,AVX2 +; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s -check-prefixes=X86,AVX512,AVX512F +; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,avx512bw,avx512dq | FileCheck %s -check-prefixes=X86,AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=X64,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s -check-prefixes=X64,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=X64,AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=X64,AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s -check-prefixes=X64,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=X64,AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop | FileCheck %s -check-prefixes=XOP +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop,+avx2 | FileCheck %s -check-prefixes=XOP -; CHECK: {{^}} ; Verify the cost of scalar bitreverse instructions. declare i64 @llvm.bitreverse.i64(i64) diff --git a/llvm/test/Analysis/CostModel/X86/bswap.ll b/llvm/test/Analysis/CostModel/X86/bswap.ll index 69dd328a521934..c8d1ddf656a795 100644 --- a/llvm/test/Analysis/CostModel/X86/bswap.ll +++ b/llvm/test/Analysis/CostModel/X86/bswap.ll @@ -1,12 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=CHECK,SSE2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s -check-prefixes=CHECK,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=CHECK,AVX,AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=CHECK,AVX,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop,+avx | FileCheck %s -check-prefixes=CHECK,AVX,AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop,+avx2 | FileCheck %s -check-prefixes=CHECK,AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop,+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop,+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 -; CHECK: {{^}} ; Verify the cost of vector bswap instructions. declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) diff --git a/llvm/test/Analysis/CostModel/X86/cast.ll b/llvm/test/Analysis/CostModel/X86/cast.ll index abc0ea19cb8ce4..f388eecf26d6b6 100644 --- a/llvm/test/Analysis/CostModel/X86/cast.ll +++ b/llvm/test/Analysis/CostModel/X86/cast.ll @@ -1,13 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE41 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX1 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ -; CHECK: {{^}} define i32 @add(i32 %arg) { ; SSE-LABEL: 'add' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = zext <4 x i1> undef to <4 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/ctlz.ll b/llvm/test/Analysis/CostModel/X86/ctlz.ll index 5bf2fc2f80a9dd..a897e3783eb530 100644 --- a/llvm/test/Analysis/CostModel/X86/ctlz.ll +++ b/llvm/test/Analysis/CostModel/X86/ctlz.ll @@ -1,15 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=-lzcnt,+sse2 | FileCheck %s -check-prefixes=CHECK,SSE2,NOLZCNT -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+sse2 | FileCheck %s -check-prefixes=CHECK,SSE2,LZCNT -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+sse4.2 | FileCheck %s -check-prefixes=CHECK,LZCNT,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx | FileCheck %s -check-prefixes=CHECK,LZCNT,AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx2 | FileCheck %s -check-prefixes=CHECK,LZCNT,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx512f | FileCheck %s -check-prefixes=CHECK,LZCNT,AVX512,AVX512F -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=CHECK,LZCNT,AVX512,AVX512BW -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx512vl,+avx512bw,+avx512dq,+avx512cd | FileCheck %s -check-prefixes=CHECK,LZCNT,AVX512CD +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=-lzcnt,+sse2 | FileCheck %s -check-prefixes=SSE2,NOLZCNT +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+sse2 | FileCheck %s -check-prefixes=SSE2,LZCNT +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+sse4.2 | FileCheck %s -check-prefixes=LZCNT,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx | FileCheck %s -check-prefixes=LZCNT,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx2 | FileCheck %s -check-prefixes=LZCNT,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx512f | FileCheck %s -check-prefixes=LZCNT,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=LZCNT,AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+lzcnt,+avx512vl,+avx512bw,+avx512dq,+avx512cd | FileCheck %s -check-prefixes=LZCNT,AVX512CD ; Verify the cost of scalar leading zero count instructions. -; CHECK: {{^}} declare i64 @llvm.ctlz.i64(i64, i1) declare i32 @llvm.ctlz.i32(i32, i1) diff --git a/llvm/test/Analysis/CostModel/X86/ctpop.ll b/llvm/test/Analysis/CostModel/X86/ctpop.ll index 84d4fe143cc486..d15255a1697d2f 100644 --- a/llvm/test/Analysis/CostModel/X86/ctpop.ll +++ b/llvm/test/Analysis/CostModel/X86/ctpop.ll @@ -1,14 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=-popcnt,+sse2 | FileCheck %s -check-prefixes=CHECK,SSE2,NOPOPCNT -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+sse2 | FileCheck %s -check-prefixes=CHECK,SSE2,POPCNT -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+sse4.2 | FileCheck %s -check-prefixes=CHECK,POPCNT,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+avx | FileCheck %s -check-prefixes=CHECK,POPCNT,AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+avx2 | FileCheck %s -check-prefixes=CHECK,POPCNT,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+avx512f | FileCheck %s -check-prefixes=CHECK,POPCNT,AVX512,AVX512F -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=CHECK,POPCNT,AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=-popcnt,+sse2 | FileCheck %s -check-prefixes=SSE2,NOPOPCNT +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+sse2 | FileCheck %s -check-prefixes=SSE2,POPCNT +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+sse4.2 | FileCheck %s -check-prefixes=POPCNT,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+avx | FileCheck %s -check-prefixes=POPCNT,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+avx2 | FileCheck %s -check-prefixes=POPCNT,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+avx512f | FileCheck %s -check-prefixes=POPCNT,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+popcnt,+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=POPCNT,AVX512,AVX512BW ; Verify the cost of scalar population count instructions. -; CHECK: {{^}} declare i64 @llvm.ctpop.i64(i64) declare i32 @llvm.ctpop.i32(i32) diff --git a/llvm/test/Analysis/CostModel/X86/cttz.ll b/llvm/test/Analysis/CostModel/X86/cttz.ll index 3091c2ed141135..24266fdfa2ad44 100644 --- a/llvm/test/Analysis/CostModel/X86/cttz.ll +++ b/llvm/test/Analysis/CostModel/X86/cttz.ll @@ -1,13 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=-bmi,+sse2 | FileCheck %s -check-prefixes=CHECK,SSE2,NOBMI -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+sse2 | FileCheck %s -check-prefixes=CHECK,SSE2,BMI -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+sse4.2 | FileCheck %s -check-prefixes=CHECK,BMI,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+avx | FileCheck %s -check-prefixes=CHECK,BMI,AVX,AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+avx2 | FileCheck %s -check-prefixes=CHECK,BMI,AVX,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+avx512f | FileCheck %s -check-prefixes=CHECK,BMI,AVX512,AVX512F -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=CHECK,BMI,AVX512,AVX512BW - -; CHECK: {{^}} +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=-bmi,+sse2 | FileCheck %s -check-prefixes=SSE2,NOBMI +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+sse2 | FileCheck %s -check-prefixes=SSE2,BMI +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+sse4.2 | FileCheck %s -check-prefixes=BMI,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+avx | FileCheck %s -check-prefixes=BMI,AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+avx2 | FileCheck %s -check-prefixes=BMI,AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+avx512f | FileCheck %s -check-prefixes=BMI,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+bmi,+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=BMI,AVX512,AVX512BW + ; Verify the cost of scalar trailing zero count instructions. declare i64 @llvm.cttz.i64(i64, i1) diff --git a/llvm/test/Analysis/CostModel/X86/div.ll b/llvm/test/Analysis/CostModel/X86/div.ll index 608e15018e9a79..98a1d2380df9b0 100644 --- a/llvm/test/Analysis/CostModel/X86/div.ll +++ b/llvm/test/Analysis/CostModel/X86/div.ll @@ -11,7 +11,6 @@ ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,BTVER2 -; CHECK: {{^}} define i32 @sdiv() { ; CHECK-LABEL: 'sdiv' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, undef diff --git a/llvm/test/Analysis/CostModel/X86/extend.ll b/llvm/test/Analysis/CostModel/X86/extend.ll index 346073b00e1023..44325c1d6aa8e8 100644 --- a/llvm/test/Analysis/CostModel/X86/extend.ll +++ b/llvm/test/Analysis/CostModel/X86/extend.ll @@ -1,17 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSSE3 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW -; -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE42 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,BTVER2 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX1 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2 -; CHECK: {{^}} define i32 @zext_vXi32() { ; SSE2-LABEL: 'zext_vXi32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %I64 = zext i32 undef to i64 diff --git a/llvm/test/Analysis/CostModel/X86/fcmp.ll b/llvm/test/Analysis/CostModel/X86/fcmp.ll index ceac3606f6223c..00929d0d328ae1 100644 --- a/llvm/test/Analysis/CostModel/X86/fcmp.ll +++ b/llvm/test/Analysis/CostModel/X86/fcmp.ll @@ -1,19 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2 -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,SSE3 -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSSE3 -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE41 -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42 -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512 -; -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE42 -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42 -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 +; +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=AVX -; CHECK: {{^}} define i32 @cmp_float_oeq(i32 %arg) { ; SSE2-LABEL: 'cmp_float_oeq' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp oeq float undef, undef diff --git a/llvm/test/Analysis/CostModel/X86/reduce-fadd.ll b/llvm/test/Analysis/CostModel/X86/reduce-fadd.ll index 5495280d4acdd2..d918eaa9e652b3 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-fadd.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-fadd.ll @@ -9,7 +9,6 @@ ; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 ; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512 -; CHECK: {{^}} define void @reduce_f64(double %arg) { ; SSE2-LABEL: 'reduce_f64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.vector.reduce.fadd.v1f64(double %arg, <1 x double> undef) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-fmul.ll b/llvm/test/Analysis/CostModel/X86/reduce-fmul.ll index a3926eaec82d1d..e74f7ff02b2ba9 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-fmul.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-fmul.ll @@ -9,7 +9,6 @@ ; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 ; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512 -; CHECK: {{^}} define void @reduce_f64(double %arg) { ; SSE2-LABEL: 'reduce_f64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.vector.reduce.fmul.v1f64(double %arg, <1 x double> undef) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-smax.ll b/llvm/test/Analysis/CostModel/X86/reduce-smax.ll index 1313c65a1e51ed..ee3cffa69896ef 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-smax.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-smax.ll @@ -1,15 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSSE3 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE4,SSE41 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE4,SSE42 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE4,SSE41 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE4,SSE42 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX1 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ -; CHECK: {{^}} define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-smin.ll b/llvm/test/Analysis/CostModel/X86/reduce-smin.ll index 457d327de4f630..2a48aaf97c24cb 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-smin.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-smin.ll @@ -1,15 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSSE3 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE4,SSE41 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE4,SSE42 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE4,SSE41 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE4,SSE42 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX1 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ -; CHECK: {{^}} define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-umax.ll b/llvm/test/Analysis/CostModel/X86/reduce-umax.ll index 7a4ae17f45cb91..7be97c184cd864 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-umax.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-umax.ll @@ -1,15 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSSE3 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE4,SSE41 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE4,SSE42 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE4,SSE41 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE4,SSE42 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX1 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ -; CHECK: {{^}} define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-umin.ll b/llvm/test/Analysis/CostModel/X86/reduce-umin.ll index d665c07c403a56..4916a98ad962d3 100644 --- a/llvm/test/Analysis/CostModel/X86/reduce-umin.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-umin.ll @@ -1,15 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSSE3 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE4,SSE41 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE4,SSE42 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2 -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW -; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE4,SSE41 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE4,SSE42 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX1 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ -; CHECK: {{^}} define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) diff --git a/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll b/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll index 94034bfd6fbc09..3d459a90b839cd 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll @@ -46,10 +46,10 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" ; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: ; CHECK: Expressions re-written: ; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom: -; CHECK-NEXT: ((4 * (zext i32 {1,+,1}<%for.body> to i64)) + %a) +; CHECK-NEXT: ((4 * (zext i32 {1,+,1}<%for.body> to i64)) + %a) ; CHECK-NEXT: --> {(4 + %a),+,4}<%for.body> ; CHECK-NEXT: [PSE] %arrayidx4 = getelementptr inbounds i32, i32* %b, i64 %conv11: -; CHECK-NEXT: ((4 * (zext i32 {0,+,1}<%for.body> to i64)) + %b) +; CHECK-NEXT: ((4 * (zext i32 {0,+,1}<%for.body> to i64)) + %b) ; CHECK-NEXT: --> {%b,+,4}<%for.body> define void @test1(i64 %x, i32* %a, i32* %b) { entry: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll b/llvm/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll index 34dddbe5cc1b33..e6416cfb491184 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll @@ -97,11 +97,11 @@ for.end: ; preds = %for.body ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group {{.*}}[[ZERO]]: ; CHECK-NEXT: (Low: %c High: (80 + %c)) -; CHECK-NEXT: Member: {(2 + %c),+,4} +; CHECK-NEXT: Member: {(2 + %c),+,4} ; CHECK-NEXT: Member: {%c,+,4} ; CHECK-NEXT: Group {{.*}}[[ONE]]: ; CHECK-NEXT: (Low: %a High: (42 + %a)) -; CHECK-NEXT: Member: {(2 + %a),+,2} +; CHECK-NEXT: Member: {(2 + %a),+,2} ; CHECK-NEXT: Member: {%a,+,2} ; CHECK-NEXT: Group {{.*}}[[TWO]]: ; CHECK-NEXT: (Low: %b High: (40 + %b)) @@ -169,7 +169,7 @@ for.end: ; preds = %for.body ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group {{.*}}[[ZERO]]: ; CHECK-NEXT: (Low: %c High: (80 + %c)) -; CHECK-NEXT: Member: {(2 + %c),+,4} +; CHECK-NEXT: Member: {(2 + %c),+,4} ; CHECK-NEXT: Member: {%c,+,4} ; CHECK-NEXT: Group {{.*}}[[ONE]]: ; CHECK-NEXT: (Low: %a High: (42 + %a)) @@ -247,8 +247,8 @@ for.end: ; preds = %for.body ; CHECK-NEXT: %arrayidxA2 = getelementptr i16, i16* %a, i64 %ind2 ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group {{.*}}[[ZERO]]: -; CHECK-NEXT: (Low: ((2 * %offset) + %a) High: (10000 + (2 * %offset) + %a)) -; CHECK-NEXT: Member: {((2 * %offset) + %a),+,2}<%for.body> +; CHECK-NEXT: (Low: ((2 * %offset) + %a) High: (10000 + (2 * %offset) + %a)) +; CHECK-NEXT: Member: {((2 * %offset) + %a),+,2}<%for.body> ; CHECK-NEXT: Group {{.*}}[[ONE]]: ; CHECK-NEXT: (Low: %a High: (10000 + %a)) ; CHECK-NEXT: Member: {%a,+,2}<%for.body> diff --git a/llvm/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll b/llvm/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll index 8113c8d7106b2f..be51167dab8a9a 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll @@ -58,8 +58,8 @@ for.end: ; preds = %for.body ; Here it is not obvious what the limits are, since 'step' could be negative. -; CHECK: Low: ((60000 + %a) umin (60000 + (-40000 * %step) + %a)) -; CHECK: High: (4 + ((60000 + %a) umax (60000 + (-40000 * %step) + %a))) +; CHECK: Low: ((60000 + %a) umin (60000 + (-40000 * %step) + %a)) +; CHECK: High: (4 + ((60000 + %a) umax (60000 + (-40000 * %step) + %a))) define void @g(i64 %step) { entry: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll b/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll index 5b5c821e998307..80ada5292d0979 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll @@ -365,7 +365,7 @@ for.end: ; preds = %for.body ; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body> Added Flags: ; LAA: [PSE] %arrayidxA = getelementptr inbounds i16, i16* %a, i32 %mul: -; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)) + %a) +; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)) + %a) ; LAA-NEXT: --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body> ; LV-LABEL: f5 diff --git a/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll b/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll index c60eafd413d54c..a8575c4f66fb32 100644 --- a/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll +++ b/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll @@ -29,7 +29,7 @@ define i32 @d(i32 %base) { ; CHECK-NEXT: %idxprom = sext i32 %f.0 to i64 ; CHECK-NEXT: --> {(sext i32 %base to i64),+,1}<%for.cond> U: [-2147483648,-9223372036854775808) S: [-2147483648,-9223372036854775808) Exits: <> LoopDispositions: { %for.cond: Computable } ; CHECK-NEXT: %arrayidx = getelementptr inbounds [1 x [1 x i8]], [1 x [1 x i8]]* %e, i64 0, i64 %idxprom -; CHECK-NEXT: --> {((sext i32 %base to i64) + %e),+,1}<%for.cond> U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond: Computable } +; CHECK-NEXT: --> {((sext i32 %base to i64) + %e),+,1}<%for.cond> U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond: Computable } ; CHECK-NEXT: %1 = load i32*, i32** @c, align 8 ; CHECK-NEXT: --> %1 U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond: Variant } ; CHECK-NEXT: %sub.ptr.lhs.cast = ptrtoint i32* %1 to i64 @@ -39,7 +39,7 @@ define i32 @d(i32 %base) { ; CHECK-NEXT: %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 4 ; CHECK-NEXT: --> %sub.ptr.div U: full-set S: [-2305843009213693952,2305843009213693952) Exits: <> LoopDispositions: { %for.cond: Variant } ; CHECK-NEXT: %arrayidx1 = getelementptr inbounds [1 x i8], [1 x i8]* %arrayidx, i64 0, i64 %sub.ptr.div -; CHECK-NEXT: --> ({((sext i32 %base to i64) + %e),+,1}<%for.cond> + %sub.ptr.div) U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond: Variant } +; CHECK-NEXT: --> ({((sext i32 %base to i64) + %e),+,1}<%for.cond> + %sub.ptr.div) U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond: Variant } ; CHECK-NEXT: %2 = load i8, i8* %arrayidx1, align 1 ; CHECK-NEXT: --> %2 U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond: Variant } ; CHECK-NEXT: %conv = sext i8 %2 to i32 diff --git a/llvm/test/Analysis/ScalarEvolution/load.ll b/llvm/test/Analysis/ScalarEvolution/load.ll index 707908ff78cdf5..f41d20cc95769d 100644 --- a/llvm/test/Analysis/ScalarEvolution/load.ll +++ b/llvm/test/Analysis/ScalarEvolution/load.ll @@ -17,11 +17,11 @@ define i32 @test1() nounwind readnone { ; CHECK-NEXT: %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ] ; CHECK-NEXT: --> {0,+,1}<%for.body> U: [0,50) S: [0,50) Exits: 49 LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %arrayidx = getelementptr inbounds [50 x i32], [50 x i32]* @arr1, i32 0, i32 %i.03 -; CHECK-NEXT: --> {@arr1,+,4}<%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: (196 + @arr1) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {@arr1,+,4}<%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: (196 + @arr1) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %0 = load i32, i32* %arrayidx, align 4 ; CHECK-NEXT: --> %0 U: full-set S: full-set Exits: 50 LoopDispositions: { %for.body: Variant } ; CHECK-NEXT: %arrayidx1 = getelementptr inbounds [50 x i32], [50 x i32]* @arr2, i32 0, i32 %i.03 -; CHECK-NEXT: --> {@arr2,+,4}<%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: (196 + @arr2) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {@arr2,+,4}<%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: (196 + @arr2) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %1 = load i32, i32* %arrayidx1, align 4 ; CHECK-NEXT: --> %1 U: full-set S: full-set Exits: 0 LoopDispositions: { %for.body: Variant } ; CHECK-NEXT: %add = add i32 %0, %sum.04 @@ -74,7 +74,7 @@ define i32 @test2() nounwind uwtable readonly { ; CHECK-NEXT: %n.01 = phi %struct.ListNode* [ bitcast ({ %struct.ListNode*, i32, [4 x i8] }* @node5 to %struct.ListNode*), %entry ], [ %1, %for.body ] ; CHECK-NEXT: --> %n.01 U: full-set S: full-set Exits: @node1 LoopDispositions: { %for.body: Variant } ; CHECK-NEXT: %i = getelementptr inbounds %struct.ListNode, %struct.ListNode* %n.01, i64 0, i32 1 -; CHECK-NEXT: --> (4 + %n.01) U: [-2147483644,-2147483648) S: [-2147483644,-2147483648) Exits: (4 + @node1) LoopDispositions: { %for.body: Variant } +; CHECK-NEXT: --> (4 + %n.01) U: [4,0) S: [4,0) Exits: (4 + @node1) LoopDispositions: { %for.body: Variant } ; CHECK-NEXT: %0 = load i32, i32* %i, align 4 ; CHECK-NEXT: --> %0 U: full-set S: full-set Exits: 0 LoopDispositions: { %for.body: Variant } ; CHECK-NEXT: %add = add nsw i32 %0, %sum.02 diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll index 12094ac14e7177..48dc484635a6ff 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll @@ -10,7 +10,7 @@ define void @test_guard_less_than_16(i32* nocapture %a, i64 %i) { ; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ %i, %entry ] ; CHECK-NEXT: --> {%i,+,1}<%loop> U: full-set S: full-set Exits: 15 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %idx = getelementptr inbounds i32, i32* %a, i64 %iv -; CHECK-NEXT: --> {((4 * %i) + %a),+,4}<%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {((4 * %i) + %a),+,4}<%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 ; CHECK-NEXT: --> {(1 + %i),+,1}<%loop> U: full-set S: full-set Exits: 16 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_guard_less_than_16 @@ -42,7 +42,7 @@ define void @test_guard_less_than_16_operands_swapped(i32* nocapture %a, i64 %i) ; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ %i, %entry ] ; CHECK-NEXT: --> {%i,+,1}<%loop> U: full-set S: full-set Exits: 15 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %idx = getelementptr inbounds i32, i32* %a, i64 %iv -; CHECK-NEXT: --> {((4 * %i) + %a),+,4}<%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {((4 * %i) + %a),+,4}<%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 ; CHECK-NEXT: --> {(1 + %i),+,1}<%loop> U: full-set S: full-set Exits: 16 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_guard_less_than_16_operands_swapped @@ -74,7 +74,7 @@ define void @test_guard_less_than_16_branches_flipped(i32* nocapture %a, i64 %i) ; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ %i, %entry ] ; CHECK-NEXT: --> {%i,+,1}<%loop> U: full-set S: full-set Exits: 15 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %idx = getelementptr inbounds i32, i32* %a, i64 %iv -; CHECK-NEXT: --> {((4 * %i) + %a),+,4}<%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {((4 * %i) + %a),+,4}<%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 ; CHECK-NEXT: --> {(1 + %i),+,1}<%loop> U: full-set S: full-set Exits: 16 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_guard_less_than_16_branches_flipped @@ -106,7 +106,7 @@ define void @test_guard_uge_16_branches_flipped(i32* nocapture %a, i64 %i) { ; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ %i, %entry ] ; CHECK-NEXT: --> {%i,+,1}<%loop> U: full-set S: full-set Exits: 15 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %idx = getelementptr inbounds i32, i32* %a, i64 %iv -; CHECK-NEXT: --> {((4 * %i) + %a),+,4}<%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {((4 * %i) + %a),+,4}<%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 ; CHECK-NEXT: --> {(1 + %i),+,1}<%loop> U: full-set S: full-set Exits: 16 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_guard_uge_16_branches_flipped @@ -138,7 +138,7 @@ define void @test_guard_eq_12(i32* nocapture %a, i64 %N) { ; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] ; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,13) S: [0,13) Exits: %N LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %idx = getelementptr inbounds i32, i32* %a, i64 %iv -; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: ((4 * %N) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: ((4 * %N) + %a) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 ; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,14) S: [1,14) Exits: (1 + %N) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_guard_eq_12 @@ -170,7 +170,7 @@ define void @test_guard_ule_12(i32* nocapture %a, i64 %N) { ; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] ; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,13) S: [0,13) Exits: %N LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %idx = getelementptr inbounds i32, i32* %a, i64 %iv -; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: ((4 * %N) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: ((4 * %N) + %a) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 ; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,14) S: [1,14) Exits: (1 + %N) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_guard_ule_12 @@ -202,7 +202,7 @@ define void @test_guard_ule_12_step2(i32* nocapture %a, i64 %N) { ; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] ; CHECK-NEXT: --> {0,+,2}<%loop> U: [0,-9223372036854775808) S: [0,9223372036854775807) Exits: (2 * (%N /u 2)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %idx = getelementptr inbounds i32, i32* %a, i64 %iv -; CHECK-NEXT: --> {%a,+,8}<%loop> U: full-set S: full-set Exits: ((8 * (%N /u 2)) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%a,+,8}<%loop> U: full-set S: full-set Exits: ((8 * (%N /u 2)) + %a) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 2 ; CHECK-NEXT: --> {2,+,2}<%loop> U: [2,-1) S: [-9223372036854775808,9223372036854775807) Exits: (2 + (2 * (%N /u 2))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_guard_ule_12_step2 @@ -234,7 +234,7 @@ define void @test_multiple_const_guards_order1(i32* nocapture %a, i64 %i) { ; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ] ; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,10) S: [0,10) Exits: %i LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %idx = getelementptr inbounds i32, i32* %a, i64 %iv -; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 ; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,11) S: [1,11) Exits: (1 + %i) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_multiple_const_guards_order1 @@ -270,7 +270,7 @@ define void @test_multiple_const_guards_order2(i32* nocapture %a, i64 %i) { ; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ] ; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,10) S: [0,10) Exits: %i LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %idx = getelementptr inbounds i32, i32* %a, i64 %iv -; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 ; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,11) S: [1,11) Exits: (1 + %i) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_multiple_const_guards_order2 @@ -307,7 +307,7 @@ define void @test_multiple_var_guards_order1(i32* nocapture %a, i64 %i, i64 %N) ; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ] ; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,-9223372036854775808) S: [0,-9223372036854775808) Exits: %i LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %idx = getelementptr inbounds i32, i32* %a, i64 %iv -; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 ; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,0) S: [1,0) Exits: (1 + %i) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_multiple_var_guards_order1 @@ -344,7 +344,7 @@ define void @test_multiple_var_guards_order2(i32* nocapture %a, i64 %i, i64 %N) ; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ] ; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,-9223372036854775808) S: [0,-9223372036854775808) Exits: %i LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %idx = getelementptr inbounds i32, i32* %a, i64 %iv -; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 ; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,0) S: [1,0) Exits: (1 + %i) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_multiple_var_guards_order2 @@ -381,7 +381,7 @@ define void @test_multiple_var_guards_cycle(i32* nocapture %a, i64 %i, i64 %N) { ; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ] ; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,-9223372036854775808) S: [0,-9223372036854775808) Exits: %N LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %idx = getelementptr inbounds i32, i32* %a, i64 %iv -; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: ((4 * %N) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: ((4 * %N) + %a) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 ; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,0) S: [1,0) Exits: (1 + %N) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_multiple_var_guards_cycle @@ -417,7 +417,7 @@ define void @test_guard_ult_ne(i32* nocapture readonly %data, i64 %count) { ; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ] ; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,4) S: [0,4) Exits: (-1 + %count) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %idx = getelementptr inbounds i32, i32* %data, i64 %iv -; CHECK-NEXT: --> {%data,+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%data,+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add nuw i64 %iv, 1 ; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,5) S: [1,5) Exits: %count LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_guard_ult_ne @@ -456,7 +456,7 @@ define void @test_guard_and_assume(i32* nocapture readonly %data, i64 %count) { ; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] ; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,4) S: [0,4) Exits: (-1 + %count) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %idx = getelementptr inbounds i32, i32* %data, i64 %iv -; CHECK-NEXT: --> {%data,+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%data,+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add nuw i64 %iv, 1 ; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,5) S: [1,5) Exits: %count LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_guard_and_assume @@ -537,7 +537,7 @@ define void @crash(i8* %ptr) { ; CHECK-NEXT: %lastout.2271 = phi i8* [ %incdec.ptr126, %while.body125 ], [ %ptr, %while.end117 ] ; CHECK-NEXT: --> {%ptr,+,1}<%while.body125> U: full-set S: full-set Exits: {-2,+,-1}<%while.cond111> LoopDispositions: { %while.body125: Computable } ; CHECK-NEXT: %incdec.ptr126 = getelementptr inbounds i8, i8* %lastout.2271, i64 1 -; CHECK-NEXT: --> {(1 + %ptr),+,1}<%while.body125> U: full-set S: full-set Exits: {-1,+,-1}<%while.cond111> LoopDispositions: { %while.body125: Computable } +; CHECK-NEXT: --> {(1 + %ptr),+,1}<%while.body125> U: [1,0) S: [1,0) Exits: {-1,+,-1}<%while.cond111> LoopDispositions: { %while.body125: Computable } ; CHECK-NEXT: Determining loop execution counts for: @crash ; CHECK-NEXT: Loop %while.body125: backedge-taken count is {(-2 + (-1 * %ptr)),+,-1}<%while.cond111> ; CHECK-NEXT: Loop %while.body125: max backedge-taken count is -1 diff --git a/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll b/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll index e2fc84c8d71c31..fd34306861ea0d 100644 --- a/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll +++ b/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll @@ -210,7 +210,7 @@ define void @f3(i8* %x_addr, i8* %y_addr, i32* %tmp_addr) { ; CHECK-NEXT: %sunkaddr3 = mul i64 %add4.zext, 4 ; CHECK-NEXT: --> (4 * (zext i32 (4 + (4 * (%tmp /u 4))) to i64)) U: [0,17179869169) S: [0,17179869181) ; CHECK-NEXT: %sunkaddr4 = getelementptr inbounds i8, i8* bitcast ({ %union, [2000 x i8] }* @tmp_addr to i8*), i64 %sunkaddr3 -; CHECK-NEXT: --> ((4 * (zext i32 (4 + (4 * (%tmp /u 4))) to i64)) + @tmp_addr) U: [0,-3) S: [-9223372036854775808,9223372036854775805) +; CHECK-NEXT: --> ((4 * (zext i32 (4 + (4 * (%tmp /u 4))) to i64)) + @tmp_addr) U: [0,-3) S: [-9223372036854775808,9223372036854775805) ; CHECK-NEXT: %sunkaddr5 = getelementptr inbounds i8, i8* %sunkaddr4, i64 4096 ; CHECK-NEXT: --> (4096 + (4 * (zext i32 (4 + (4 * (%tmp /u 4))) to i64)) + @tmp_addr) U: [0,-3) S: [-9223372036854775808,9223372036854775805) ; CHECK-NEXT: %addr4.cast = bitcast i8* %sunkaddr5 to i32* diff --git a/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll b/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll index b6867ca471f7a8..0647d328fe5149 100644 --- a/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll +++ b/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll @@ -26,7 +26,7 @@ bb: ; preds = %bb.nph, %bb1 %1 = sext i32 %i.01 to i64 ; [#uses=1] ; CHECK: %2 = getelementptr inbounds double, double* %d, i64 %1 -; CHECK: --> {%d,+,16}<%bb> +; CHECK: --> {%d,+,16}<%bb> %2 = getelementptr inbounds double, double* %d, i64 %1 ; [#uses=1] %3 = load double, double* %2, align 8 ; [#uses=1] @@ -40,7 +40,7 @@ bb: ; preds = %bb.nph, %bb1 %8 = sext i32 %7 to i64 ; [#uses=1] ; CHECK: %9 = getelementptr inbounds double, double* %q, i64 %8 -; CHECK: {(8 + %q),+,16}<%bb> +; CHECK: {(8 + %q),+,16}<%bb> %9 = getelementptr inbounds double, double* %q, i64 %8 ; [#uses=1] ; Artificially repeat the above three instructions, this time using @@ -52,7 +52,7 @@ bb: ; preds = %bb.nph, %bb1 %t8 = sext i32 %t7 to i64 ; [#uses=1] ; CHECK: %t9 = getelementptr inbounds double, double* %q, i64 %t8 -; CHECK: {(8 + %q),+,16}<%bb> +; CHECK: {(8 + %q),+,16}<%bb> %t9 = getelementptr inbounds double, double* %q, i64 %t8 ; [#uses=1] %10 = load double, double* %9, align 8 ; [#uses=1] diff --git a/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll b/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll index 0310ff341516b9..6a4e76a32adb09 100644 --- a/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll +++ b/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll @@ -24,7 +24,7 @@ bb: ; preds = %bb.nph, %bb1 %1 = sext i32 %i.01 to i64 ; [#uses=1] ; CHECK: %2 = getelementptr inbounds double, double* %d, i64 %1 -; CHECK: --> {%d,+,16}<%bb> +; CHECK: --> {%d,+,16}<%bb> %2 = getelementptr inbounds double, double* %d, i64 %1 ; [#uses=1] %3 = load double, double* %2, align 8 ; [#uses=1] @@ -38,7 +38,7 @@ bb: ; preds = %bb.nph, %bb1 %8 = sext i32 %7 to i64 ; [#uses=1] ; CHECK: %9 = getelementptr inbounds double, double* %q, i64 %8 -; CHECK: {(8 + %q),+,16}<%bb> +; CHECK: {(8 + %q),+,16}<%bb> %9 = getelementptr inbounds double, double* %q, i64 %8 ; [#uses=1] ; Artificially repeat the above three instructions, this time using @@ -50,7 +50,7 @@ bb: ; preds = %bb.nph, %bb1 %t8 = sext i32 %t7 to i64 ; [#uses=1] ; CHECK: %t9 = getelementptr inbounds double, double* %q, i64 %t8 -; CHECK: {(8 + %q),+,16}<%bb> +; CHECK: {(8 + %q),+,16}<%bb> %t9 = getelementptr inbounds double, double* %q, i64 %t8 ; [#uses=1] %10 = load double, double* %9, align 8 ; [#uses=1] diff --git a/llvm/test/Analysis/ScalarEvolution/nsw.ll b/llvm/test/Analysis/ScalarEvolution/nsw.ll index 39f199868eaa5e..b80b3ada315ee5 100644 --- a/llvm/test/Analysis/ScalarEvolution/nsw.ll +++ b/llvm/test/Analysis/ScalarEvolution/nsw.ll @@ -41,7 +41,7 @@ bb1: ; preds = %bb ; CHECK-NEXT: --> {1,+,1}<%bb> %tmp9 = getelementptr inbounds double, double* %p, i64 %phitmp ; [#uses=1] ; CHECK: %tmp9 -; CHECK-NEXT: --> {(8 + %p),+,8}<%bb> +; CHECK-NEXT: --> {(8 + %p),+,8}<%bb> %tmp10 = load double, double* %tmp9, align 8 ; [#uses=1] %tmp11 = fcmp ogt double %tmp10, 2.000000e+00 ; [#uses=1] br i1 %tmp11, label %bb, label %bb1.return_crit_edge @@ -69,7 +69,7 @@ for.body.i.i: ; preds = %for.body.i.i, %for. store i32 0, i32* %__first.addr.02.i.i, align 4 %ptrincdec.i.i = getelementptr inbounds i32, i32* %__first.addr.02.i.i, i64 1 ; CHECK: %ptrincdec.i.i -; CHECK-NEXT: --> {(4 + %begin),+,4}<%for.body.i.i> +; CHECK-NEXT: --> {(4 + %begin),+,4}<%for.body.i.i> %cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end br i1 %cmp.i.i, label %for.cond.for.end_crit_edge.i.i, label %for.body.i.i @@ -95,10 +95,10 @@ for.body.i.i: ; preds = %entry, %for.body.i. ; CHECK: {1,+,1}<%for.body.i.i> %ptrincdec.i.i = getelementptr inbounds i32, i32* %begin, i64 %tmp ; CHECK: %ptrincdec.i.i = -; CHECK: {(4 + %begin),+,4}<%for.body.i.i> +; CHECK: {(4 + %begin),+,4}<%for.body.i.i> %__first.addr.08.i.i = getelementptr inbounds i32, i32* %begin, i64 %indvar.i.i ; CHECK: %__first.addr.08.i.i -; CHECK: {%begin,+,4}<%for.body.i.i> +; CHECK: {%begin,+,4}<%for.body.i.i> store i32 0, i32* %__first.addr.08.i.i, align 4 %cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end br i1 %cmp.i.i, label %_ZSt4fillIPiiEvT_S1_RKT0_.exit, label %for.body.i.i @@ -127,7 +127,8 @@ exit: } ; CHECK-LABEL: PR12375 -; CHECK: --> {(4 + %arg),+,4}<%bb1>{{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: (4 + (4 * ((-1 + (-1 * %arg) + ((4 + %arg) umax (8 + %arg))) /u 4)) + %arg) +; CHECK: --> {(4 + %arg),+,4}<%bb1>{{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: (8 + %arg) + define i32 @PR12375(i32* readnone %arg) { bb: %tmp = getelementptr inbounds i32, i32* %arg, i64 2 @@ -146,7 +147,7 @@ bb7: ; preds = %bb1 } ; CHECK-LABEL: PR12376 -; CHECK: --> {(4 + %arg),+,4}<%bb2>{{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: (4 + (4 * ((-1 + (-1 * %arg) + ((4 + %arg) umax %arg1)) /u 4)) + %arg) +; CHECK: --> {(4 + %arg),+,4}<%bb2>{{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: (4 + (4 * ((-1 + (-1 * %arg) + ((4 + %arg) umax %arg1)) /u 4)) + %arg) define void @PR12376(i32* nocapture %arg, i32* nocapture %arg1) { bb: br label %bb2 diff --git a/llvm/test/Analysis/ScalarEvolution/pr46786.ll b/llvm/test/Analysis/ScalarEvolution/pr46786.ll index 9255807f02c59d..f19a33cf355dcb 100644 --- a/llvm/test/Analysis/ScalarEvolution/pr46786.ll +++ b/llvm/test/Analysis/ScalarEvolution/pr46786.ll @@ -10,7 +10,7 @@ define i8* @FSE_decompress_usingDTable(i8* %arg, i32 %arg1, i32 %arg2, i32 %arg3 ; CHECK-LABEL: 'FSE_decompress_usingDTable' ; CHECK-NEXT: Classifying expressions for: @FSE_decompress_usingDTable ; CHECK-NEXT: %i = getelementptr inbounds i8, i8* %arg, i32 %arg2 -; CHECK-NEXT: --> (%arg2 + %arg) U: full-set S: full-set +; CHECK-NEXT: --> (%arg2 + %arg) U: full-set S: full-set ; CHECK-NEXT: %i4 = sub nsw i32 0, %arg1 ; CHECK-NEXT: --> (-1 * %arg1) U: full-set S: full-set ; CHECK-NEXT: %i5 = getelementptr inbounds i8, i8* %i, i32 %i4 diff --git a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll index 70fb37984d8a7c..788a268e0f2272 100644 --- a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll +++ b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll @@ -194,17 +194,17 @@ define void @ptrtoint_of_gep(i8* %in, i64* %out0) { ; X64-LABEL: 'ptrtoint_of_gep' ; X64-NEXT: Classifying expressions for: @ptrtoint_of_gep ; X64-NEXT: %in_adj = getelementptr inbounds i8, i8* %in, i64 42 -; X64-NEXT: --> (42 + %in) U: [-9223372036854775766,-9223372036854775808) S: [-9223372036854775766,-9223372036854775808) +; X64-NEXT: --> (42 + %in) U: [42,0) S: [42,0) ; X64-NEXT: %p0 = ptrtoint i8* %in_adj to i64 -; X64-NEXT: --> (42 + (ptrtoint i8* %in to i64)) U: [-9223372036854775766,-9223372036854775808) S: [-9223372036854775766,-9223372036854775808) +; X64-NEXT: --> (42 + (ptrtoint i8* %in to i64)) U: [42,0) S: [42,0) ; X64-NEXT: Determining loop execution counts for: @ptrtoint_of_gep ; ; X32-LABEL: 'ptrtoint_of_gep' ; X32-NEXT: Classifying expressions for: @ptrtoint_of_gep ; X32-NEXT: %in_adj = getelementptr inbounds i8, i8* %in, i64 42 -; X32-NEXT: --> (42 + %in) U: [-2147483606,-2147483648) S: [-2147483606,-2147483648) +; X32-NEXT: --> (42 + %in) U: [42,0) S: [42,0) ; X32-NEXT: %p0 = ptrtoint i8* %in_adj to i64 -; X32-NEXT: --> (zext i32 (42 + (ptrtoint i8* %in to i32)) to i64) U: [0,4294967296) S: [0,4294967296) +; X32-NEXT: --> (42 + (zext i32 (ptrtoint i8* %in to i32) to i64)) U: [42,4294967338) S: [42,4294967338) ; X32-NEXT: Determining loop execution counts for: @ptrtoint_of_gep ; %in_adj = getelementptr inbounds i8, i8* %in, i64 42 @@ -224,9 +224,9 @@ define void @ptrtoint_of_addrec(i32* %in, i32 %count) { ; X64-NEXT: %i6 = phi i64 [ 0, %entry ], [ %i9, %loop ] ; X64-NEXT: --> {0,+,1}<%loop> U: [0,-9223372036854775808) S: [0,-9223372036854775808) Exits: (-1 + (zext i32 %count to i64)) LoopDispositions: { %loop: Computable } ; X64-NEXT: %i7 = getelementptr inbounds i32, i32* %in, i64 %i6 -; X64-NEXT: --> {%in,+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * (zext i32 %count to i64)) + %in) LoopDispositions: { %loop: Computable } +; X64-NEXT: --> {%in,+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * (zext i32 %count to i64)) + %in) LoopDispositions: { %loop: Computable } ; X64-NEXT: %i8 = ptrtoint i32* %i7 to i64 -; X64-NEXT: --> {(ptrtoint i32* %in to i64),+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * (zext i32 %count to i64)) + (ptrtoint i32* %in to i64)) LoopDispositions: { %loop: Computable } +; X64-NEXT: --> {(ptrtoint i32* %in to i64),+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * (zext i32 %count to i64)) + (ptrtoint i32* %in to i64)) LoopDispositions: { %loop: Computable } ; X64-NEXT: %i9 = add nuw nsw i64 %i6, 1 ; X64-NEXT: --> {1,+,1}<%loop> U: [1,0) S: [1,0) Exits: (zext i32 %count to i64) LoopDispositions: { %loop: Computable } ; X64-NEXT: Determining loop execution counts for: @ptrtoint_of_addrec @@ -394,7 +394,7 @@ define void @pr46786_c26_char(i8* %arg, i8* %arg1, i8* %arg2) { ; X64-NEXT: %i13 = add i8 %i12, %i8 ; X64-NEXT: --> (%i12 + %i8) U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X64-NEXT: %i14 = getelementptr inbounds i8, i8* %i7, i64 1 -; X64-NEXT: --> {(1 + %arg),+,1}<%bb6> U: full-set S: full-set Exits: %arg1 LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {(1 + %arg),+,1}<%bb6> U: [1,0) S: [1,0) Exits: %arg1 LoopDispositions: { %bb6: Computable } ; X64-NEXT: Determining loop execution counts for: @pr46786_c26_char ; X64-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * %arg) + %arg1) ; X64-NEXT: Loop %bb6: max backedge-taken count is -2 @@ -421,7 +421,7 @@ define void @pr46786_c26_char(i8* %arg, i8* %arg1, i8* %arg2) { ; X32-NEXT: %i13 = add i8 %i12, %i8 ; X32-NEXT: --> (%i12 + %i8) U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i14 = getelementptr inbounds i8, i8* %i7, i64 1 -; X32-NEXT: --> {(1 + %arg),+,1}<%bb6> U: full-set S: full-set Exits: %arg1 LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {(1 + %arg),+,1}<%bb6> U: [1,0) S: [1,0) Exits: %arg1 LoopDispositions: { %bb6: Computable } ; X32-NEXT: Determining loop execution counts for: @pr46786_c26_char ; X32-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * %arg) + %arg1) ; X32-NEXT: Loop %bb6: max backedge-taken count is -2 @@ -475,13 +475,13 @@ define void @pr46786_c26_int(i32* %arg, i32* %arg1, i32* %arg2) { ; X64-NEXT: %i11 = ashr exact i64 %i10, 2 ; X64-NEXT: --> ((({0,+,4}<%bb6> smax {0,+,-4}<%bb6>) /u 4) * (1 smin (-1 smax {0,+,4}<%bb6>))) U: [-4611686018427387903,4611686018427387904) S: [-4611686018427387903,4611686018427387904) Exits: ((((4 * ((-4 + (-1 * %arg) + %arg1) /u 4)) smax (-4 * ((-4 + (-1 * %arg) + %arg1) /u 4))) /u 4) * (1 smin (-1 smax (4 * ((-4 + (-1 * %arg) + %arg1) /u 4))))) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i12 = getelementptr inbounds i32, i32* %arg2, i64 %i11 -; X64-NEXT: --> ((4 * (({0,+,4}<%bb6> smax {0,+,-4}<%bb6>) /u 4) * (1 smin (-1 smax {0,+,4}<%bb6>))) + %arg2) U: full-set S: full-set Exits: ((4 * (((4 * ((-4 + (-1 * %arg) + %arg1) /u 4)) smax (-4 * ((-4 + (-1 * %arg) + %arg1) /u 4))) /u 4) * (1 smin (-1 smax (4 * ((-4 + (-1 * %arg) + %arg1) /u 4))))) + %arg2) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> ((4 * (({0,+,4}<%bb6> smax {0,+,-4}<%bb6>) /u 4) * (1 smin (-1 smax {0,+,4}<%bb6>))) + %arg2) U: full-set S: full-set Exits: ((4 * (((4 * ((-4 + (-1 * %arg) + %arg1) /u 4)) smax (-4 * ((-4 + (-1 * %arg) + %arg1) /u 4))) /u 4) * (1 smin (-1 smax (4 * ((-4 + (-1 * %arg) + %arg1) /u 4))))) + %arg2) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i13 = load i32, i32* %i12, align 4 ; X64-NEXT: --> %i13 U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X64-NEXT: %i14 = add nsw i32 %i13, %i8 ; X64-NEXT: --> (%i13 + %i8) U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X64-NEXT: %i15 = getelementptr inbounds i32, i32* %i7, i64 1 -; X64-NEXT: --> {(4 + %arg),+,4}<%bb6> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * %arg) + %arg1) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {(4 + %arg),+,4}<%bb6> U: [4,0) S: [4,0) Exits: (4 + (4 * ((-4 + (-1 * %arg) + %arg1) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } ; X64-NEXT: Determining loop execution counts for: @pr46786_c26_int ; X64-NEXT: Loop %bb6: backedge-taken count is ((-4 + (-1 * %arg) + %arg1) /u 4) ; X64-NEXT: Loop %bb6: max backedge-taken count is 4611686018427387903 @@ -504,13 +504,13 @@ define void @pr46786_c26_int(i32* %arg, i32* %arg1, i32* %arg2) { ; X32-NEXT: %i11 = ashr exact i64 %i10, 2 ; X32-NEXT: --> ({0,+,1}<%bb6> * (1 smin {0,+,4}<%bb6>)) U: [0,1073741824) S: [0,1073741824) Exits: (((zext i32* (-4 + (-1 * %arg) + %arg1) to i64) /u 4) * (1 smin (4 * ((zext i32* (-4 + (-1 * %arg) + %arg1) to i64) /u 4)))) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i12 = getelementptr inbounds i32, i32* %arg2, i64 %i11 -; X32-NEXT: --> (((trunc i64 (1 smin {0,+,4}<%bb6>) to i32) * {0,+,4}<%bb6>) + %arg2) U: full-set S: full-set Exits: ((4 * (trunc i64 (1 smin (4 * ((zext i32* (-4 + (-1 * %arg) + %arg1) to i64) /u 4))) to i32) * ((-4 + (-1 * %arg) + %arg1) /u 4)) + %arg2) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> (((trunc i64 (1 smin {0,+,4}<%bb6>) to i32) * {0,+,4}<%bb6>) + %arg2) U: full-set S: full-set Exits: ((4 * (trunc i64 (1 smin (4 * ((zext i32* (-4 + (-1 * %arg) + %arg1) to i64) /u 4))) to i32) * ((-4 + (-1 * %arg) + %arg1) /u 4)) + %arg2) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i13 = load i32, i32* %i12, align 4 ; X32-NEXT: --> %i13 U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i14 = add nsw i32 %i13, %i8 ; X32-NEXT: --> (%i13 + %i8) U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i15 = getelementptr inbounds i32, i32* %i7, i64 1 -; X32-NEXT: --> {(4 + %arg),+,4}<%bb6> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * %arg) + %arg1) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {(4 + %arg),+,4}<%bb6> U: [4,0) S: [4,0) Exits: (4 + (4 * ((-4 + (-1 * %arg) + %arg1) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } ; X32-NEXT: Determining loop execution counts for: @pr46786_c26_int ; X32-NEXT: Loop %bb6: backedge-taken count is ((-4 + (-1 * %arg) + %arg1) /u 4) ; X32-NEXT: Loop %bb6: max backedge-taken count is 1073741823 diff --git a/llvm/test/Analysis/ScalarEvolution/sdiv.ll b/llvm/test/Analysis/ScalarEvolution/sdiv.ll index 89a3e77564ae54..bc919288a566a8 100644 --- a/llvm/test/Analysis/ScalarEvolution/sdiv.ll +++ b/llvm/test/Analysis/ScalarEvolution/sdiv.ll @@ -19,7 +19,7 @@ define dso_local void @_Z4loopi(i32 %width) local_unnamed_addr #0 { ; CHECK-NEXT: %idxprom = sext i32 %rem to i64 ; CHECK-NEXT: --> ({0,+,1}<%for.cond> /u 2) U: [0,2147483648) S: [0,2147483648) Exits: ((zext i32 %width to i64) /u 2) LoopDispositions: { %for.cond: Computable } ; CHECK-NEXT: %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %storage, i64 0, i64 %idxprom -; CHECK-NEXT: --> ((4 * ({0,+,1}<%for.cond> /u 2)) + %storage) U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: ((4 * ((zext i32 %width to i64) /u 2)) + %storage) LoopDispositions: { %for.cond: Computable } +; CHECK-NEXT: --> ((4 * ({0,+,1}<%for.cond> /u 2)) + %storage) U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: ((4 * ((zext i32 %width to i64) /u 2)) + %storage) LoopDispositions: { %for.cond: Computable } ; CHECK-NEXT: %1 = load i32, i32* %arrayidx, align 4 ; CHECK-NEXT: --> %1 U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond: Variant } ; CHECK-NEXT: %call = call i32 @_Z3adji(i32 %1) diff --git a/llvm/test/Analysis/ScalarEvolution/srem.ll b/llvm/test/Analysis/ScalarEvolution/srem.ll index 197437b51ca120..089dc2408a30d4 100644 --- a/llvm/test/Analysis/ScalarEvolution/srem.ll +++ b/llvm/test/Analysis/ScalarEvolution/srem.ll @@ -19,7 +19,7 @@ define dso_local void @_Z4loopi(i32 %width) local_unnamed_addr #0 { ; CHECK-NEXT: %idxprom = sext i32 %rem to i64 ; CHECK-NEXT: --> (zext i1 {false,+,true}<%for.cond> to i64) U: [0,2) S: [0,2) Exits: (zext i1 (trunc i32 %width to i1) to i64) LoopDispositions: { %for.cond: Computable } ; CHECK-NEXT: %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %storage, i64 0, i64 %idxprom -; CHECK-NEXT: --> ((4 * (zext i1 {false,+,true}<%for.cond> to i64)) + %storage) U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: ((4 * (zext i1 (trunc i32 %width to i1) to i64)) + %storage) LoopDispositions: { %for.cond: Computable } +; CHECK-NEXT: --> ((4 * (zext i1 {false,+,true}<%for.cond> to i64)) + %storage) U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: ((4 * (zext i1 (trunc i32 %width to i1) to i64)) + %storage) LoopDispositions: { %for.cond: Computable } ; CHECK-NEXT: %1 = load i32, i32* %arrayidx, align 4 ; CHECK-NEXT: --> %1 U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond: Variant } ; CHECK-NEXT: %call = call i32 @_Z3adji(i32 %1) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir index fd7620e317cedb..621229d9a8cafe 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir @@ -345,3 +345,95 @@ body: | %select:gpr(s64) = G_SELECT %cond(s1), %t, %f $x0 = COPY %select(s64) RET_ReallyLR implicit $x0 + +... +--- +name: csneg_s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + ; G_SELECT cc, true, (G_SUB 0, x) -> CSNEG true, x, cc + + ; CHECK-LABEL: name: csneg_s32 + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: %reg0:gpr32 = COPY $w0 + ; CHECK: %reg1:gpr32 = COPY $w1 + ; CHECK: %t:gpr32 = COPY $w2 + ; CHECK: [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri %reg0, 0, implicit-def $nzcv + ; CHECK: %select:gpr32 = CSNEGWr %t, %reg1, 1, implicit $nzcv + ; CHECK: $w0 = COPY %select + ; CHECK: RET_ReallyLR implicit $w0 + %reg0:gpr(s32) = COPY $w0 + %cond:gpr(s1) = G_TRUNC %reg0(s32) + %reg1:gpr(s32) = COPY $w1 + %t:gpr(s32) = COPY $w2 + %zero:gpr(s32) = G_CONSTANT i32 0 + %sub:gpr(s32) = G_SUB %zero(s32), %reg1 + %select:gpr(s32) = G_SELECT %cond(s1), %t, %sub + $w0 = COPY %select(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: csneg_s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1, $x2 + ; G_SELECT cc, true, (G_SUB 0, x) -> CSNEG true, x, cc + + ; CHECK-LABEL: name: csneg_s64 + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK: %reg0:gpr64 = COPY $x0 + ; CHECK: %cond:gpr32 = COPY %reg0.sub_32 + ; CHECK: %reg1:gpr64 = COPY $x1 + ; CHECK: %t:gpr64 = COPY $x2 + ; CHECK: [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri %cond, 0, implicit-def $nzcv + ; CHECK: %select:gpr64 = CSNEGXr %t, %reg1, 1, implicit $nzcv + ; CHECK: $x0 = COPY %select + ; CHECK: RET_ReallyLR implicit $x0 + %reg0:gpr(s64) = COPY $x0 + %cond:gpr(s1) = G_TRUNC %reg0(s64) + %reg1:gpr(s64) = COPY $x1 + %t:gpr(s64) = COPY $x2 + %zero:gpr(s64) = G_CONSTANT i64 0 + %sub:gpr(s64) = G_SUB %zero(s64), %reg1 + %select:gpr(s64) = G_SELECT %cond(s1), %t, %sub + $x0 = COPY %select(s64) + RET_ReallyLR implicit $x0 +... +--- +name: csneg_with_true_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + ; We should prefer eliminating the G_SUB over eliminating the constant true + ; value. + + ; CHECK-LABEL: name: csneg_with_true_cst + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: %reg0:gpr32 = COPY $w0 + ; CHECK: %t:gpr32 = MOVi32imm 1 + ; CHECK: %reg2:gpr32 = COPY $w2 + ; CHECK: [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri %reg0, 0, implicit-def $nzcv + ; CHECK: %select:gpr32 = CSNEGWr %t, %reg2, 1, implicit $nzcv + ; CHECK: $w0 = COPY %select + ; CHECK: RET_ReallyLR implicit $w0 + %reg0:gpr(s32) = COPY $w0 + %cond:gpr(s1) = G_TRUNC %reg0(s32) + %reg1:gpr(s32) = COPY $w1 + %t:gpr(s32) = G_CONSTANT i32 1 + %zero:gpr(s32) = G_CONSTANT i32 0 + %reg2:gpr(s32) = COPY $w2 + %sub:gpr(s32) = G_SUB %zero(s32), %reg2 + %select:gpr(s32) = G_SELECT %cond(s1), %t, %sub + $w0 = COPY %select(s32) + RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll b/llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll new file mode 100644 index 00000000000000..a9041d8d978287 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll @@ -0,0 +1,103 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names < %s | FileCheck %s \ +; RUN: --check-prefix=CHECK-BE + +; This test checks that LSR properly recognizes lxvp/stxvp as load/store +; intrinsics to avoid generating x-form instructions instead of d-forms. + +declare <256 x i1> @llvm.ppc.mma.lxvp(i8*) +declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*) +define void @foo(i32 zeroext %n, <256 x i1>* %ptr, <256 x i1>* %ptr2) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmplwi r3, 0 +; CHECK-NEXT: beqlr cr0 +; CHECK-NEXT: # %bb.1: # %for.body.lr.ph +; CHECK-NEXT: clrldi r6, r3, 32 +; CHECK-NEXT: addi r3, r4, 64 +; CHECK-NEXT: addi r4, r5, 64 +; CHECK-NEXT: mtctr r6 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_2: # %for.body +; CHECK-NEXT: # +; CHECK-NEXT: lxvp vsp0, -64(r3) +; CHECK-NEXT: lxvp vsp2, -32(r3) +; CHECK-NEXT: lxvp vsp4, 0(r3) +; CHECK-NEXT: lxvp vsp6, 32(r3) +; CHECK-NEXT: addi r3, r3, 1 +; CHECK-NEXT: stxvp vsp0, -64(r4) +; CHECK-NEXT: stxvp vsp2, -32(r4) +; CHECK-NEXT: stxvp vsp4, 0(r4) +; CHECK-NEXT: stxvp vsp6, 32(r4) +; CHECK-NEXT: addi r4, r4, 1 +; CHECK-NEXT: bdnz .LBB0_2 +; CHECK-NEXT: # %bb.3: # %for.cond.cleanup +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: foo: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: cmplwi r3, 0 +; CHECK-BE-NEXT: beqlr cr0 +; CHECK-BE-NEXT: # %bb.1: # %for.body.lr.ph +; CHECK-BE-NEXT: clrldi r6, r3, 32 +; CHECK-BE-NEXT: addi r3, r4, 64 +; CHECK-BE-NEXT: addi r4, r5, 64 +; CHECK-BE-NEXT: mtctr r6 +; CHECK-BE-NEXT: .p2align 4 +; CHECK-BE-NEXT: .LBB0_2: # %for.body +; CHECK-BE-NEXT: # +; CHECK-BE-NEXT: lxvp vsp0, -64(r3) +; CHECK-BE-NEXT: lxvp vsp2, -32(r3) +; CHECK-BE-NEXT: lxvp vsp4, 0(r3) +; CHECK-BE-NEXT: lxvp vsp6, 32(r3) +; CHECK-BE-NEXT: addi r3, r3, 1 +; CHECK-BE-NEXT: stxvp vsp0, -64(r4) +; CHECK-BE-NEXT: stxvp vsp2, -32(r4) +; CHECK-BE-NEXT: stxvp vsp4, 0(r4) +; CHECK-BE-NEXT: stxvp vsp6, 32(r4) +; CHECK-BE-NEXT: addi r4, r4, 1 +; CHECK-BE-NEXT: bdnz .LBB0_2 +; CHECK-BE-NEXT: # %bb.3: # %for.cond.cleanup +; CHECK-BE-NEXT: blr +entry: + %cmp35.not = icmp eq i32 %n, 0 + br i1 %cmp35.not, label %for.cond.cleanup, label %for.body.lr.ph + +for.body.lr.ph: + %0 = bitcast <256 x i1>* %ptr to i8* + %1 = bitcast <256 x i1>* %ptr2 to i8* + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.cond.cleanup: + ret void + +for.body: + %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %2 = getelementptr i8, i8* %0, i64 %indvars.iv + %3 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %2) + %add2 = add nuw nsw i64 %indvars.iv, 32 + %4 = getelementptr i8, i8* %0, i64 %add2 + %5 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %4) + %add4 = add nuw nsw i64 %indvars.iv, 64 + %6 = getelementptr i8, i8* %0, i64 %add4 + %7 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %6) + %add6 = add nuw nsw i64 %indvars.iv, 96 + %8 = getelementptr i8, i8* %0, i64 %add6 + %9 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %8) + %10 = getelementptr i8, i8* %1, i64 %indvars.iv + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %3, i8* %10) + %11 = getelementptr i8, i8* %1, i64 %add2 + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %5, i8* %11) + %12 = getelementptr i8, i8* %1, i64 %add4 + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %7, i8* %12) + %13 = getelementptr i8, i8* %1, i64 %add6 + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %9, i8* %13) + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + diff --git a/llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll b/llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll new file mode 100644 index 00000000000000..816a28a61241b9 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll @@ -0,0 +1,114 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs -disable-lsr \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 < %s | FileCheck %s +; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs -disable-lsr \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr10 < %s | FileCheck %s \ +; RUN: --check-prefix=CHECK-BE + +; This test checks the PPCLoopInstrFormPrep pass supports the lxvp and stxvp +; intrinsics so we generate more dq-form instructions instead of x-forms. + +%_elem_type_of_x = type <{ double }> +%_elem_type_of_y = type <{ double }> + +define void @foo(i64* %.n, [0 x %_elem_type_of_x]* %.x, [0 x %_elem_type_of_y]* %.y, <2 x double>* %.sum) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ld r5, 0(r3) +; CHECK-NEXT: cmpdi r5, 1 +; CHECK-NEXT: bltlr cr0 +; CHECK-NEXT: # %bb.1: # %_loop_1_do_.lr.ph +; CHECK-NEXT: addi r3, r4, 1 +; CHECK-NEXT: addi r4, r5, -1 +; CHECK-NEXT: lxv vs0, 0(r6) +; CHECK-NEXT: rldicl r4, r4, 60, 4 +; CHECK-NEXT: addi r4, r4, 1 +; CHECK-NEXT: mtctr r4 +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB0_2: # %_loop_1_do_ +; CHECK-NEXT: # +; CHECK-NEXT: lxvp vsp2, 0(r3) +; CHECK-NEXT: lxvp vsp4, 32(r3) +; CHECK-NEXT: addi r3, r3, 128 +; CHECK-NEXT: xvadddp vs0, vs0, vs3 +; CHECK-NEXT: xvadddp vs0, vs0, vs2 +; CHECK-NEXT: xvadddp vs0, vs0, vs5 +; CHECK-NEXT: xvadddp vs0, vs0, vs4 +; CHECK-NEXT: bdnz .LBB0_2 +; CHECK-NEXT: # %bb.3: # %_loop_1_loopHeader_._return_bb_crit_edge +; CHECK-NEXT: stxv vs0, 0(r6) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: foo: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: ld r5, 0(r3) +; CHECK-BE-NEXT: cmpdi r5, 1 +; CHECK-BE-NEXT: bltlr cr0 +; CHECK-BE-NEXT: # %bb.1: # %_loop_1_do_.lr.ph +; CHECK-BE-NEXT: addi r3, r4, 1 +; CHECK-BE-NEXT: addi r4, r5, -1 +; CHECK-BE-NEXT: lxv vs0, 0(r6) +; CHECK-BE-NEXT: rldicl r4, r4, 60, 4 +; CHECK-BE-NEXT: addi r4, r4, 1 +; CHECK-BE-NEXT: mtctr r4 +; CHECK-BE-NEXT: .p2align 5 +; CHECK-BE-NEXT: .LBB0_2: # %_loop_1_do_ +; CHECK-BE-NEXT: # +; CHECK-BE-NEXT: lxvp vsp2, 0(r3) +; CHECK-BE-NEXT: lxvp vsp4, 32(r3) +; CHECK-BE-NEXT: addi r3, r3, 128 +; CHECK-BE-NEXT: xvadddp vs0, vs0, vs2 +; CHECK-BE-NEXT: xvadddp vs0, vs0, vs3 +; CHECK-BE-NEXT: xvadddp vs0, vs0, vs4 +; CHECK-BE-NEXT: xvadddp vs0, vs0, vs5 +; CHECK-BE-NEXT: bdnz .LBB0_2 +; CHECK-BE-NEXT: # %bb.3: # %_loop_1_loopHeader_._return_bb_crit_edge +; CHECK-BE-NEXT: stxv vs0, 0(r6) +; CHECK-BE-NEXT: blr +entry: + %_val_n_2 = load i64, i64* %.n, align 8 + %_grt_tmp7 = icmp slt i64 %_val_n_2, 1 + br i1 %_grt_tmp7, label %_return_bb, label %_loop_1_do_.lr.ph + +_loop_1_do_.lr.ph: ; preds = %entry + %x_rvo_based_addr_5 = getelementptr inbounds [0 x %_elem_type_of_x], [0 x %_elem_type_of_x]* %.x, i64 0, i64 -1 + %.sum.promoted = load <2 x double>, <2 x double>* %.sum, align 16 + br label %_loop_1_do_ + +_loop_1_do_: ; preds = %_loop_1_do_.lr.ph, %_loop_1_do_ + %_val_sum_9 = phi <2 x double> [ %.sum.promoted, %_loop_1_do_.lr.ph ], [ %_add_tmp49, %_loop_1_do_ ] + %i.08 = phi i64 [ 1, %_loop_1_do_.lr.ph ], [ %_loop_1_update_loop_ix, %_loop_1_do_ ] + %x_ix_dim_0_6 = getelementptr %_elem_type_of_x, %_elem_type_of_x* %x_rvo_based_addr_5, i64 %i.08 + %x_ix_dim_0_ = bitcast %_elem_type_of_x* %x_ix_dim_0_6 to i8* + %0 = getelementptr i8, i8* %x_ix_dim_0_, i64 1 + %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0) + %2 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %1) + %.fca.0.extract1 = extractvalue { <16 x i8>, <16 x i8> } %2, 0 + %.fca.1.extract2 = extractvalue { <16 x i8>, <16 x i8> } %2, 1 + %3 = getelementptr i8, i8* %x_ix_dim_0_, i64 33 + %4 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %3) + %5 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %4) + %.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %5, 0 + %.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %5, 1 + %6 = bitcast <16 x i8> %.fca.0.extract1 to <2 x double> + %_add_tmp23 = fadd contract <2 x double> %_val_sum_9, %6 + %7 = bitcast <16 x i8> %.fca.1.extract2 to <2 x double> + %_add_tmp32 = fadd contract <2 x double> %_add_tmp23, %7 + %8 = bitcast <16 x i8> %.fca.0.extract to <2 x double> + %_add_tmp40 = fadd contract <2 x double> %_add_tmp32, %8 + %9 = bitcast <16 x i8> %.fca.1.extract to <2 x double> + %_add_tmp49 = fadd contract <2 x double> %_add_tmp40, %9 + %_loop_1_update_loop_ix = add nuw nsw i64 %i.08, 16 + %_grt_tmp = icmp sgt i64 %_loop_1_update_loop_ix, %_val_n_2 + br i1 %_grt_tmp, label %_loop_1_loopHeader_._return_bb_crit_edge, label %_loop_1_do_ + +_loop_1_loopHeader_._return_bb_crit_edge: ; preds = %_loop_1_do_ + store <2 x double> %_add_tmp49, <2 x double>* %.sum, align 16 + br label %_return_bb + +_return_bb: ; preds = %_loop_1_loopHeader_._return_bb_crit_edge, %entry + ret void +} + +declare <256 x i1> @llvm.ppc.mma.lxvp(i8*) +declare { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1>) diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll index a0f855200b682e..0eb633ab3f2c9c 100644 --- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll @@ -698,3 +698,315 @@ entry: declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32) declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>) + +; Function Attrs: nounwind +define void @test_ldst_1(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp0, 0(r3) +; CHECK-NEXT: stxvp vsp0, 0(r4) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_ldst_1: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp0, 0(r3) +; CHECK-BE-NEXT: stxvp vsp0, 0(r4) +; CHECK-BE-NEXT: blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0) + %2 = bitcast <256 x i1>* %vp2 to i8* + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %1, i8* %2) + ret void +} + +; Function Attrs: argmemonly nounwind readonly +declare <256 x i1> @llvm.ppc.mma.lxvp(i8*) + +; Function Attrs: argmemonly nounwind writeonly +declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*) + +; Function Attrs: nounwind +define void @test_ldst_2(<256 x i1>* %vpp, i64 %offset, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvpx vsp0, r3, r4 +; CHECK-NEXT: stxvpx vsp0, r5, r4 +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_ldst_2: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvpx vsp0, r3, r4 +; CHECK-BE-NEXT: stxvpx vsp0, r5, r4 +; CHECK-BE-NEXT: blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 %offset + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 %offset + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_3(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li r5, 18 +; CHECK-NEXT: lxvpx vsp0, r3, r5 +; CHECK-NEXT: stxvpx vsp0, r4, r5 +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_ldst_3: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: li r5, 18 +; CHECK-BE-NEXT: lxvpx vsp0, r3, r5 +; CHECK-BE-NEXT: stxvpx vsp0, r4, r5 +; CHECK-BE-NEXT: blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 18 + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 18 + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_4(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: lxvpx vsp0, r3, r5 +; CHECK-NEXT: stxvpx vsp0, r4, r5 +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_ldst_4: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: li r5, 1 +; CHECK-BE-NEXT: lxvpx vsp0, r3, r5 +; CHECK-BE-NEXT: stxvpx vsp0, r4, r5 +; CHECK-BE-NEXT: blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 1 + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 1 + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_5(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li r5, 42 +; CHECK-NEXT: lxvpx vsp0, r3, r5 +; CHECK-NEXT: stxvpx vsp0, r4, r5 +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_ldst_5: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: li r5, 42 +; CHECK-BE-NEXT: lxvpx vsp0, r3, r5 +; CHECK-BE-NEXT: stxvpx vsp0, r4, r5 +; CHECK-BE-NEXT: blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 42 + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 42 + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_6(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp0, 4096(r3) +; CHECK-NEXT: stxvp vsp0, 4096(r4) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_ldst_6: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp0, 4096(r3) +; CHECK-BE-NEXT: stxvp vsp0, 4096(r4) +; CHECK-BE-NEXT: blr +entry: + %0 = getelementptr <256 x i1>, <256 x i1>* %vpp, i64 128 + %1 = bitcast <256 x i1>* %0 to i8* + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = getelementptr <256 x i1>, <256 x i1>* %vp2, i64 128 + %4 = bitcast <256 x i1>* %3 to i8* + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_7(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; FIXME: A prefixed load (plxvp) is expected here as the offset in this +; test case is a constant that fits within 34-bits. +; CHECK-LABEL: test_ldst_7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li r5, 0 +; CHECK-NEXT: ori r5, r5, 32799 +; CHECK-NEXT: lxvpx vsp0, r3, r5 +; CHECK-NEXT: stxvpx vsp0, r4, r5 +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_ldst_7: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: li r5, 0 +; CHECK-BE-NEXT: ori r5, r5, 32799 +; CHECK-BE-NEXT: lxvpx vsp0, r3, r5 +; CHECK-BE-NEXT: stxvpx vsp0, r4, r5 +; CHECK-BE-NEXT: blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 32799 + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 32799 + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nofree nounwind +define void @test_ldst_8(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp) { +; CHECK-LABEL: test_ldst_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: li r3, 8 +; CHECK-NEXT: lxvpx vsp4, r4, r3 +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: pmxvf64gernn acc0, vsp4, v2, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_ldst_8: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: li r3, 8 +; CHECK-BE-NEXT: lxvpx vsp4, r4, r3 +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: pmxvf64gernn acc0, vsp4, v2, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: blr +entry: + %0 = bitcast i8* %vqp to <512 x i1>* + %1 = load <512 x i1>, <512 x i1>* %0, align 64 + %2 = bitcast <256 x i1>* %vpp to i8* + %3 = getelementptr i8, i8* %2, i64 8 + %4 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %3) + %5 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %1, <256 x i1> %4, <16 x i8> %vc, i32 0, i32 0) + %6 = bitcast i8* %resp to <512 x i1>* + store <512 x i1> %5, <512 x i1>* %6, align 64 + ret void +} + +; Function Attrs: nofree nounwind +define void @test_ldst_9(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp) { +; CHECK-LABEL: test_ldst_9: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxvp vsp4, 0(r4) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: xvf64gernp acc0, vsp4, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_ldst_9: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxvp vsp4, 0(r4) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: xvf64gernp acc0, vsp4, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: blr +entry: + %0 = bitcast i8* %vqp to <512 x i1>* + %1 = load <512 x i1>, <512 x i1>* %0, align 64 + %2 = bitcast <256 x i1>* %vpp to i8* + %3 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %2) + %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc) + %5 = bitcast i8* %resp to <512 x i1>* + store <512 x i1> %4, <512 x i1>* %5, align 64 + ret void +} + +; Function Attrs: nofree nounwind +define void @test_ldst_10(i8* nocapture readonly %vqp, i64 %offs, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp) { +; CHECK-LABEL: test_ldst_10: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxvp vsp4, 0(r5) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: xvf64gernp acc0, vsp4, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r9) +; CHECK-NEXT: stxv vs1, 32(r9) +; CHECK-NEXT: stxv vs2, 16(r9) +; CHECK-NEXT: stxv vs3, 0(r9) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_ldst_10: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxvp vsp4, 0(r5) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: xvf64gernp acc0, vsp4, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r9) +; CHECK-BE-NEXT: stxv vs0, 0(r9) +; CHECK-BE-NEXT: stxv vs3, 48(r9) +; CHECK-BE-NEXT: stxv vs2, 32(r9) +; CHECK-BE-NEXT: blr +entry: + %0 = bitcast i8* %vqp to <512 x i1>* + %1 = load <512 x i1>, <512 x i1>* %0, align 64 + %2 = bitcast <256 x i1>* %vpp to i8* + %3 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %2) + %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc) + %5 = bitcast i8* %resp to <512 x i1>* + store <512 x i1> %4, <512 x i1>* %5, align 64 + ret void +} diff --git a/llvm/test/MC/WebAssembly/debug-info64.ll b/llvm/test/MC/WebAssembly/debug-info64.ll new file mode 100644 index 00000000000000..48f46ee10694f5 --- /dev/null +++ b/llvm/test/MC/WebAssembly/debug-info64.ll @@ -0,0 +1,289 @@ +; RUN: llc -filetype=obj %s -o - | llvm-readobj -r -S --symbols - | FileCheck %s + +; CHECK: Format: WASM +; CHECK-NEXT: Arch: wasm64 +; CHECK-NEXT: AddressSize: 64bit +; CHECK-NEXT: Sections [ +; CHECK-NEXT: Section { +; CHECK-NEXT: Type: TYPE (0x1) +; CHECK-NEXT: Size: 4 +; CHECK-NEXT: Offset: 8 +; CHECK-NEXT: } +; CHECK-NEXT: Section { +; CHECK-NEXT: Type: IMPORT (0x2) +; CHECK-NEXT: Size: 81 +; CHECK-NEXT: Offset: 18 +; CHECK-NEXT: } +; CHECK-NEXT: Section { +; CHECK-NEXT: Type: FUNCTION (0x3) +; CHECK-NEXT: Size: 2 +; CHECK-NEXT: Offset: 105 +; CHECK-NEXT: } +; CHECK-NEXT: Section { +; CHECK-NEXT: Type: ELEM (0x9) +; CHECK-NEXT: Size: 7 +; CHECK-NEXT: Offset: 113 +; CHECK-NEXT: } +; CHECK-NEXT: Section { +; CHECK-NEXT: Type: DATACOUNT (0xC) +; CHECK-NEXT: Size: 1 +; CHECK-NEXT: Offset: 126 +; CHECK-NEXT: } +; CHECK-NEXT: Section { +; CHECK-NEXT: Type: CODE (0xA) +; CHECK-NEXT: Size: 4 +; CHECK-NEXT: Offset: 133 +; CHECK-NEXT: } +; CHECK-NEXT: Section { +; CHECK-NEXT: Type: DATA (0xB) +; CHECK-NEXT: Size: 27 +; CHECK-NEXT: Offset: 143 +; CHECK-NEXT: Segments [ +; CHECK-NEXT: Segment { +; CHECK-NEXT: Name: .data.foo +; CHECK-NEXT: Size: 8 +; CHECK-NEXT: Offset: 0 +; CHECK-NEXT: } +; CHECK-NEXT: Segment { +; CHECK-NEXT: Name: .data.ptr2 +; CHECK-NEXT: Size: 8 +; CHECK-NEXT: Offset: 8 +; CHECK-NEXT: } +; CHECK-NEXT: ] +; CHECK-NEXT: } +; CHECK-NEXT: Section { +; CHECK-NEXT: Type: CUSTOM (0x0) +; CHECK-NEXT: Size: 86 +; CHECK-NEXT: Offset: 176 +; CHECK-NEXT: Name: .debug_abbrev +; CHECK-NEXT: } +; CHECK-NEXT: Section { +; CHECK-NEXT: Type: CUSTOM (0x0) +; CHECK-NEXT: Size: 130 +; CHECK-NEXT: Offset: 282 +; CHECK-NEXT: Name: .debug_info +; CHECK-NEXT: } +; CHECK-NEXT: Section { +; CHECK-NEXT: Type: CUSTOM (0x0) +; CHECK-NEXT: Size: 121 +; CHECK-NEXT: Offset: 430 +; CHECK-NEXT: Name: .debug_str +; CHECK-NEXT: } +; CHECK-NEXT: Section { +; CHECK-NEXT: Type: CUSTOM (0x0) +; CHECK-NEXT: Size: 42 +; CHECK-NEXT: Offset: 568 +; CHECK-NEXT: Name: .debug_pubnames +; CHECK-NEXT: } +; CHECK-NEXT: Section { +; CHECK-NEXT: Type: CUSTOM (0x0) +; CHECK-NEXT: Size: 26 +; CHECK-NEXT: Offset: 632 +; CHECK-NEXT: Name: .debug_pubtypes +; CHECK-NEXT: } +; CHECK-NEXT: Section { +; CHECK-NEXT: Type: CUSTOM (0x0) +; CHECK-NEXT: Size: 61 +; CHECK-NEXT: Offset: 680 +; CHECK-NEXT: Name: .debug_line +; CHECK-NEXT: } +; CHECK-NEXT: Section { +; CHECK-NEXT: Type: CUSTOM (0x0) +; CHECK-NEXT: Size: 91 +; CHECK-NEXT: Offset: 759 +; CHECK-NEXT: Name: linking +; CHECK-NEXT: } +; CHECK-NEXT: Section { +; CHECK-NEXT: Type: CUSTOM (0x0) +; CHECK-NEXT: Size: 9 +; CHECK-NEXT: Offset: 864 +; CHECK-NEXT: Name: reloc.DATA +; CHECK-NEXT: } +; CHECK-NEXT: Section { +; CHECK-NEXT: Type: CUSTOM (0x0) +; CHECK-NEXT: Size: 61 +; CHECK-NEXT: Offset: 890 +; CHECK-NEXT: Name: reloc..debug_info +; CHECK-NEXT: } +; CHECK-NEXT: Section { +; CHECK-NEXT: Type: CUSTOM (0x0) +; CHECK-NEXT: Size: 6 +; CHECK-NEXT: Offset: 975 +; CHECK-NEXT: Name: reloc..debug_pubnames +; CHECK-NEXT: } +; CHECK-NEXT: Section { +; CHECK-NEXT: Type: CUSTOM (0x0) +; CHECK-NEXT: Size: 6 +; CHECK-NEXT: Offset: 1009 +; CHECK-NEXT: Name: reloc..debug_pubtypes +; CHECK-NEXT: } +; CHECK-NEXT: Section { +; CHECK-NEXT: Type: CUSTOM (0x0) +; CHECK-NEXT: Size: 6 +; CHECK-NEXT: Offset: 1043 +; CHECK-NEXT: Name: reloc..debug_line +; CHECK-NEXT: } +; CHECK-NEXT: Section { +; CHECK-NEXT: Type: CUSTOM (0x0) +; CHECK-NEXT: Size: 77 +; CHECK-NEXT: Offset: 1073 +; CHECK-NEXT: Name: producers +; CHECK-NEXT: } +; CHECK-NEXT: ] +; CHECK-NEXT: Relocations [ +; CHECK-NEXT: Section (7) DATA { +; CHECK-NEXT: 0x6 R_WASM_MEMORY_ADDR_I64 myextern 0 +; CHECK-NEXT: 0x13 R_WASM_TABLE_INDEX_I64 f2 +; CHECK-NEXT: } +; CHECK-NEXT: Section (9) .debug_info { +; CHECK-NEXT: 0x6 R_WASM_SECTION_OFFSET_I32 .debug_abbrev 0 +; CHECK-NEXT: 0xC R_WASM_SECTION_OFFSET_I32 .debug_str 0 +; CHECK-NEXT: 0x12 R_WASM_SECTION_OFFSET_I32 .debug_str 55 +; CHECK-NEXT: 0x16 R_WASM_SECTION_OFFSET_I32 .debug_line 0 +; CHECK-NEXT: 0x1A R_WASM_SECTION_OFFSET_I32 .debug_str 62 +; CHECK-NEXT: 0x1E R_WASM_FUNCTION_OFFSET_I64 f2 0 +; CHECK-NEXT: 0x2B R_WASM_SECTION_OFFSET_I32 .debug_str 105 +; CHECK-NEXT: 0x37 R_WASM_MEMORY_ADDR_I64 foo 0 +; CHECK-NEXT: 0x45 R_WASM_SECTION_OFFSET_I32 .debug_str 109 +; CHECK-NEXT: 0x4C R_WASM_SECTION_OFFSET_I32 .debug_str 113 +; CHECK-NEXT: 0x58 R_WASM_MEMORY_ADDR_I64 ptr2 0 +; CHECK-NEXT: 0x67 R_WASM_FUNCTION_OFFSET_I64 f2 0 +; CHECK-NEXT: 0x76 R_WASM_GLOBAL_INDEX_I32 __stack_pointer +; CHECK-NEXT: 0x7B R_WASM_SECTION_OFFSET_I32 .debug_str 118 +; CHECK-NEXT: } +; CHECK-NEXT: Section (11) .debug_pubnames { +; CHECK-NEXT: 0x6 R_WASM_SECTION_OFFSET_I32 .debug_info 0 +; CHECK-NEXT: } +; CHECK-NEXT: Section (12) .debug_pubtypes { +; CHECK-NEXT: 0x6 R_WASM_SECTION_OFFSET_I32 .debug_info 0 +; CHECK-NEXT: } +; CHECK-NEXT: Section (13) .debug_line { +; CHECK-NEXT: 0x2B R_WASM_FUNCTION_OFFSET_I64 f2 0 +; CHECK-NEXT: } +; CHECK-NEXT: ] +; CHECK-NEXT: Symbols [ +; CHECK-NEXT: Symbol { +; CHECK-NEXT: Name: f2 +; CHECK-NEXT: Type: FUNCTION (0x0) +; CHECK-NEXT: Flags [ (0x4) +; CHECK-NEXT: VISIBILITY_HIDDEN (0x4) +; CHECK-NEXT: ] +; CHECK-NEXT: ElementIndex: 0x0 +; CHECK-NEXT: } +; CHECK-NEXT: Symbol { +; CHECK-NEXT: Name: foo +; CHECK-NEXT: Type: DATA (0x1) +; CHECK-NEXT: Flags [ (0x4) +; CHECK-NEXT: VISIBILITY_HIDDEN (0x4) +; CHECK-NEXT: ] +; CHECK-NEXT: Offset: 0x0 +; CHECK-NEXT: Segment: 0x0 +; CHECK-NEXT: Size: 0x8 +; CHECK-NEXT: } +; CHECK-NEXT: Symbol { +; CHECK-NEXT: Name: myextern +; CHECK-NEXT: Type: DATA (0x1) +; CHECK-NEXT: Flags [ (0x10) +; CHECK-NEXT: UNDEFINED (0x10) +; CHECK-NEXT: ] +; CHECK-NEXT: } +; CHECK-NEXT: Symbol { +; CHECK-NEXT: Name: ptr2 +; CHECK-NEXT: Type: DATA (0x1) +; CHECK-NEXT: Flags [ (0x4) +; CHECK-NEXT: VISIBILITY_HIDDEN (0x4) +; CHECK-NEXT: ] +; CHECK-NEXT: Offset: 0x0 +; CHECK-NEXT: Segment: 0x1 +; CHECK-NEXT: Size: 0x8 +; CHECK-NEXT: } +; CHECK-NEXT: Symbol { +; CHECK-NEXT: Name: .debug_abbrev +; CHECK-NEXT: Type: SECTION (0x3) +; CHECK-NEXT: Flags [ (0x2) +; CHECK-NEXT: BINDING_LOCAL (0x2) +; CHECK-NEXT: ] +; CHECK-NEXT: ElementIndex: 0x7 +; CHECK-NEXT: } +; CHECK-NEXT: Symbol { +; CHECK-NEXT: Name: .debug_info +; CHECK-NEXT: Type: SECTION (0x3) +; CHECK-NEXT: Flags [ (0x2) +; CHECK-NEXT: BINDING_LOCAL (0x2) +; CHECK-NEXT: ] +; CHECK-NEXT: ElementIndex: 0x8 +; CHECK-NEXT: } +; CHECK-NEXT: Symbol { +; CHECK-NEXT: Name: __stack_pointer +; CHECK-NEXT: Type: GLOBAL (0x2) +; CHECK-NEXT: Flags [ (0x10) +; CHECK-NEXT: UNDEFINED (0x10) +; CHECK-NEXT: ] +; CHECK-NEXT: ImportName: __stack_pointer +; CHECK-NEXT: ImportModule: env +; CHECK-NEXT: ElementIndex: 0x0 +; CHECK-NEXT: } +; CHECK-NEXT: Symbol { +; CHECK-NEXT: Name: .debug_str +; CHECK-NEXT: Type: SECTION (0x3) +; CHECK-NEXT: Flags [ (0x2) +; CHECK-NEXT: BINDING_LOCAL (0x2) +; CHECK-NEXT: ] +; CHECK-NEXT: ElementIndex: 0x9 +; CHECK-NEXT: } +; CHECK-NEXT: Symbol { +; CHECK-NEXT: Name: .debug_line +; CHECK-NEXT: Type: SECTION (0x3) +; CHECK-NEXT: Flags [ (0x2) +; CHECK-NEXT: BINDING_LOCAL (0x2) +; CHECK-NEXT: ] +; CHECK-NEXT: ElementIndex: 0xC +; CHECK-NEXT: } +; CHECK-NEXT: ] + +; generated from the following C code using: clang --target=wasm64 -g -O0 -S -emit-llvm test.c +; extern int myextern; +; void f2(void) { return; } +; +; int* foo = &myextern; +; void (*ptr2)(void) = f2; + +target triple = "wasm64-unknown-unknown" + +source_filename = "test.c" + +@myextern = external global i32, align 4 +@foo = hidden global i32* @myextern, align 4, !dbg !0 +@ptr2 = hidden global void ()* @f2, align 4, !dbg !6 + +; Function Attrs: noinline nounwind optnone +define hidden void @f2() #0 !dbg !17 { +entry: + ret void, !dbg !18 +} + +attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!13, !14, !15} +!llvm.ident = !{!16} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "foo", scope: !2, file: !3, line: 4, type: !11, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 7.0.0 (trunk 332303) (llvm/trunk 332406)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5) +!3 = !DIFile(filename: "test.c", directory: "/usr/local/google/home/sbc/dev/wasm/simple") +!4 = !{} +!5 = !{!0, !6} +!6 = !DIGlobalVariableExpression(var: !7, expr: !DIExpression()) +!7 = distinct !DIGlobalVariable(name: "ptr2", scope: !2, file: !3, line: 5, type: !8, isLocal: false, isDefinition: true) +!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 32) +!9 = !DISubroutineType(types: !10) +!10 = !{null} +!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 32) +!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!13 = !{i32 2, !"Dwarf Version", i32 4} +!14 = !{i32 2, !"Debug Info Version", i32 3} +!15 = !{i32 1, !"wchar_size", i32 4} +!16 = !{!"clang version 7.0.0 (trunk 332303) (llvm/trunk 332406)"} +!17 = distinct !DISubprogram(name: "f2", scope: !3, file: !3, line: 2, type: !9, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !2, retainedNodes: !4) +!18 = !DILocation(line: 2, column: 17, scope: !17) diff --git a/llvm/test/MC/WebAssembly/dwarfdump64.ll b/llvm/test/MC/WebAssembly/dwarfdump64.ll index 0166858c913425..ff711ed5398822 100644 --- a/llvm/test/MC/WebAssembly/dwarfdump64.ll +++ b/llvm/test/MC/WebAssembly/dwarfdump64.ll @@ -2,7 +2,7 @@ ; RUN: llc -filetype=obj %s -o - | llvm-dwarfdump --show-form - | FileCheck %s ; CHECK: .debug_info contents: -; CHECK-NEXT: 0x00000000: Compile Unit: length = 0x0000006e, format = DWARF32, version = 0x0004, abbr_offset = 0x0000, addr_size = 0x04 (next unit at 0x00000072) +; CHECK-NEXT: 0x00000000: Compile Unit: length = 0x0000007e, format = DWARF32, version = 0x0004, abbr_offset = 0x0000, addr_size = 0x08 (next unit at 0x00000082) ; CHECK: 0x0000000b: DW_TAG_compile_unit ; CHECK-NEXT: DW_AT_producer [DW_FORM_strp] ("clang version 6.0.0 (trunk 315924) (llvm/trunk 315960)") @@ -11,46 +11,41 @@ ; CHECK-NEXT: DW_AT_stmt_list [DW_FORM_sec_offset] (0x00000000) ; CHECK-NEXT: DW_AT_comp_dir [DW_FORM_strp] ("/usr/local/google/home/sbc/dev/wasm/simple") ; CHECK-NEXT: DW_AT_GNU_pubnames [DW_FORM_flag_present] (true) -; CHECK-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x00000002) +; CHECK-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000002) ; CHECK-NEXT: DW_AT_high_pc [DW_FORM_data4] (0x00000002) -; CHECK: 0x00000026: DW_TAG_variable +; CHECK: 0x0000002a: DW_TAG_variable ; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ("foo") -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x00000037 "int*") +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x0000003f "int*") ; CHECK-NEXT: DW_AT_external [DW_FORM_flag_present] (true) ; CHECK-NEXT: DW_AT_decl_file [DW_FORM_data1] ("/usr/local/google/home/sbc/dev/wasm/simple{{[/\\]}}test.c") ; CHECK-NEXT: DW_AT_decl_line [DW_FORM_data1] (4) ; CHECK-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_addr 0x0) -; CHECK: 0x00000037: DW_TAG_pointer_type -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x0000003c "int") +; CHECK: 0x0000003f: DW_TAG_pointer_type +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x00000044 "int") -; CHECK: 0x0000003c: DW_TAG_base_type +; CHECK: 0x00000044: DW_TAG_base_type ; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ("int") ; CHECK-NEXT: DW_AT_encoding [DW_FORM_data1] (DW_ATE_signed) ; CHECK-NEXT: DW_AT_byte_size [DW_FORM_data1] (0x04) -; CHECK: 0x00000043: DW_TAG_variable +; CHECK: 0x0000004b: DW_TAG_variable ; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ("ptr2") -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x00000054 "void()*") +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x00000060 "void()*") ; CHECK-NEXT: DW_AT_external [DW_FORM_flag_present] (true) ; CHECK-NEXT: DW_AT_decl_file [DW_FORM_data1] ("/usr/local/google/home/sbc/dev/wasm/simple{{[/\\]}}test.c") ; CHECK-NEXT: DW_AT_decl_line [DW_FORM_data1] (5) - - - -; TODO: is this correct? - ; CHECK-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_addr 0x8) -; CHECK: 0x00000054: DW_TAG_pointer_type -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x00000059 "void()") +; CHECK: 0x00000060: DW_TAG_pointer_type +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x00000065 "void()") -; CHECK: 0x00000059: DW_TAG_subroutine_type -; CHECK-NEXT: DW_AT_prototyped [DW_FORM_flag_present] (true) +; CHECK: 0x00000065: DW_TAG_subroutine_type +; CHECK-NEXT: DW_AT_prototyped [DW_FORM_flag_present] (true) -; CHECK: 0x0000005a: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x00000002) +; CHECK: 0x00000066: DW_TAG_subprogram +; CHECK-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000002) ; CHECK-NEXT: DW_AT_high_pc [DW_FORM_data4] (0x00000002) ; CHECK-NEXT: DW_AT_frame_base [DW_FORM_exprloc] (DW_OP_WASM_location 0x3 0x0, DW_OP_stack_value) ; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ("f2") @@ -59,7 +54,7 @@ ; CHECK-NEXT: DW_AT_prototyped [DW_FORM_flag_present] (true) ; CHECK-NEXT: DW_AT_external [DW_FORM_flag_present] (true) -; CHECK: 0x00000071: NULL +; CHECK: 0x00000081: NULL target triple = "wasm64-unknown-unknown" diff --git a/llvm/test/Transforms/FunctionImport/Inputs/cg_profile.ll b/llvm/test/Transforms/FunctionImport/Inputs/cg_profile.ll new file mode 100644 index 00000000000000..5143575e1712b7 --- /dev/null +++ b/llvm/test/Transforms/FunctionImport/Inputs/cg_profile.ll @@ -0,0 +1,12 @@ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%class.A = type { [16 x i8] } + +define void @bar(%class.A*) { + ret void +} + +!llvm.module.flags = !{!1} + +!1 = !{i32 1, !"EnableSplitLTOUnit", i32 0} diff --git a/llvm/test/Transforms/FunctionImport/cg_profile.ll b/llvm/test/Transforms/FunctionImport/cg_profile.ll new file mode 100644 index 00000000000000..2cf920c2a5ddcc --- /dev/null +++ b/llvm/test/Transforms/FunctionImport/cg_profile.ll @@ -0,0 +1,32 @@ +; Check that bitcast in "CG Profile" related metadata nodes (in this test case, +; generated during function importing in IRMover's RAUW operations) are accepted +; by verifier. +; RUN: opt -cg-profile -module-summary %s -o %t.bc +; RUN: opt -module-summary %p/Inputs/cg_profile.ll -o %t2.bc +; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc +; RUN: opt -function-import -print-imports -summary-file %t3.thinlto.bc %t.bc \ +; RUN: -S 2>&1 | FileCheck %s + +; CHECK: !0 = !{i32 1, !"EnableSplitLTOUnit", i32 0} +; CHECK-NEXT: !1 = !{i32 5, !"CG Profile", !2} +; CHECK-NEXT: !2 = !{!3} +; CHECK-NEXT: !3 = !{void ()* @foo, void (%class.A*)* bitcast (void (%class.A.0*)* @bar to void (%class.A*)*), i64 2753} + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; %class.A is defined differently in %p/Inputs/cg_profile.ll. This is to trigger +; bitcast. +%class.A = type { i8 } + +define void @foo() !prof !2 { + call void @bar(%class.A* null) + ret void +} + +declare void @bar(%class.A*) + +!llvm.module.flags = !{!1} + +!1 = !{i32 1, !"EnableSplitLTOUnit", i32 0} +!2 = !{!"function_entry_count", i64 2753} diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll index ce9657433bb785..9868ed1cdf5758 100644 --- a/llvm/test/Transforms/InstCombine/sub-gep.ll +++ b/llvm/test/Transforms/InstCombine/sub-gep.ll @@ -143,8 +143,8 @@ define i64 @test_inbounds_nuw_multi_index([0 x [2 x i32]]* %base, i64 %idx, i64 ; CHECK-LABEL: @test_inbounds_nuw_multi_index( ; CHECK-NEXT: [[P2_IDX:%.*]] = shl nsw i64 [[IDX:%.*]], 3 ; CHECK-NEXT: [[P2_IDX1:%.*]] = shl nsw i64 [[IDX2:%.*]], 2 -; CHECK-NEXT: [[P2_OFFS2:%.*]] = add i64 [[P2_IDX1]], [[P2_IDX]] -; CHECK-NEXT: ret i64 [[P2_OFFS2]] +; CHECK-NEXT: [[P2_OFFS:%.*]] = add nsw i64 [[P2_IDX]], [[P2_IDX1]] +; CHECK-NEXT: ret i64 [[P2_OFFS]] ; %p1 = getelementptr inbounds [0 x [2 x i32]], [0 x [2 x i32]]* %base, i64 0, i64 0, i64 0 %p2 = getelementptr inbounds [0 x [2 x i32]], [0 x [2 x i32]]* %base, i64 0, i64 %idx, i64 %idx2 diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index 3e9c066dfbf3ac..d9c67d0568f74a 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -1075,8 +1075,8 @@ define i64 @test58([100 x [100 x i8]]* %foo, i64 %i, i64 %j) { ; "%sub = i64 %i, %j, ret i64 %sub" ; gep1 and gep2 have only one use ; CHECK-LABEL: @test58( -; CHECK-NEXT: [[GEP1_OFFS:%.*]] = add i64 [[I:%.*]], 4200 -; CHECK-NEXT: [[GEP2_OFFS:%.*]] = add i64 [[J:%.*]], 4200 +; CHECK-NEXT: [[GEP1_OFFS:%.*]] = add nsw i64 [[I:%.*]], 4200 +; CHECK-NEXT: [[GEP2_OFFS:%.*]] = add nsw i64 [[J:%.*]], 4200 ; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[GEP1_OFFS]], [[GEP2_OFFS]] ; CHECK-NEXT: ret i64 [[GEPDIFF]] ; diff --git a/llvm/test/Transforms/LoopFusion/simple.ll b/llvm/test/Transforms/LoopFusion/simple.ll index 97591ddf5d9975..bb4cf17d84697c 100644 --- a/llvm/test/Transforms/LoopFusion/simple.ll +++ b/llvm/test/Transforms/LoopFusion/simple.ll @@ -303,9 +303,8 @@ define void @forward_dep(i32* noalias %arg) { ; CHECK-NEXT: bb: ; CHECK-NEXT: br label [[BB7:%.*]] ; CHECK: bb7: -; CHECK-NEXT: [[DOT013:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB25:%.*]] ] -; CHECK-NEXT: [[INDVARS_IV22:%.*]] = phi i64 [ 0, [[BB]] ], [ [[INDVARS_IV_NEXT3:%.*]], [[BB25]] ] -; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BB25]] ], [ 0, [[BB]] ] +; CHECK-NEXT: [[DOT013:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB14:%.*]] ] +; CHECK-NEXT: [[INDVARS_IV22:%.*]] = phi i64 [ 0, [[BB]] ], [ [[INDVARS_IV_NEXT3:%.*]], [[BB14]] ] ; CHECK-NEXT: [[TMP:%.*]] = add nsw i32 [[DOT013]], -3 ; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[INDVARS_IV22]], 3 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32 @@ -314,8 +313,16 @@ define void @forward_dep(i32* noalias %arg) { ; CHECK-NEXT: [[TMP12:%.*]] = srem i32 [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[ARG:%.*]], i64 [[INDVARS_IV22]] ; CHECK-NEXT: store i32 [[TMP12]], i32* [[TMP13]], align 4 -; CHECK-NEXT: br label [[BB14:%.*]] +; CHECK-NEXT: br label [[BB14]] ; CHECK: bb14: +; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV22]], 1 +; CHECK-NEXT: [[TMP15]] = add nuw nsw i32 [[DOT013]], 1 +; CHECK-NEXT: [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 100 +; CHECK-NEXT: br i1 [[EXITCOND4]], label [[BB7]], label [[BB19_PREHEADER:%.*]] +; CHECK: bb19.preheader: +; CHECK-NEXT: br label [[BB19:%.*]] +; CHECK: bb19: +; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BB25:%.*]] ], [ 0, [[BB19_PREHEADER]] ] ; CHECK-NEXT: [[TMP20:%.*]] = add nsw i64 [[INDVARS_IV1]], -3 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[ARG]], i64 [[TMP20]] ; CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 @@ -324,12 +331,9 @@ define void @forward_dep(i32* noalias %arg) { ; CHECK-NEXT: store i32 [[TMP23]], i32* [[TMP24]], align 4 ; CHECK-NEXT: br label [[BB25]] ; CHECK: bb25: -; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV22]], 1 -; CHECK-NEXT: [[TMP15]] = add nuw nsw i32 [[DOT013]], 1 -; CHECK-NEXT: [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 100 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV1]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[BB7]], label [[BB26:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[BB19]], label [[BB26:%.*]] ; CHECK: bb26: ; CHECK-NEXT: ret void ; diff --git a/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp b/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp index 6a54084cacafbc..c6a7f8e87bb01e 100644 --- a/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp @@ -392,6 +392,71 @@ TEST_F(AArch64GISelMITest, MatchMiscellaneous) { match = mi_match(MIBAdd.getReg(0), *MRI, m_OneUse(m_GAdd(m_Reg(), m_Reg()))); EXPECT_FALSE(match); } + +TEST_F(AArch64GISelMITest, MatchSpecificConstant) { + setUp(); + if (!TM) + return; + + // Basic case: Can we match a G_CONSTANT with a specific value? + auto FortyTwo = B.buildConstant(LLT::scalar(64), 42); + EXPECT_TRUE(mi_match(FortyTwo.getReg(0), *MRI, m_SpecificICst(42))); + EXPECT_FALSE(mi_match(FortyTwo.getReg(0), *MRI, m_SpecificICst(123))); + + // Test that this works inside of a more complex pattern. + LLT s64 = LLT::scalar(64); + auto MIBAdd = B.buildAdd(s64, Copies[0], FortyTwo); + EXPECT_TRUE(mi_match(MIBAdd.getReg(2), *MRI, m_SpecificICst(42))); + + // Wrong constant. + EXPECT_FALSE(mi_match(MIBAdd.getReg(2), *MRI, m_SpecificICst(123))); + + // No constant on the LHS. + EXPECT_FALSE(mi_match(MIBAdd.getReg(1), *MRI, m_SpecificICst(42))); +} + +TEST_F(AArch64GISelMITest, MatchZeroInt) { + setUp(); + if (!TM) + return; + auto Zero = B.buildConstant(LLT::scalar(64), 0); + EXPECT_TRUE(mi_match(Zero.getReg(0), *MRI, m_ZeroInt())); + + auto FortyTwo = B.buildConstant(LLT::scalar(64), 42); + EXPECT_FALSE(mi_match(FortyTwo.getReg(0), *MRI, m_ZeroInt())); +} + +TEST_F(AArch64GISelMITest, MatchNeg) { + setUp(); + if (!TM) + return; + + LLT s64 = LLT::scalar(64); + auto Zero = B.buildConstant(LLT::scalar(64), 0); + auto NegInst = B.buildSub(s64, Zero, Copies[0]); + Register NegatedReg; + + // Match: G_SUB = 0, %Reg + EXPECT_TRUE(mi_match(NegInst.getReg(0), *MRI, m_Neg(m_Reg(NegatedReg)))); + EXPECT_EQ(NegatedReg, Copies[0]); + + // Don't match: G_SUB = %Reg, 0 + auto NotNegInst1 = B.buildSub(s64, Copies[0], Zero); + EXPECT_FALSE(mi_match(NotNegInst1.getReg(0), *MRI, m_Neg(m_Reg(NegatedReg)))); + + // Don't match: G_SUB = 42, %Reg + auto FortyTwo = B.buildConstant(LLT::scalar(64), 42); + auto NotNegInst2 = B.buildSub(s64, FortyTwo, Copies[0]); + EXPECT_FALSE(mi_match(NotNegInst2.getReg(0), *MRI, m_Neg(m_Reg(NegatedReg)))); + + // Complex testcase. + // %sub = G_SUB = 0, %negated_reg + // %add = G_ADD = %x, %sub + auto AddInst = B.buildAdd(s64, Copies[1], NegInst); + NegatedReg = Register(); + EXPECT_TRUE(mi_match(AddInst.getReg(2), *MRI, m_Neg(m_Reg(NegatedReg)))); + EXPECT_EQ(NegatedReg, Copies[0]); +} } // namespace int main(int argc, char **argv) { diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/test/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/test/BUILD.gn index c929f88dec73d1..e6ea3b5c56c676 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/test/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/test/BUILD.gn @@ -1,5 +1,6 @@ import("//clang-tools-extra/clangd/xpc/enable.gni") import("//llvm/triples.gni") +import("//llvm/utils/gn/build/libs/zlib/enable.gni") import("//llvm/utils/gn/build/write_cmake_config.gni") import("clangd_lit_site_cfg_files.gni") @@ -42,6 +43,12 @@ write_lit_config("lit_site_cfg") { } else { extra_values += [ "CLANGD_BUILD_XPC=0" ] } + + if (llvm_enable_zlib) { + extra_values += [ "LLVM_ENABLE_ZLIB=1" ] + } else { + extra_values += [ "LLVM_ENABLE_ZLIB=0" ] + } } write_lit_config("lit_unit_site_cfg") {