From 11a3f040c71020b0247144268570d571858b119c Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sat, 4 Jul 2020 20:04:28 +0300 Subject: [PATCH 1/3] [Utils] Make -assume-builder/-assume-simplify actually work on Old-PM clang w/ old-pm currently would simply crash when -mllvm -enable-knowledge-retention=true is specified. Clearly, these two passes had no Old-PM test coverage, which would have shown the problem - not requiring AssumptionCacheTracker, but then trying to always get it. Also, why try to get domtree only if it's cached, but at the same time marking it as required? --- llvm/include/llvm/InitializePasses.h | 1 + .../Transforms/Utils/AssumeBundleBuilder.cpp | 45 +++++++++-- llvm/lib/Transforms/Utils/Utils.cpp | 1 + llvm/test/Transforms/Util/assume-builder.ll | 77 +++++++++++++------ llvm/test/Transforms/Util/assume-simplify.ll | 1 + 5 files changed, 97 insertions(+), 28 deletions(-) diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 3b8bc9f8b7955f..f0d5accf13c5c2 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -72,6 +72,7 @@ void initializeAliasSetPrinterPass(PassRegistry&); void initializeAlignmentFromAssumptionsPass(PassRegistry&); void initializeAlwaysInlinerLegacyPassPass(PassRegistry&); void initializeAssumeSimplifyPassLegacyPassPass(PassRegistry &); +void initializeAssumeBuilderPassLegacyPassPass(PassRegistry &); void initializeOpenMPOptLegacyPassPass(PassRegistry &); void initializeArgPromotionPass(PassRegistry&); void initializeAssumptionCacheTrackerPass(PassRegistry&); diff --git a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp index ad457440d48a38..f2208edd5b196b 100644 --- a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp +++ b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp @@ -525,14 +525,16 @@ class AssumeSimplifyPassLegacyPass : public FunctionPass { bool runOnFunction(Function &F) override { 
if (skipFunction(F) || !EnableKnowledgeRetention) return false; - DominatorTreeWrapperPass *DT = - getAnalysisIfAvailable<DominatorTreeWrapperPass>(); AssumptionCache &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - return simplifyAssumes(F, &AC, DT ? &DT->getDomTree() : nullptr); + DominatorTreeWrapperPass *DTWP = + getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + return simplifyAssumes(F, &AC, DTWP ? &DTWP->getDomTree() : nullptr); } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); + AU.setPreservesAll(); } }; @@ -542,7 +544,6 @@ char AssumeSimplifyPassLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(AssumeSimplifyPassLegacyPass, "assume-simplify", "Assume Simplify", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_END(AssumeSimplifyPassLegacyPass, "assume-simplify", "Assume Simplify", false, false) @@ -553,9 +554,43 @@ FunctionPass *llvm::createAssumeSimplifyPass() { PreservedAnalyses AssumeBuilderPass::run(Function &F, FunctionAnalysisManager &AM) { - AssumptionCache* AC = AM.getCachedResult<AssumptionAnalysis>(F); + AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F); DominatorTree* DT = AM.getCachedResult<DominatorTreeAnalysis>(F); for (Instruction &I : instructions(F)) salvageKnowledge(&I, AC, DT); return PreservedAnalyses::all(); } + +namespace { +class AssumeBuilderPassLegacyPass : public FunctionPass { +public: + static char ID; + + AssumeBuilderPassLegacyPass() : FunctionPass(ID) { + initializeAssumeBuilderPassLegacyPassPass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override { + AssumptionCache &AC = + getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + DominatorTreeWrapperPass *DTWP = + getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + for (Instruction &I : instructions(F)) + salvageKnowledge(&I, &AC, DTWP ? 
&DTWP->getDomTree() : nullptr); + return true; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); + + AU.setPreservesAll(); + } +}; +} // namespace + +char AssumeBuilderPassLegacyPass::ID = 0; + +INITIALIZE_PASS_BEGIN(AssumeBuilderPassLegacyPass, "assume-builder", + "Assume Builder", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_END(AssumeBuilderPassLegacyPass, "assume-builder", + "Assume Builder", false, false) diff --git a/llvm/lib/Transforms/Utils/Utils.cpp b/llvm/lib/Transforms/Utils/Utils.cpp index fc1e824aadea79..ce98a739bea882 100644 --- a/llvm/lib/Transforms/Utils/Utils.cpp +++ b/llvm/lib/Transforms/Utils/Utils.cpp @@ -25,6 +25,7 @@ using namespace llvm; void llvm::initializeTransformUtils(PassRegistry &Registry) { initializeAddDiscriminatorsLegacyPassPass(Registry); initializeAssumeSimplifyPassLegacyPassPass(Registry); + initializeAssumeBuilderPassLegacyPassPass(Registry); initializeBreakCriticalEdgesPass(Registry); initializeCanonicalizeAliasesLegacyPassPass(Registry); initializeCanonicalizeFreezeInLoopsPass(Registry); diff --git a/llvm/test/Transforms/Util/assume-builder.ll b/llvm/test/Transforms/Util/assume-builder.ll index 33cc3ff3c985e4..991768655ada35 100644 --- a/llvm/test/Transforms/Util/assume-builder.ll +++ b/llvm/test/Transforms/Util/assume-builder.ll @@ -1,8 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature +; RUN: opt -assume-builder -verify --enable-knowledge-retention -S %s | FileCheck %s --check-prefixes=BASIC +; RUN: opt -assume-builder -verify --enable-knowledge-retention --assume-preserve-all -S %s | FileCheck %s --check-prefixes=ALL +; RUN: opt -assumption-cache-tracker -assume-builder -verify --enable-knowledge-retention -S %s | FileCheck %s --check-prefixes=WITH-AC +; RUN: opt -domtree -assumption-cache-tracker -assume-builder -verify --enable-knowledge-retention -S %s | FileCheck %s 
--check-prefixes=CROSS-BLOCK,CROSS-BLOCK-OLDPM +; RUN: opt -assume-builder -domtree -assumption-cache-tracker -assume-simplify -verify --enable-knowledge-retention -S %s | FileCheck %s --check-prefixes=FULL-SIMPLIFY + ; RUN: opt -passes='assume-builder,verify' --enable-knowledge-retention -S %s | FileCheck %s --check-prefixes=BASIC ; RUN: opt -passes='assume-builder,verify' --enable-knowledge-retention --assume-preserve-all -S %s | FileCheck %s --check-prefixes=ALL ; RUN: opt -passes='require<assumptions>,assume-builder,verify' --enable-knowledge-retention -S %s | FileCheck %s --check-prefixes=WITH-AC -; RUN: opt -passes='require<domtree>,require<assumptions>,assume-builder,verify' --enable-knowledge-retention -S %s | FileCheck %s --check-prefixes=CROSS-BLOCK +; RUN: opt -passes='require<domtree>,require<assumptions>,assume-builder,verify' --enable-knowledge-retention -S %s | FileCheck %s --check-prefixes=CROSS-BLOCK,CROSS-BLOCK-NEWMP ; RUN: opt -passes='assume-builder,require<domtree>,require<assumptions>,assume-simplify,verify' --enable-knowledge-retention -S %s | FileCheck %s --check-prefixes=FULL-SIMPLIFY target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -641,28 +647,30 @@ define dso_local i32 @_Z6squarePi(i32* %P, i32* %P1, i1 %cond) { ; WITH-AC-NEXT: store i32 0, i32* [[P1]], align 4 ; WITH-AC-NEXT: ret i32 0 ; -; CROSS-BLOCK-LABEL: define {{[^@]+}}@_Z6squarePi -; CROSS-BLOCK-SAME: (i32* [[P:%.*]], i32* [[P1:%.*]], i1 [[COND:%.*]]) -; CROSS-BLOCK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(i32* [[P]], i64 4), "nonnull"(i32* [[P]]), "align"(i32* [[P]], i64 4) ] -; CROSS-BLOCK-NEXT: store i32 0, i32* [[P]], align 4 -; CROSS-BLOCK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(i32* [[P1]], i64 4), "nonnull"(i32* [[P1]]), "align"(i32* [[P1]], i64 8) ] -; CROSS-BLOCK-NEXT: store i32 0, i32* [[P1]], align 8 -; CROSS-BLOCK-NEXT: br i1 [[COND]], label [[A:%.*]], label [[B:%.*]] -; CROSS-BLOCK: A: -; CROSS-BLOCK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[P]], i64 
8) ] -; CROSS-BLOCK-NEXT: store i32 0, i32* [[P]], align 8 -; CROSS-BLOCK-NEXT: store i32 0, i32* [[P1]], align 4 -; CROSS-BLOCK-NEXT: br i1 [[COND]], label [[C:%.*]], label [[B]] -; CROSS-BLOCK: B: -; CROSS-BLOCK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[P]], i64 8) ] -; CROSS-BLOCK-NEXT: store i32 0, i32* [[P]], align 8 -; CROSS-BLOCK-NEXT: store i32 0, i32* [[P1]], align 8 -; CROSS-BLOCK-NEXT: br label [[C]] -; CROSS-BLOCK: C: -; CROSS-BLOCK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[P]], i64 32) ] -; CROSS-BLOCK-NEXT: store i32 0, i32* [[P]], align 32 -; CROSS-BLOCK-NEXT: store i32 0, i32* [[P1]], align 4 -; CROSS-BLOCK-NEXT: ret i32 0 +; CROSS-BLOCK-OLDPM-LABEL: define {{[^@]+}}@_Z6squarePi +; CROSS-BLOCK-OLDPM-SAME: (i32* [[P:%.*]], i32* [[P1:%.*]], i1 [[COND:%.*]]) +; CROSS-BLOCK-OLDPM-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(i32* [[P]], i64 4), "nonnull"(i32* [[P]]), "align"(i32* [[P]], i64 4) ] +; CROSS-BLOCK-OLDPM-NEXT: store i32 0, i32* [[P]], align 4 +; CROSS-BLOCK-OLDPM-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(i32* [[P1]], i64 4), "nonnull"(i32* [[P1]]), "align"(i32* [[P1]], i64 8) ] +; CROSS-BLOCK-OLDPM-NEXT: store i32 0, i32* [[P1]], align 8 +; CROSS-BLOCK-OLDPM-NEXT: br i1 [[COND]], label [[A:%.*]], label [[B:%.*]] +; CROSS-BLOCK-OLDPM: A: +; CROSS-BLOCK-OLDPM-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[P]], i64 8) ] +; CROSS-BLOCK-OLDPM-NEXT: store i32 0, i32* [[P]], align 8 +; CROSS-BLOCK-OLDPM-NEXT: store i32 0, i32* [[P1]], align 4 +; CROSS-BLOCK-OLDPM-NEXT: br i1 [[COND]], label [[C:%.*]], label [[B]] +; CROSS-BLOCK-OLDPM: B: +; CROSS-BLOCK-OLDPM-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(i32* [[P]], i64 4), "nonnull"(i32* [[P]]), "align"(i32* [[P]], i64 8) ] +; CROSS-BLOCK-OLDPM-NEXT: store i32 0, i32* [[P]], align 8 +; CROSS-BLOCK-OLDPM-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(i32* [[P1]], i64 4), "nonnull"(i32* [[P1]]), "align"(i32* [[P1]], 
i64 8) ] +; CROSS-BLOCK-OLDPM-NEXT: store i32 0, i32* [[P1]], align 8 +; CROSS-BLOCK-OLDPM-NEXT: br label [[C]] +; CROSS-BLOCK-OLDPM: C: +; CROSS-BLOCK-OLDPM-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(i32* [[P]], i64 4), "nonnull"(i32* [[P]]), "align"(i32* [[P]], i64 32) ] +; CROSS-BLOCK-OLDPM-NEXT: store i32 0, i32* [[P]], align 32 +; CROSS-BLOCK-OLDPM-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(i32* [[P1]], i64 4), "nonnull"(i32* [[P1]]), "align"(i32* [[P1]], i64 4) ] +; CROSS-BLOCK-OLDPM-NEXT: store i32 0, i32* [[P1]], align 4 +; CROSS-BLOCK-OLDPM-NEXT: ret i32 0 ; ; FULL-SIMPLIFY-LABEL: define {{[^@]+}}@_Z6squarePi ; FULL-SIMPLIFY-SAME: (i32* nonnull align 4 dereferenceable(4) [[P:%.*]], i32* nonnull align 8 dereferenceable(4) [[P1:%.*]], i1 [[COND:%.*]]) @@ -684,6 +692,29 @@ define dso_local i32 @_Z6squarePi(i32* %P, i32* %P1, i1 %cond) { ; FULL-SIMPLIFY-NEXT: store i32 0, i32* [[P]], align 32 ; FULL-SIMPLIFY-NEXT: store i32 0, i32* [[P1]], align 4 ; FULL-SIMPLIFY-NEXT: ret i32 0 +; +; CROSS-BLOCK-NEWMP-LABEL: define {{[^@]+}}@_Z6squarePi +; CROSS-BLOCK-NEWMP-SAME: (i32* [[P:%.*]], i32* [[P1:%.*]], i1 [[COND:%.*]]) +; CROSS-BLOCK-NEWMP-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(i32* [[P]], i64 4), "nonnull"(i32* [[P]]), "align"(i32* [[P]], i64 4) ] +; CROSS-BLOCK-NEWMP-NEXT: store i32 0, i32* [[P]], align 4 +; CROSS-BLOCK-NEWMP-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(i32* [[P1]], i64 4), "nonnull"(i32* [[P1]]), "align"(i32* [[P1]], i64 8) ] +; CROSS-BLOCK-NEWMP-NEXT: store i32 0, i32* [[P1]], align 8 +; CROSS-BLOCK-NEWMP-NEXT: br i1 [[COND]], label [[A:%.*]], label [[B:%.*]] +; CROSS-BLOCK-NEWMP: A: +; CROSS-BLOCK-NEWMP-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[P]], i64 8) ] +; CROSS-BLOCK-NEWMP-NEXT: store i32 0, i32* [[P]], align 8 +; CROSS-BLOCK-NEWMP-NEXT: store i32 0, i32* [[P1]], align 4 +; CROSS-BLOCK-NEWMP-NEXT: br i1 [[COND]], label [[C:%.*]], label [[B]] +; CROSS-BLOCK-NEWMP: 
B: +; CROSS-BLOCK-NEWMP-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[P]], i64 8) ] +; CROSS-BLOCK-NEWMP-NEXT: store i32 0, i32* [[P]], align 8 +; CROSS-BLOCK-NEWMP-NEXT: store i32 0, i32* [[P1]], align 8 +; CROSS-BLOCK-NEWMP-NEXT: br label [[C]] +; CROSS-BLOCK-NEWMP: C: +; CROSS-BLOCK-NEWMP-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[P]], i64 32) ] +; CROSS-BLOCK-NEWMP-NEXT: store i32 0, i32* [[P]], align 32 +; CROSS-BLOCK-NEWMP-NEXT: store i32 0, i32* [[P1]], align 4 +; CROSS-BLOCK-NEWMP-NEXT: ret i32 0 ; store i32 0, i32* %P, align 4 store i32 0, i32* %P1, align 8 diff --git a/llvm/test/Transforms/Util/assume-simplify.ll b/llvm/test/Transforms/Util/assume-simplify.ll index 3005de3688e32c..5f5eece453ac00 100644 --- a/llvm/test/Transforms/Util/assume-simplify.ll +++ b/llvm/test/Transforms/Util/assume-simplify.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature +; RUN: opt -domtree -assumption-cache-tracker -assume-simplify -verify --enable-knowledge-retention -S %s | FileCheck %s ; RUN: opt -passes='require<domtree>,require<assumptions>,assume-simplify,verify' --enable-knowledge-retention -S %s | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" From 21d8f66d20b813edadac84b12cc83b66eaeae630 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 4 Jul 2020 11:40:40 -0700 Subject: [PATCH 2/3] [X86] Add test cases for pr46585. 
NFC --- llvm/test/CodeGen/X86/pr46585.ll | 35 ++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 llvm/test/CodeGen/X86/pr46585.ll diff --git a/llvm/test/CodeGen/X86/pr46585.ll b/llvm/test/CodeGen/X86/pr46585.ll new file mode 100644 index 00000000000000..65cd6b897ea36a --- /dev/null +++ b/llvm/test/CodeGen/X86/pr46585.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s + +@global = external local_unnamed_addr global i8 +@global.1 = external local_unnamed_addr global i64 + +define void @spam() local_unnamed_addr { +; CHECK-LABEL: spam: +; CHECK: ## %bb.0: ## %bb +; CHECK-NEXT: pmovmskb %xmm0, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je LBB0_2 +; CHECK-NEXT: ## %bb.1: ## %bb9 +; CHECK-NEXT: movq _global.1@{{.*}}(%rip), %rax +; CHECK-NEXT: movq $1, (%rax) +; CHECK-NEXT: LBB0_2: ## %bb8 +; CHECK-NEXT: retq +bb: + %tmp = load i8, i8* @global, align 1 + %tmp1 = and i8 %tmp, 1 + %tmp2 = insertelement <8 x i8> undef, i8 %tmp1, i32 0 + %tmp3 = ashr <8 x i8> %tmp2, + %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <8 x i32> zeroinitializer + %tmp5 = icmp slt <8 x i8> %tmp4, zeroinitializer + %tmp6 = bitcast <8 x i1> %tmp5 to i8 + %tmp7 = icmp eq i8 %tmp6, 0 + br i1 %tmp7, label %bb8, label %bb9 + +bb8: ; preds = %bb9, %bb + ret void + +bb9: ; preds = %bb + store i64 1, i64* @global.1, align 8 + br label %bb8 +} From 120c5f1057dc50229f73bc75bbabf4df6ee50fef Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 4 Jul 2020 11:42:00 -0700 Subject: [PATCH 3/3] [DAGCombiner] Don't fold zext_vector_inreg/sext_vector_inreg(undef) to undef. Fold to 0. zext_vector_inreg needs to produce 0s in the extended bits and sext_vector_inreg needs to produce upper bits that are all the same. So we should fold them to a 0 vector instead of undef. Fixes PR46585. 
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 ++++-- llvm/test/CodeGen/X86/pr46585.ll | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index bd7d94e2243331..c0d5337ecc3395 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11086,8 +11086,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + // sext_vector_inreg(undef) = 0 because the top bits will all be the same. if (N0.isUndef()) - return DAG.getUNDEF(VT); + return DAG.getConstant(0, SDLoc(N), VT); if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) return Res; @@ -11102,8 +11103,9 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + // zext_vector_inreg(undef) = 0 because the top bits will be zero. if (N0.isUndef()) - return DAG.getUNDEF(VT); + return DAG.getConstant(0, SDLoc(N), VT); if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) return Res; diff --git a/llvm/test/CodeGen/X86/pr46585.ll b/llvm/test/CodeGen/X86/pr46585.ll index 65cd6b897ea36a..7bea63176d1bb9 100644 --- a/llvm/test/CodeGen/X86/pr46585.ll +++ b/llvm/test/CodeGen/X86/pr46585.ll @@ -7,7 +7,7 @@ define void @spam() local_unnamed_addr { ; CHECK-LABEL: spam: ; CHECK: ## %bb.0: ## %bb -; CHECK-NEXT: pmovmskb %xmm0, %eax +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je LBB0_2 ; CHECK-NEXT: ## %bb.1: ## %bb9