From b65c19afd44dcb803afa1deca3a78552a2f9084f Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Sun, 16 Jan 2022 18:01:19 -0500 Subject: [PATCH 01/10] [LLVM] Remove LLVM 11 patches from source-tree (#43824) --- deps/llvm.mk | 42 - deps/patches/llvm-10-D85553.patch | 53 - .../llvm-10-unique_function_clang-sa.patch | 28 - .../llvm-11-AArch64-FastIsel-bug.patch | 50 - .../llvm-11-D75072-SCEV-add-type.patch | 452 ------- ...lvm-11-D85313-debuginfo-empty-arange.patch | 277 ---- ...lvm-11-D90722-rtdyld-absolute-relocs.patch | 81 -- deps/patches/llvm-11-D92906-ppc-setjmp.patch | 226 ---- .../llvm-11-D93092-ppc-knownbits.patch | 106 -- .../llvm-11-D93154-globalisel-as.patch | 88 -- .../llvm-11-D94058-sext-atomic-ops.patch | 1201 ----------------- deps/patches/llvm-11-D94813-mergeicmps.patch | 111 -- deps/patches/llvm-11-D94980-CTR-half.patch | 398 ------ .../llvm-11-D96283-dagcombine-half.patch | 175 --- .../llvm-11-D97435-AArch64-movaddrreg.patch | 484 ------- deps/patches/llvm-11-D97571-AArch64-loh.patch | 101 -- .../llvm-11-PR48458-X86ISelDAGToDAG.patch | 61 - deps/patches/llvm-11-aarch64-addrspace.patch | 31 - deps/patches/llvm-11-ppc-half-ctr.patch | 96 -- deps/patches/llvm-11-ppc-sp-from-bp.patch | 621 --------- .../patches/llvm-6.0-DISABLE_ABI_CHECKS.patch | 39 - deps/patches/llvm-7.0-D44650.patch | 13 - ...lvm-D27629-AArch64-large_model_6.0.1.patch | 53 - deps/patches/llvm-D80101.patch | 13 - deps/patches/llvm-D84031.patch | 10 - deps/patches/llvm-D88630-clang-cmake.patch | 29 - .../llvm-invalid-addrspacecast-sink.patch | 116 -- deps/patches/llvm-julia-tsan-custom-as.patch | 28 - ...llvm-rGb498303066a6-gcc11-header-fix.patch | 21 - deps/patches/llvm7-revert-D44485.patch | 94 -- .../patches/llvm8-D34078-vectorize-fdiv.patch | 42 - deps/patches/llvm9-D50010-VNCoercion-ni.patch | 64 - 32 files changed, 5204 deletions(-) delete mode 100644 deps/patches/llvm-10-D85553.patch delete mode 100644 deps/patches/llvm-10-unique_function_clang-sa.patch delete mode 100644 deps/patches/llvm-11-AArch64-FastIsel-bug.patch delete mode 100644 deps/patches/llvm-11-D75072-SCEV-add-type.patch delete mode 100644 deps/patches/llvm-11-D85313-debuginfo-empty-arange.patch delete mode 100644 deps/patches/llvm-11-D90722-rtdyld-absolute-relocs.patch delete mode 100644 deps/patches/llvm-11-D92906-ppc-setjmp.patch delete mode 100644 deps/patches/llvm-11-D93092-ppc-knownbits.patch delete mode 100644 deps/patches/llvm-11-D93154-globalisel-as.patch delete mode 100644 deps/patches/llvm-11-D94058-sext-atomic-ops.patch delete mode 100644 deps/patches/llvm-11-D94813-mergeicmps.patch delete mode 100644 deps/patches/llvm-11-D94980-CTR-half.patch delete mode 100644 deps/patches/llvm-11-D96283-dagcombine-half.patch delete mode 100644 deps/patches/llvm-11-D97435-AArch64-movaddrreg.patch delete mode 100644 deps/patches/llvm-11-D97571-AArch64-loh.patch delete mode 100644 deps/patches/llvm-11-PR48458-X86ISelDAGToDAG.patch delete mode 100644 deps/patches/llvm-11-aarch64-addrspace.patch delete mode 100644 deps/patches/llvm-11-ppc-half-ctr.patch delete mode 100644 deps/patches/llvm-11-ppc-sp-from-bp.patch delete mode 100644 deps/patches/llvm-6.0-DISABLE_ABI_CHECKS.patch delete mode 100644 deps/patches/llvm-7.0-D44650.patch delete mode 100644 deps/patches/llvm-D27629-AArch64-large_model_6.0.1.patch delete mode 100644 deps/patches/llvm-D80101.patch delete mode 100644 deps/patches/llvm-D84031.patch delete mode 100644 deps/patches/llvm-D88630-clang-cmake.patch delete mode 100644 deps/patches/llvm-invalid-addrspacecast-sink.patch delete mode 100644 deps/patches/llvm-julia-tsan-custom-as.patch delete mode 100644 deps/patches/llvm-rGb498303066a6-gcc11-header-fix.patch delete mode 100644 deps/patches/llvm7-revert-D44485.patch delete mode 100644 deps/patches/llvm8-D34078-vectorize-fdiv.patch delete mode 100644 deps/patches/llvm9-D50010-VNCoercion-ni.patch diff --git a/deps/llvm.mk b/deps/llvm.mk index ff30446df9fe6..518e764dcde04 100644 --- a/deps/llvm.mk +++ b/deps/llvm.mk @@ -230,48 +230,6 @@ $$(LLVM_BUILDDIR_withtype)/build-compiled: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patc LLVM_PATCH_PREV := $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied endef -ifeq ($(LLVM_VER_SHORT),11.0) -ifeq ($(LLVM_VER_PATCH), 0) -$(eval $(call LLVM_PATCH,llvm-D27629-AArch64-large_model_6.0.1)) # remove for LLVM 12 -endif # LLVM_VER 11.0.0 -$(eval $(call LLVM_PATCH,llvm8-D34078-vectorize-fdiv)) # remove for LLVM 12 -$(eval $(call LLVM_PATCH,llvm-7.0-D44650)) # replaced by D90969 for LLVM 12 -$(eval $(call LLVM_PATCH,llvm-6.0-DISABLE_ABI_CHECKS)) # Needs upstreaming -$(eval $(call LLVM_PATCH,llvm9-D50010-VNCoercion-ni)) # remove for LLVM 12 -$(eval $(call LLVM_PATCH,llvm7-revert-D44485)) # Needs upstreaming -$(eval $(call LLVM_PATCH,llvm-11-D75072-SCEV-add-type)) -$(eval $(call LLVM_PATCH,llvm-julia-tsan-custom-as)) -$(eval $(call LLVM_PATCH,llvm-D80101)) # remove for LLVM 12 -$(eval $(call LLVM_PATCH,llvm-D84031)) # remove for LLVM 12 -ifeq ($(LLVM_VER_PATCH), 0) -$(eval $(call LLVM_PATCH,llvm-10-D85553)) # remove for LLVM 12 -endif # LLVM_VER 11.0.0 -$(eval $(call LLVM_PATCH,llvm-10-unique_function_clang-sa)) # Needs upstreaming -ifeq ($(BUILD_LLVM_CLANG),1) -$(eval $(call LLVM_PATCH,llvm-D88630-clang-cmake)) -endif -ifeq ($(LLVM_VER_PATCH), 0) -$(eval $(call LLVM_PATCH,llvm-11-D85313-debuginfo-empty-arange)) # remove for LLVM 12 -$(eval $(call LLVM_PATCH,llvm-11-D90722-rtdyld-absolute-relocs)) # remove for LLVM 12 -endif # LLVM_VER 11.0.0 -$(eval $(call LLVM_PATCH,llvm-invalid-addrspacecast-sink)) # Still being upstreamed as D92210 -$(eval $(call LLVM_PATCH,llvm-11-D92906-ppc-setjmp)) # remove for LLVM 12 -$(eval $(call LLVM_PATCH,llvm-11-PR48458-X86ISelDAGToDAG)) # remove for LLVM 12 -$(eval $(call LLVM_PATCH,llvm-11-D93092-ppc-knownbits)) # remove for LLVM 12 -$(eval $(call LLVM_PATCH,llvm-11-D93154-globalisel-as)) -$(eval $(call LLVM_PATCH,llvm-11-ppc-half-ctr)) # remove for LLVM 12 -$(eval $(call LLVM_PATCH,llvm-11-ppc-sp-from-bp)) # remove for LLVM 12 -$(eval $(call LLVM_PATCH,llvm-rGb498303066a6-gcc11-header-fix)) # remove for LLVM 12 -$(eval $(call LLVM_PATCH,llvm-11-D94813-mergeicmps)) -$(eval $(call LLVM_PATCH,llvm-11-D94980-CTR-half)) # remove for LLVM 12 -$(eval $(call LLVM_PATCH,llvm-11-D94058-sext-atomic-ops)) # remove for LLVM 12 -$(eval $(call LLVM_PATCH,llvm-11-D96283-dagcombine-half)) # remove for LLVM 12 -$(eval $(call LLVM_PROJ_PATCH,llvm-11-AArch64-FastIsel-bug)) -$(eval $(call LLVM_PROJ_PATCH,llvm-11-D97435-AArch64-movaddrreg)) -$(eval $(call LLVM_PROJ_PATCH,llvm-11-D97571-AArch64-loh)) # remove for LLVM 13 -$(eval $(call LLVM_PROJ_PATCH,llvm-11-aarch64-addrspace)) # remove for LLVM 13 -endif # LLVM_VER 11.0 - # NOTE: LLVM 12 and 13 have their patches applied to JuliaLang/llvm-project # Add a JL prefix to the version map. DO NOT REMOVE diff --git a/deps/patches/llvm-10-D85553.patch b/deps/patches/llvm-10-D85553.patch deleted file mode 100644 index cb48edba1c0ee..0000000000000 --- a/deps/patches/llvm-10-D85553.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 4d30f46738d417c305c0e748a49020d4513ac4ee Mon Sep 17 00:00:00 2001 -From: Keno Fischer -Date: Fri, 7 Aug 2020 16:38:15 -0400 -Subject: [PATCH] [X86] Don't produce bad x86andp nodes for i1 vectors - -In D85499, I attempted to fix this same issue by canonicalizing -andnp for i1 vectors, but since there was some opposition to such -a change, this commit just fixes the bug by using two different -forms depending on which kind of vector type is in use. We can -then always decide to switch the canonical forms later. - -Description of the original bug: -We have a DAG combine that tries to fold (vselect cond, 0000..., X) -> (andnp cond, x). -However, it does so by attempting to create an i64 vector with the number -of elements obtained by truncating division by 64 from the bitwidth. This is -bad for mask vectors like v8i1, since that division is just zero. Besides, -we don't want i64 vectors anyway. For i1 vectors, switch the pattern -to (andnp (not cond), x), which is the canonical form for `kandn` -on mask registers. - -Fixes https://github.com/JuliaLang/julia/issues/36955. - -Differential Revision: https://reviews.llvm.org/D85553 ---- - llvm/lib/Target/X86/X86ISelLowering.cpp | 12 ++++++++---- - 1 file changed, 8 insertions(+), 4 deletions(-) - -diff --git llvm/lib/Target/X86/X86ISelLowering.cpp llvm/lib/Target/X86/X86ISelLowering.cpp -index c8720d9ae3a..17eaa49c83f 100644 ---- llvm/lib/Target/X86/X86ISelLowering.cpp -+++ llvm/lib/Target/X86/X86ISelLowering.cpp -@@ -37630,10 +37630,14 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG, - - // vselect Cond, 000..., X -> andn Cond, X - if (TValIsAllZeros) { -- MVT AndNVT = MVT::getVectorVT(MVT::i64, CondVT.getSizeInBits() / 64); -- SDValue CastCond = DAG.getBitcast(AndNVT, Cond); -- SDValue CastRHS = DAG.getBitcast(AndNVT, RHS); -- SDValue AndN = DAG.getNode(X86ISD::ANDNP, DL, AndNVT, CastCond, CastRHS); -+ SDValue CastRHS = DAG.getBitcast(CondVT, RHS); -+ SDValue AndN; -+ // The canonical form differs for i1 vectors - x86andnp is not used -+ if (CondVT.getScalarType() == MVT::i1) -+ AndN = DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), -+ CastRHS); -+ else -+ AndN = DAG.getNode(X86ISD::ANDNP, DL, CondVT, Cond, CastRHS); - return DAG.getBitcast(VT, AndN); - } - --- -2.28.0 - diff --git a/deps/patches/llvm-10-unique_function_clang-sa.patch b/deps/patches/llvm-10-unique_function_clang-sa.patch deleted file mode 100644 index cc09672a7a840..0000000000000 --- a/deps/patches/llvm-10-unique_function_clang-sa.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 1fa6efaa946243004c45be92e66b324dc980df7d Mon Sep 17 00:00:00 2001 -From: Valentin Churavy -Date: Thu, 17 Sep 2020 23:22:45 +0200 -Subject: [PATCH] clang-sa can't determine that !RHS implies !LHS - ---- - llvm/include/llvm/ADT/FunctionExtras.h | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/include/llvm/ADT/FunctionExtras.h b/include/llvm/ADT/FunctionExtras.h -index 121aa527a5d..b9b6d829b14 100644 ---- a/include/llvm/ADT/FunctionExtras.h -+++ b/include/llvm/ADT/FunctionExtras.h -@@ -193,9 +193,11 @@ public: - // Copy the callback and inline flag. - CallbackAndInlineFlag = RHS.CallbackAndInlineFlag; - -+#ifndef __clang_analyzer__ - // If the RHS is empty, just copying the above is sufficient. - if (!RHS) - return; -+#endif - - if (!isInlineStorage()) { - // The out-of-line case is easiest to move. --- -2.28.0 - diff --git a/deps/patches/llvm-11-AArch64-FastIsel-bug.patch b/deps/patches/llvm-11-AArch64-FastIsel-bug.patch deleted file mode 100644 index 5ab5e96c37a2e..0000000000000 --- a/deps/patches/llvm-11-AArch64-FastIsel-bug.patch +++ /dev/null @@ -1,50 +0,0 @@ -From d82434785ecab16223a0bd4ec8ede020bf244003 Mon Sep 17 00:00:00 2001 -From: Keno Fischer -Date: Mon, 1 Mar 2021 16:42:05 -0500 -Subject: [PATCH 4/4] AArch64: Remove Bad optimization - -Removes the code responsible for causing https://bugs.llvm.org/show_bug.cgi?id=49357. -A fix is in progress upstream, but I don't think it's easy, so this -fixes the bug in the meantime. The optimization it does is minor. ---- - llvm/lib/Target/AArch64/AArch64FastISel.cpp | 24 --------------------- - 1 file changed, 24 deletions(-) - -diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp -index cf3ebed6ef19..6908a51c47d6 100644 ---- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp -+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp -@@ -4600,30 +4600,6 @@ bool AArch64FastISel::selectIntExt(const Instruction *I) { - - // Try to optimize already sign-/zero-extended values from function arguments. - bool IsZExt = isa(I); -- if (const auto *Arg = dyn_cast(I->getOperand(0))) { -- if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) { -- if (RetVT == MVT::i64 && SrcVT != MVT::i64) { -- unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass); -- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, -- TII.get(AArch64::SUBREG_TO_REG), ResultReg) -- .addImm(0) -- .addReg(SrcReg, getKillRegState(SrcIsKill)) -- .addImm(AArch64::sub_32); -- SrcReg = ResultReg; -- } -- // Conservatively clear all kill flags from all uses, because we are -- // replacing a sign-/zero-extend instruction at IR level with a nop at MI -- // level. The result of the instruction at IR level might have been -- // trivially dead, which is now not longer true. -- unsigned UseReg = lookUpRegForValue(I); -- if (UseReg) -- MRI.clearKillFlags(UseReg); -- -- updateValueMap(I, SrcReg); -- return true; -- } -- } -- - unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt); - if (!ResultReg) - return false; --- -2.25.1 - diff --git a/deps/patches/llvm-11-D75072-SCEV-add-type.patch b/deps/patches/llvm-11-D75072-SCEV-add-type.patch deleted file mode 100644 index 4eef1425629cc..0000000000000 --- a/deps/patches/llvm-11-D75072-SCEV-add-type.patch +++ /dev/null @@ -1,452 +0,0 @@ -From 945fa720e2426328288943ced2510671be5e59b9 Mon Sep 17 00:00:00 2001 -From: Keno Fischer -Date: Fri, 9 Oct 2020 15:02:37 -0400 -Subject: [PATCH] [SCEV] Record NI types in add exprs - -This fixes a case where loop-reduce introduces ptrtoint/inttoptr for -non-integral address space pointers. Over the past several years, we -have gradually improved the SCEVExpander to actually do something -sensible for non-integral pointer types. However, that obviously -relies on the expander knowing what the type of the SCEV expression is. -That is usually the case, but there is one important case where it's -not: The type of an add expression is just the type of the last operand, -so if the non-integral pointer is not the last operand, later uses of -that SCEV may not realize that the given add expression contains -non-integral pointers and may try to expand it as integers. - -One interesting observation is that we do get away with this scheme in -shockingly many cases. The reason for this is that SCEV expressions -often have an `scUnknown` pointer base, which our sort order on the -operands of add expressions sort behind basically everything else, -so it usually ends up as the last operand. - -One situation where this fails is included as a test case. This test -case was bugpoint-reduced from the issue reported at -https://github.com/JuliaLang/julia/issues/31156. What happens here -is that the pointer base is an scAddRec from an outer loop, plus an -scUnknown integer offset. By our sort order, the scUnknown gets sorted -after the scAddRec pointer base, thus making an add expression of these -two operands have integer type. This then confuses the expander, into -attempting to expand the whole thing as integers, which will obviously -fail when reaching the non-integral pointer. - -I considered a few options to solve this, but here's what I ended up -settling on: The AddExpr class gains a new subclass that explicitly -stores the type of the expression. This subclass is used whenever one -of the operands is a non-integral pointer. To reduce the impact for the -regular case (where the SCEV expression contains no non-integral -pointers), a bit flag is kept in each flag expression to indicate -whether it is of non-integral pointer type (this should give the same -answer as asking if getType() is non-integral, but performing that -query may involve a pointer chase and requires the DataLayout). For -add expressions that flag is also used to indicate whether we're using -the subclass or not. This is slightly inefficient, because it uses -the subclass even in the (not uncommon) case where the last operand -does actually accurately reflect the non-integral pointer type. However, -it didn't seem worth the extra flag bit and complexity to do this -micro-optimization. - -I had hoped that we could additionally restrict mul exprs from -containing any non-integral pointers, and also require add exprs to -only have one operand containg such pointers (but not more), but this -turned out not to work. The reason for this is that SCEV wants to -form differences between pointers, which it represents as `A + B*-1`, -so we need to allow both multiplication by `-1` and addition with -multiple non-integral pointer arguments. I'm not super happy with -that situation, but I think it exposes a more general problem with -non-integral pointers in LLVM. We don't actually have a way to express -the difference between two non-integral pointers at the IR level. -In theory this is a problem for SCEV, because it means that we can't -materialize such SCEV expression. However, in practice, these -expressions generally have the same base pointer, so SCEV will -appropriately simplify them to just the integer components. -Nevertheless it is a bit unsatisfying. Perhaps we could have an -intrinsic that takes the byte difference between two pointers to the -same allocated object (in the same sense as is used in getelementptr), -which should be a sensible operation even for non-integral pointers. -However, given the practical considerations above, that's a project -for another time. For now, simply allowing the existing pointer-diff -pattern for non-integral pointers seems to work ok. - -Differential Revision: https://reviews.llvm.org/D75072 ---- - llvm/include/llvm/Analysis/ScalarEvolution.h | 26 ++++-- - .../Analysis/ScalarEvolutionExpressions.h | 81 ++++++++++++++++--- - llvm/lib/Analysis/ScalarEvolution.cpp | 44 +++++++--- - .../LoopStrengthReduce/nonintegral.ll | 35 +++++++- - 4 files changed, 159 insertions(+), 27 deletions(-) - -diff --git llvm/include/llvm/Analysis/ScalarEvolution.h llvm/include/llvm/Analysis/ScalarEvolution.h -index 81c5fc93258..964f57e940e 100644 ---- llvm/include/llvm/Analysis/ScalarEvolution.h -+++ llvm/include/llvm/Analysis/ScalarEvolution.h -@@ -119,6 +119,19 @@ public: - NoWrapMask = (1 << 3) - 1 - }; - -+ /// HasNonIntegralPointerFlag are bitfield indices into SubclassData. -+ /// -+ /// When constructing SCEV expressions for LLVM expressions with non-integral -+ /// pointer types, some additional processing is required to ensure that we -+ /// don't introduce any illegal transformations. However, non-integral pointer -+ /// types are a very rarely used feature, so we want to make sure to only do -+ /// such processing if they are actually used. To ensure minimal performance -+ /// impact, we memoize that fact in using these flags. -+ enum HasNonIntegralPointerFlag { -+ FlagNoNIPointers = 0, -+ FlagHasNIPointers = (1 << 3) -+ }; -+ - explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy, - unsigned short ExpressionSize) - : FastID(ID), SCEVType(SCEVTy), ExpressionSize(ExpressionSize) {} -@@ -155,6 +168,10 @@ public: - return ExpressionSize; - } - -+ bool hasNonIntegralPointers() const { -+ return SubclassData & FlagHasNIPointers; -+ } -+ - /// Print out the internal representation of this scalar to the specified - /// stream. This should really only be used for debugging purposes. - void print(raw_ostream &OS) const; -@@ -720,9 +737,8 @@ public: - unsigned getSmallConstantTripMultiple(const Loop *L, - BasicBlock *ExitingBlock); - -- - /// The terms "backedge taken count" and "exit count" are used -- /// interchangeably to refer to the number of times the backedge of a loop -+ /// interchangeably to refer to the number of times the backedge of a loop - /// has executed before the loop is exited. - enum ExitCountKind { - /// An expression exactly describing the number of times the backedge has -@@ -733,11 +749,11 @@ public: - }; - - /// Return the number of times the backedge executes before the given exit -- /// would be taken; if not exactly computable, return SCEVCouldNotCompute. -+ /// would be taken; if not exactly computable, return SCEVCouldNotCompute. - /// For a single exit loop, this value is equivelent to the result of - /// getBackedgeTakenCount. The loop is guaranteed to exit (via *some* exit) - /// before the backedge is executed (ExitCount + 1) times. Note that there -- /// is no guarantee about *which* exit is taken on the exiting iteration. -+ /// is no guarantee about *which* exit is taken on the exiting iteration. - const SCEV *getExitCount(const Loop *L, BasicBlock *ExitingBlock, - ExitCountKind Kind = Exact); - -@@ -766,7 +782,7 @@ public: - /// SCEVCouldNotCompute object. - const SCEV *getConstantMaxBackedgeTakenCount(const Loop *L) { - return getBackedgeTakenCount(L, ConstantMaximum); -- } -+ } - - /// Return true if the backedge taken count is either the value returned by - /// getConstantMaxBackedgeTakenCount or zero. -diff --git llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h -index 0076e02ae1b..1d0a6d20b26 100644 ---- llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h -+++ llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h -@@ -188,6 +188,13 @@ class Type; - return getNoWrapFlags(FlagNW) != FlagAnyWrap; - } - -+ void setHasNIPtr(bool HasNIPtr) { -+ if (HasNIPtr) -+ SubclassData |= FlagHasNIPointers; -+ else -+ SubclassData &= ~FlagHasNIPointers; -+ } -+ - /// Methods for support type inquiry through isa, cast, and dyn_cast: - static bool classof(const SCEV *S) { - return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr || -@@ -224,19 +231,16 @@ class Type; - - Type *Ty; - -+ protected: - SCEVAddExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N) - : SCEVCommutativeExpr(ID, scAddExpr, O, N) { -- auto *FirstPointerTypedOp = find_if(operands(), [](const SCEV *Op) { -- return Op->getType()->isPointerTy(); -- }); -- if (FirstPointerTypedOp != operands().end()) -- Ty = (*FirstPointerTypedOp)->getType(); -- else -- Ty = getOperand(0)->getType(); -+ - } - - public: -- Type *getType() const { return Ty; } -+ // Returns the type of the add expression, by looking either at the last operand -+ // or deferring to the SCEVAddNIExpr subclass. -+ Type *getType() const; - - /// Methods for support type inquiry through isa, cast, and dyn_cast: - static bool classof(const SCEV *S) { -@@ -244,6 +248,46 @@ class Type; - } - }; - -+ /// This node represents an addition of some number of SCEVs, one which -+ /// is a non-integral pointer type, requiring us to know the type exactly for -+ /// correctness. -+ class SCEVAddNIExpr : public SCEVAddExpr { -+ friend class ScalarEvolution; -+ PointerType *NIType; -+ -+ SCEVAddNIExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N, -+ PointerType *NIType) -+ : SCEVAddExpr(ID, O, N), NIType(NIType) { -+ SubclassData |= FlagHasNIPointers; -+ } -+ -+ public: -+ Type *getType() const { return NIType; } -+ -+ /// Methods for support type inquiry through isa, cast, and dyn_cast: -+ static bool classof(const SCEV *S) { -+ return S->getSCEVType() == scAddExpr && S->hasNonIntegralPointers(); -+ } -+ }; -+ -+ inline Type *SCEVAddExpr::getType() const { -+ // In general, use the type of the last operand, which is likely to be a -+ // pointer type, if there is one. This doesn't usually matter, but it can -+ // help reduce casts when the expressions are expanded. In the (unusual) -+ // case that we're working with non-integral pointers, we have a subclass -+ // that stores that type explicitly. -+ if (hasNonIntegralPointers()) -+ return cast(this)->getType(); -+ -+ auto *FirstPointerTypedOp = find_if(operands(), [](const SCEV *Op) { -+ return Op->getType()->isPointerTy(); -+ }); -+ if (FirstPointerTypedOp != operands().end()) -+ return (*FirstPointerTypedOp)->getType(); -+ else -+ return getOperand(0)->getType(); -+ } -+ - /// This node represents multiplication of some number of SCEVs. - class SCEVMulExpr : public SCEVCommutativeExpr { - friend class ScalarEvolution; -@@ -253,6 +297,18 @@ class Type; - : SCEVCommutativeExpr(ID, scMulExpr, O, N) {} - - public: -+ Type *getType() const { -+ // In general, we can't form SCEVMulExprs with non-integral pointer types, -+ // but for the moment we need to allow a special case: Multiplying by -+ // -1 to be able express the difference between two pointers. In order -+ // to maintain the invariant that SCEVs with the NI flag set should have -+ // a type corresponding to the contained NI ptr, we need to return the -+ // type of the pointer here. -+ if (hasNonIntegralPointers()) -+ return getOperand(getNumOperands() - 1)->getType(); -+ return SCEVCommutativeExpr::getType(); -+ } -+ - /// Methods for support type inquiry through isa, cast, and dyn_cast: - static bool classof(const SCEV *S) { - return S->getSCEVType() == scMulExpr; -@@ -479,9 +535,12 @@ class Type; - /// instances owned by a ScalarEvolution. - SCEVUnknown *Next; - -- SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V, -- ScalarEvolution *se, SCEVUnknown *next) : -- SCEV(ID, scUnknown, 1), CallbackVH(V), SE(se), Next(next) {} -+ SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V, ScalarEvolution *se, -+ SCEVUnknown *next, bool ValueIsNIPtr) -+ : SCEV(ID, scUnknown, 1), CallbackVH(V), SE(se), Next(next) { -+ if (ValueIsNIPtr) -+ SubclassData |= FlagHasNIPointers; -+ } - - // Implement CallbackVH. - void deleted() override; -diff --git llvm/lib/Analysis/ScalarEvolution.cpp llvm/lib/Analysis/ScalarEvolution.cpp -index 3c96b3f2046..8197d368b40 100644 ---- llvm/lib/Analysis/ScalarEvolution.cpp -+++ llvm/lib/Analysis/ScalarEvolution.cpp -@@ -369,12 +369,13 @@ Type *SCEV::getType() const { - case scSignExtend: - return cast(this)->getType(); - case scAddRecExpr: -- case scMulExpr: - case scUMaxExpr: - case scSMaxExpr: - case scUMinExpr: - case scSMinExpr: - return cast(this)->getType(); -+ case scMulExpr: -+ return cast(this)->getType(); - case scAddExpr: - return cast(this)->getType(); - case scUDivExpr: -@@ -2193,8 +2194,9 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, - } - - // Limit recursion calls depth. -- if (Depth > MaxArithDepth || hasHugeExpression(Ops)) -+ if (Depth > MaxArithDepth || hasHugeExpression(Ops)) { - return getOrCreateAddExpr(Ops, Flags); -+ } - - if (SCEV *S = std::get<0>(findExistingSCEVInCache(scAddExpr, Ops))) { - static_cast(S)->setNoWrapFlags(Flags); -@@ -2540,16 +2542,27 @@ ScalarEvolution::getOrCreateAddExpr(ArrayRef Ops, - SCEV::NoWrapFlags Flags) { - FoldingSetNodeID ID; - ID.AddInteger(scAddExpr); -- for (const SCEV *Op : Ops) -- ID.AddPointer(Op); -+ bool HasNIPtr = false; -+ PointerType *NIPtrType = nullptr; -+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) { -+ ID.AddPointer(Ops[i]); -+ if (Ops[i]->hasNonIntegralPointers()) { -+ HasNIPtr = true; -+ NIPtrType = cast(Ops[i]->getType()); -+ } -+ } - void *IP = nullptr; - SCEVAddExpr *S = - static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); - if (!S) { - const SCEV **O = SCEVAllocator.Allocate(Ops.size()); - std::uninitialized_copy(Ops.begin(), Ops.end(), O); -- S = new (SCEVAllocator) -- SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size()); -+ if (HasNIPtr) -+ S = new (SCEVAllocator) -+ SCEVAddNIExpr(ID.Intern(SCEVAllocator), O, Ops.size(), NIPtrType); -+ else -+ S = new (SCEVAllocator) -+ SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size()); - UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); - } -@@ -2562,8 +2575,10 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef Ops, - const Loop *L, SCEV::NoWrapFlags Flags) { - FoldingSetNodeID ID; - ID.AddInteger(scAddRecExpr); -- for (unsigned i = 0, e = Ops.size(); i != e; ++i) -+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) { -+ assert(i == 0 || !Ops[i]->hasNonIntegralPointers()); - ID.AddPointer(Ops[i]); -+ } - ID.AddPointer(L); - void *IP = nullptr; - SCEVAddRecExpr *S = -@@ -2577,6 +2592,7 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef Ops, - addToLoopUseLists(S); - } - S->setNoWrapFlags(Flags); -+ S->setHasNIPtr(Ops[0]->hasNonIntegralPointers()); - return S; - } - -@@ -2585,8 +2601,11 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef Ops, - SCEV::NoWrapFlags Flags) { - FoldingSetNodeID ID; - ID.AddInteger(scMulExpr); -- for (unsigned i = 0, e = Ops.size(); i != e; ++i) -+ bool HasNIPtr = false; -+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) { -+ HasNIPtr |= Ops[i]->hasNonIntegralPointers(); - ID.AddPointer(Ops[i]); -+ } - void *IP = nullptr; - SCEVMulExpr *S = - static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); -@@ -2599,6 +2618,7 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef Ops, - addToLoopUseLists(S); - } - S->setNoWrapFlags(Flags); -+ S->setHasNIPtr(HasNIPtr); - return S; - } - -@@ -3456,8 +3476,11 @@ const SCEV *ScalarEvolution::getMinMaxExpr(unsigned Kind, - return ExistingSCEV; - const SCEV **O = SCEVAllocator.Allocate(Ops.size()); - std::uninitialized_copy(Ops.begin(), Ops.end(), O); -- SCEV *S = new (SCEVAllocator) SCEVMinMaxExpr( -+ SCEVMinMaxExpr *S = new (SCEVAllocator) SCEVMinMaxExpr( - ID.Intern(SCEVAllocator), static_cast(Kind), O, Ops.size()); -+ // For MinMaxExprs it's sufficient to see if the first Op has NI data, as the -+ // operands all need to be of the same type. -+ S->setHasNIPtr(Ops[0]->hasNonIntegralPointers()); - - UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); -@@ -3540,8 +3563,9 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) { - "Stale SCEVUnknown in uniquing map!"); - return S; - } -+ bool ValueIsNIPtr = getDataLayout().isNonIntegralPointerType(V->getType()); - SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this, -- FirstUnknown); -+ FirstUnknown, ValueIsNIPtr); - FirstUnknown = cast(S); - UniqueSCEVs.InsertNode(S, IP); - return S; -diff --git llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll -index 5648e3aa74a..6936521f3a6 100644 ---- llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll -+++ llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll -@@ -2,7 +2,7 @@ - - ; Address Space 10 is non-integral. The optimizer is not allowed to use - ; ptrtoint/inttoptr instructions. Make sure that this doesn't happen --target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12" -+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" - target triple = "x86_64-unknown-linux-gnu" - - define void @japi1__unsafe_getindex_65028(i64 addrspace(10)* %arg) { -@@ -43,3 +43,36 @@ if38: ; preds = %L119 - done: ; preds = %if38 - ret void - } -+ -+; This is a bugpoint-reduced regression test - It doesn't make too much sense by itself, -+; but creates the correct SCEV expressions to reproduce the issue. See -+; https://github.com/JuliaLang/julia/issues/31156 for the original bug report. -+define void @"japi1_permutedims!_4259"(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i1 %g, i8 addrspace(13)* %base) #0 { -+; CHECK-NOT: inttoptr -+; CHECK-NOT: ptrtoint -+; CHECK: getelementptr i8, i8 addrspace(13)* {{.*}}, i64 {{.*}} -+top: -+ br label %L42.L46_crit_edge.us -+ -+L42.L46_crit_edge.us: ; preds = %L82.us.us.loopexit, %top -+ %value_phi11.us = phi i64 [ %a, %top ], [ %2, %L82.us.us.loopexit ] -+ %0 = sub i64 %value_phi11.us, %b -+ %1 = add i64 %0, %c -+ %spec.select = select i1 %g, i64 %d, i64 0 -+ br label %L62.us.us -+ -+L82.us.us.loopexit: ; preds = %L62.us.us -+ %2 = add i64 %e, %value_phi11.us -+ br label %L42.L46_crit_edge.us -+ -+L62.us.us: ; preds = %L62.us.us, %L42.L46_crit_edge.us -+ %value_phi21.us.us = phi i64 [ %6, %L62.us.us ], [ %spec.select, %L42.L46_crit_edge.us ] -+ %3 = add i64 %1, %value_phi21.us.us -+ %4 = getelementptr inbounds i8, i8 addrspace(13)* %base, i64 %3 -+ %5 = load i8, i8 addrspace(13)* %4, align 1 -+ %6 = add i64 %f, %value_phi21.us.us -+ br i1 %g, label %L82.us.us.loopexit, label %L62.us.us, !llvm.loop !1 -+} -+ -+!1 = distinct !{!1, !2} -+!2 = !{!"llvm.loop.isvectorized", i32 1} --- -2.28.0 - diff --git a/deps/patches/llvm-11-D85313-debuginfo-empty-arange.patch b/deps/patches/llvm-11-D85313-debuginfo-empty-arange.patch deleted file mode 100644 index c4d6015605813..0000000000000 --- a/deps/patches/llvm-11-D85313-debuginfo-empty-arange.patch +++ /dev/null @@ -1,277 +0,0 @@ -From c8001215f1e32ae454053d9eab8cb1064c48e803 Mon Sep 17 00:00:00 2001 -From: James Henderson -Date: Mon, 10 Aug 2020 13:36:44 +0100 -Subject: [PATCH] [DebugInfo] Don't error for zero-length arange entries - -Although the DWARF specification states that .debug_aranges entries -can't have length zero, these can occur in the wild. There's no -particular reason to enforce this part of the spec, since functionally -they have no impact. The patch removes the error and introduces a new -warning for premature terminator entries which does not stop parsing. - -This is a relanding of cb3a598c87db, adding the missing obj2yaml part -that was needed. - -Fixes https://bugs.llvm.org/show_bug.cgi?id=46805. See also -https://reviews.llvm.org/D71932 which originally introduced the error. - -Reviewed by: ikudrin, dblaikie, Higuoxing - -Differential Revision: https://reviews.llvm.org/D85313 ---- - .../DebugInfo/DWARF/DWARFDebugArangeSet.h | 3 +- - llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 3 +- - .../DebugInfo/DWARF/DWARFDebugArangeSet.cpp | 18 ++-- - .../lib/DebugInfo/DWARF/DWARFDebugAranges.cpp | 3 +- - llvm/tools/obj2yaml/dwarf2yaml.cpp | 8 +- - .../DWARF/DWARFDebugArangeSetTest.cpp | 98 +++++++++++++++++-- - 6 files changed, 113 insertions(+), 20 deletions(-) - -diff --git llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h -index 0681a2e33a5..3d5852ee151 100644 ---- llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h -+++ llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h -@@ -60,7 +60,8 @@ public: - DWARFDebugArangeSet() { clear(); } - - void clear(); -- Error extract(DWARFDataExtractor data, uint64_t *offset_ptr); -+ Error extract(DWARFDataExtractor data, uint64_t *offset_ptr, -+ function_ref WarningHandler); - void dump(raw_ostream &OS) const; - - uint64_t getCompileUnitDIEOffset() const { return HeaderData.CuOffset; } -diff --git llvm/lib/DebugInfo/DWARF/DWARFContext.cpp llvm/lib/DebugInfo/DWARF/DWARFContext.cpp -index bf621949777..3bcde8fafb1 100644 ---- llvm/lib/DebugInfo/DWARF/DWARFContext.cpp -+++ llvm/lib/DebugInfo/DWARF/DWARFContext.cpp -@@ -502,7 +502,8 @@ void DWARFContext::dump( - 0); - DWARFDebugArangeSet set; - while (arangesData.isValidOffset(offset)) { -- if (Error E = set.extract(arangesData, &offset)) { -+ if (Error E = -+ set.extract(arangesData, &offset, DumpOpts.WarningHandler)) { - RecoverableErrorHandler(std::move(E)); - break; - } -diff --git llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp -index 608fc0388af..381dd476cd5 100644 ---- llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp -+++ llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp -@@ -32,7 +32,8 @@ void DWARFDebugArangeSet::clear() { - } - - Error DWARFDebugArangeSet::extract(DWARFDataExtractor data, -- uint64_t *offset_ptr) { -+ uint64_t *offset_ptr, -+ function_ref WarningHandler) { - assert(data.isValidOffset(*offset_ptr)); - ArangeDescriptors.clear(); - Offset = *offset_ptr; -@@ -132,19 +133,20 @@ Error DWARFDebugArangeSet::extract(DWARFDataExtractor data, - - uint64_t end_offset = Offset + full_length; - while (*offset_ptr < end_offset) { -+ uint64_t EntryOffset = *offset_ptr; - arangeDescriptor.Address = data.getUnsigned(offset_ptr, HeaderData.AddrSize); - arangeDescriptor.Length = data.getUnsigned(offset_ptr, HeaderData.AddrSize); - -- if (arangeDescriptor.Length == 0) { -- // Each set of tuples is terminated by a 0 for the address and 0 -- // for the length. -- if (arangeDescriptor.Address == 0 && *offset_ptr == end_offset) -+ // Each set of tuples is terminated by a 0 for the address and 0 -+ // for the length. -+ if (arangeDescriptor.Length == 0 && arangeDescriptor.Address == 0) { -+ if (*offset_ptr == end_offset) - return ErrorSuccess(); -- return createStringError( -+ WarningHandler(createStringError( - errc::invalid_argument, - "address range table at offset 0x%" PRIx64 -- " has an invalid tuple (length = 0) at offset 0x%" PRIx64, -- Offset, *offset_ptr - tuple_size); -+ " has a premature terminator entry at offset 0x%" PRIx64, -+ Offset, EntryOffset)); - } - - ArangeDescriptors.push_back(arangeDescriptor); -diff --git llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp -index e8ed6307505..e0db469752c 100644 ---- llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp -+++ llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp -@@ -28,7 +28,8 @@ void DWARFDebugAranges::extract( - DWARFDebugArangeSet Set; - - while (DebugArangesData.isValidOffset(Offset)) { -- if (Error E = Set.extract(DebugArangesData, &Offset)) { -+ if (Error E = -+ Set.extract(DebugArangesData, &Offset, RecoverableErrorHandler)) { - RecoverableErrorHandler(std::move(E)); - return; - } -diff --git llvm/tools/obj2yaml/dwarf2yaml.cpp llvm/tools/obj2yaml/dwarf2yaml.cpp -index a318a547e32..14e4450201c 100644 ---- llvm/tools/obj2yaml/dwarf2yaml.cpp -+++ llvm/tools/obj2yaml/dwarf2yaml.cpp -@@ -64,9 +64,15 @@ Error dumpDebugARanges(DWARFContext &DCtx, DWARFYAML::Data &Y) { - DCtx.isLittleEndian(), 0); - uint64_t Offset = 0; - DWARFDebugArangeSet Set; -+ std::vector DebugAranges; -+ -+ // We ignore any errors that don't prevent parsing the section, since we can -+ // still represent such sections. These errors are recorded via the -+ // WarningHandler parameter of Set.extract(). -+ auto DiscardError = [](Error Err) { consumeError(std::move(Err)); }; - - while (ArangesData.isValidOffset(Offset)) { -- if (Error E = Set.extract(ArangesData, &Offset)) -+ if (Error E = Set.extract(ArangesData, &Offset, DiscardError)) - return E; - DWARFYAML::ARange Range; - Range.Format = Set.getHeader().Format; -diff --git llvm/unittests/DebugInfo/DWARF/DWARFDebugArangeSetTest.cpp llvm/unittests/DebugInfo/DWARF/DWARFDebugArangeSetTest.cpp -index 4ec9c5d1c0b..face8ec024f 100644 ---- llvm/unittests/DebugInfo/DWARF/DWARFDebugArangeSetTest.cpp -+++ llvm/unittests/DebugInfo/DWARF/DWARFDebugArangeSetTest.cpp -@@ -7,12 +7,23 @@ - //===----------------------------------------------------------------------===// - - #include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h" -+#include "llvm/Testing/Support/Error.h" - #include "gtest/gtest.h" - - using namespace llvm; - - namespace { - -+struct WarningHandler { -+ ~WarningHandler() { EXPECT_THAT_ERROR(std::move(Err), Succeeded()); } -+ -+ void operator()(Error E) { Err = joinErrors(std::move(Err), std::move(E)); } -+ -+ Error getWarning() { return std::move(Err); } -+ -+ Error Err = Error::success(); -+}; -+ - template - void ExpectExtractError(const char (&SecDataRaw)[SecSize], - const char *ErrorMessage) { -@@ -21,7 +32,8 @@ void ExpectExtractError(const char (&SecDataRaw)[SecSize], - /* AddressSize = */ 4); - DWARFDebugArangeSet Set; - uint64_t Offset = 0; -- Error E = Set.extract(Extractor, &Offset); -+ WarningHandler Warnings; -+ Error E = Set.extract(Extractor, &Offset, Warnings); - ASSERT_TRUE(E.operator bool()); - EXPECT_STREQ(ErrorMessage, toString(std::move(E)).c_str()); - } -@@ -166,9 +178,9 @@ TEST(DWARFDebugArangeSet, UnevenLength) { - "of the tuple size"); - } - --TEST(DWARFDebugArangeSet, ZeroLengthEntry) { -+TEST(DWARFDebugArangeSet, ZeroAddressEntry) { - static const char DebugArangesSecRaw[] = -- "\x24\x00\x00\x00" // Length -+ "\x1c\x00\x00\x00" // Length - "\x02\x00" // Version - "\x00\x00\x00\x00" // Debug Info Offset - "\x04" // Address Size -@@ -176,14 +188,84 @@ TEST(DWARFDebugArangeSet, ZeroLengthEntry) { - "\x00\x00\x00\x00" // Padding - "\x00\x00\x00\x00" // Entry1: Address - "\x01\x00\x00\x00" // Length -+ "\x00\x00\x00\x00" // Termination tuple -+ "\x00\x00\x00\x00"; -+ DWARFDataExtractor Extractor( -+ StringRef(DebugArangesSecRaw, sizeof(DebugArangesSecRaw) - 1), -+ /*IsLittleEndian=*/true, -+ /*AddressSize=*/4); -+ DWARFDebugArangeSet Set; -+ uint64_t Offset = 0; -+ ASSERT_THAT_ERROR(Set.extract(Extractor, &Offset, WarningHandler()), -+ Succeeded()); -+ auto Range = Set.descriptors(); -+ auto Iter = Range.begin(); -+ ASSERT_EQ(std::distance(Iter, Range.end()), 1u); -+ EXPECT_EQ(Iter->Address, 0u); -+ EXPECT_EQ(Iter->Length, 1u); -+} -+ -+TEST(DWARFDebugArangeSet, ZeroLengthEntry) { -+ static const char DebugArangesSecRaw[] = -+ "\x1c\x00\x00\x00" // Length -+ "\x02\x00" // Version -+ "\x00\x00\x00\x00" // Debug Info Offset -+ "\x04" // Address Size -+ "\x00" // Segment Selector Size -+ "\x00\x00\x00\x00" // Padding -+ "\x01\x00\x00\x00" // Entry1: Address -+ "\x00\x00\x00\x00" // Length -+ "\x00\x00\x00\x00" // Termination tuple -+ "\x00\x00\x00\x00"; -+ DWARFDataExtractor Extractor( -+ StringRef(DebugArangesSecRaw, sizeof(DebugArangesSecRaw) - 1), -+ /*IsLittleEndian=*/true, -+ /*AddressSize=*/4); -+ DWARFDebugArangeSet Set; -+ uint64_t Offset = 0; -+ ASSERT_THAT_ERROR(Set.extract(Extractor, &Offset, WarningHandler()), -+ Succeeded()); -+ auto Range = Set.descriptors(); -+ auto Iter = Range.begin(); -+ ASSERT_EQ(std::distance(Iter, Range.end()), 1u); -+ EXPECT_EQ(Iter->Address, 1u); -+ EXPECT_EQ(Iter->Length, 0u); -+} -+ -+TEST(DWARFDebugArangesSet, PrematureTerminator) { -+ static const char DebugArangesSecRaw[] = -+ "\x24\x00\x00\x00" // Length -+ "\x02\x00" // Version -+ "\x00\x00\x00\x00" // Debug Info Offset -+ "\x04" // Address Size -+ "\x00" // Segment Selector Size -+ "\x00\x00\x00\x00" // Padding -+ "\x00\x00\x00\x00" // Entry1: Premature -+ "\x00\x00\x00\x00" // terminator - "\x01\x00\x00\x00" // Entry2: Address -- "\x00\x00\x00\x00" // Length (invalid) -+ "\x01\x00\x00\x00" // Length - "\x00\x00\x00\x00" // Termination tuple - "\x00\x00\x00\x00"; -- ExpectExtractError( -- DebugArangesSecRaw, -- "address range table at offset 0x0 has an invalid tuple (length = 0) " -- "at offset 0x18"); -+ DWARFDataExtractor Extractor( -+ StringRef(DebugArangesSecRaw, sizeof(DebugArangesSecRaw) - 1), -+ /*IsLittleEndian=*/true, -+ /*AddressSize=*/4); -+ DWARFDebugArangeSet Set; -+ uint64_t Offset = 0; -+ WarningHandler Warnings; -+ ASSERT_THAT_ERROR(Set.extract(Extractor, &Offset, Warnings), Succeeded()); -+ auto Range = Set.descriptors(); -+ auto Iter = Range.begin(); -+ ASSERT_EQ(std::distance(Iter, Range.end()), 2u); -+ EXPECT_EQ(Iter->Address, 0u); -+ EXPECT_EQ(Iter->Length, 0u); -+ ++Iter; -+ EXPECT_EQ(Iter->Address, 1u); -+ EXPECT_EQ(Iter->Length, 1u); -+ EXPECT_THAT_ERROR( -+ Warnings.getWarning(), -+ FailedWithMessage("address range table at offset 0x0 has a premature " -+ "terminator entry at offset 0x10")); - } - - } // end anonymous namespace --- -2.28.0 - diff --git a/deps/patches/llvm-11-D90722-rtdyld-absolute-relocs.patch b/deps/patches/llvm-11-D90722-rtdyld-absolute-relocs.patch deleted file mode 100644 index 29cdb07331335..0000000000000 --- a/deps/patches/llvm-11-D90722-rtdyld-absolute-relocs.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 516f0d1a0ca26ad361bf84ea9e321b247ac7924e Mon Sep 17 00:00:00 2001 -From: Valentin Churavy -Date: Fri, 6 Nov 2020 14:08:30 -0500 -Subject: [PATCH] [RTDYLD] support absolute relocations where needed - -These appear in some sections, such as DWARF tables, since -RuntimeDyldELF explicitly maps to this as a sentinel value: -https://github.com/llvm/llvm-project/blob/29d1fba7b5335d969e3e5daa84b7a25cd1fa75ef/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp#L1199 - -That could then be a source of problems if it tried to examine these -sections (for example, with either setProcessAllSections(true) or ORCv2 on i686). - -Replaces https://reviews.llvm.org/D89241 - -Reviewed By: lhames, vchuravy - -Differential Revision: https://reviews.llvm.org/D90722 ---- - .../RuntimeDyld/RuntimeDyld.cpp | 19 +++++++++++++------ - 1 file changed, 13 insertions(+), 6 deletions(-) - -diff --git llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp -index 7e9b0690cce..04f541b5955 100644 ---- llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp -+++ llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp -@@ -308,7 +308,9 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { - << " SID: " << SectionID - << " Offset: " << format("%p", (uintptr_t)Addr) - << " flags: " << *FlagsOrErr << "\n"); -- GlobalSymbolTable[Name] = SymbolTableEntry(SectionID, Addr, *JITSymFlags); -+ if (!Name.empty()) // Skip absolute symbol relocations. -+ GlobalSymbolTable[Name] = -+ SymbolTableEntry(SectionID, Addr, *JITSymFlags); - } else if (SymType == object::SymbolRef::ST_Function || - SymType == object::SymbolRef::ST_Data || - SymType == object::SymbolRef::ST_Unknown || -@@ -340,8 +342,9 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { - << " SID: " << SectionID - << " Offset: " << format("%p", (uintptr_t)SectOffset) - << " flags: " << *FlagsOrErr << "\n"); -- GlobalSymbolTable[Name] = -- SymbolTableEntry(SectionID, SectOffset, *JITSymFlags); -+ if (!Name.empty()) // Skip absolute symbol relocations -+ GlobalSymbolTable[Name] = -+ SymbolTableEntry(SectionID, SectOffset, *JITSymFlags); - } - } - -@@ -769,8 +772,9 @@ Error RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj, - - LLVM_DEBUG(dbgs() << "Allocating common symbol " << Name << " address " - << format("%p", Addr) << "\n"); -- GlobalSymbolTable[Name] = -- SymbolTableEntry(SectionID, Offset, std::move(*JITSymFlags)); -+ if (!Name.empty()) // Skip absolute symbol relocations. -+ GlobalSymbolTable[Name] = -+ SymbolTableEntry(SectionID, Offset, std::move(*JITSymFlags)); - Offset += Size; - Addr += Size; - } -@@ -930,6 +934,8 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE, - if (Loc == GlobalSymbolTable.end()) { - ExternalSymbolRelocations[SymbolName].push_back(RE); - } else { -+ assert(!SymbolName.empty() && -+ "Empty symbol should not be in GlobalSymbolTable"); - // Copy the RE since we want to modify its addend. - RelocationEntry RECopy = RE; - const auto &SymInfo = Loc->second; -@@ -1234,7 +1240,8 @@ void RuntimeDyldImpl::finalizeAsync( - - for (auto &RelocKV : SharedThis->ExternalSymbolRelocations) { - StringRef Name = RelocKV.first(); -- assert(!Name.empty() && "Symbol has no name?"); -+ if (Name.empty()) // Skip absolute symbol relocations. -+ continue; - assert(!SharedThis->GlobalSymbolTable.count(Name) && - "Name already processed. RuntimeDyld instances can not be re-used " - "when finalizing with finalizeAsync."); --- -2.29.2 diff --git a/deps/patches/llvm-11-D92906-ppc-setjmp.patch b/deps/patches/llvm-11-D92906-ppc-setjmp.patch deleted file mode 100644 index b1e50cb897d8c..0000000000000 --- a/deps/patches/llvm-11-D92906-ppc-setjmp.patch +++ /dev/null @@ -1,226 +0,0 @@ -From 15d31d58aa6a7b9dc8e623b6da2eff4f0553dc62 Mon Sep 17 00:00:00 2001 -From: Valentin Churavy -Date: Wed, 9 Dec 2020 10:09:46 -0500 -Subject: [PATCH] [PowerPC] Restore stack ptr from frame ptr with setjmp - -If a function happens to: -- call `setjmp` -- do a 16-byte stack allocation -- call a function that sets up a stack frame and `longjmp`'s back - -The stack pointer that is restores by `setjmp` will no longer point to a valid back chain. According to the ABI, stack accesses in such a function are to be frame pointer based - so it is an error (quite obviously) to restore the stack from the back chain. -We already restore the stack from the frame pointer when there are calls to `fast_cc` functions. We just need to also do that when there are calls to `setjmp`. This patch simply does that. - -This was pointed out by the Julia team. - -Differential Revision: https://reviews.llvm.org/D92906 ---- - llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 15 +- - .../PowerPC/stack-restore-with-setjmp.ll | 156 ++++++++++++++++++ - 2 files changed, 164 insertions(+), 7 deletions(-) - create mode 100644 llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll - -diff --git llvm/lib/Target/PowerPC/PPCFrameLowering.cpp llvm/lib/Target/PowerPC/PPCFrameLowering.cpp -index 7df2f6dc9252..b93322c15534 100644 ---- llvm/lib/Target/PowerPC/PPCFrameLowering.cpp -+++ llvm/lib/Target/PowerPC/PPCFrameLowering.cpp -@@ -375,9 +375,10 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { - return false; - - return MF.getTarget().Options.DisableFramePointerElim(MF) || -- MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || -- (MF.getTarget().Options.GuaranteedTailCallOpt && -- MF.getInfo()->hasFastCall()); -+ MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || -+ MF.exposesReturnsTwice() || -+ (MF.getTarget().Options.GuaranteedTailCallOpt && -+ MF.getInfo()->hasFastCall()); - } - - void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { -@@ -584,8 +585,8 @@ bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const { - // Frame pointers and base pointers complicate matters so don't do anything - // if we have them. For example having a frame pointer will sometimes require - // a copy of r1 into r31 and that makes keeping track of updates to r1 more -- // difficult. -- if (hasFP(MF) || RegInfo->hasBasePointer(MF)) -+ // difficult. Similar situation exists with setjmp. -+ if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice()) - return false; - - // Calls to fast_cc functions use different rules for passing parameters on -@@ -1646,8 +1647,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, - // If this function contained a fastcc call and GuaranteedTailCallOpt is - // enabled (=> hasFastCall()==true) the fastcc call might contain a tail - // call which invalidates the stack pointer value in SP(0). So we use the -- // value of R31 in this case. -- if (FI->hasFastCall()) { -+ // value of R31 in this case. Similar situation exists with setjmp. -+ if (FI->hasFastCall() || MF.exposesReturnsTwice()) { - assert(HasFP && "Expecting a valid frame pointer."); - if (!HasRedZone) - RBReg = FPReg; -diff --git llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll -new file mode 100644 -index 000000000000..9928a111734b ---- /dev/null -+++ llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll -@@ -0,0 +1,156 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc < %s -mtriple=powerpc64le-- -verify-machineinstrs | FileCheck %s -+; RUN: llc < %s -mtriple=powerpc64-- -verify-machineinstrs | FileCheck %s \ -+; RUN: --check-prefix=BE -+%struct.__jmp_buf_tag = type { [64 x i64], i32, %struct.__sigset_t, [8 x i8] } -+%struct.__sigset_t = type { [16 x i64] } -+ -+@.str = private unnamed_addr constant [33 x i8] c"Successfully returned from main\0A\00", align 1 -+ -+; Function Attrs: nounwind -+define dso_local signext i32 @main(i32 signext %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 { -+; CHECK-LABEL: main: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: mfocrf 12, 32 -+; CHECK-NEXT: mflr 0 -+; CHECK-NEXT: std 31, -8(1) -+; CHECK-NEXT: std 0, 16(1) -+; CHECK-NEXT: stw 12, 8(1) -+; CHECK-NEXT: stdu 1, -784(1) -+; CHECK-NEXT: # kill: def $r3 killed $r3 killed $x3 -+; CHECK-NEXT: cmpwi 2, 3, 2 -+; CHECK-NEXT: mr 31, 1 -+; CHECK-NEXT: li 3, 0 -+; CHECK-NEXT: blt 2, .LBB0_3 -+; CHECK-NEXT: # %bb.1: # %if.end -+; CHECK-NEXT: addi 3, 31, 112 -+; CHECK-NEXT: bl _setjmp -+; CHECK-NEXT: nop -+; CHECK-NEXT: crmove 20, 10 -+; CHECK-NEXT: # kill: def $r3 killed $r3 killed $x3 -+; CHECK-NEXT: cmpwi 3, 0 -+; CHECK-NEXT: crorc 20, 10, 2 -+; CHECK-NEXT: crmove 21, 2 -+; CHECK-NEXT: bc 4, 20, .LBB0_4 -+; CHECK-NEXT: # %bb.2: # %if.end5 -+; CHECK-NEXT: addis 3, 2, .L.str@toc@ha -+; CHECK-NEXT: addi 3, 3, .L.str@toc@l -+; CHECK-NEXT: bl printf -+; CHECK-NEXT: nop -+; CHECK-NEXT: # kill: def $r3 killed $r3 killed $x3 -+; CHECK-NEXT: .LBB0_3: # %return -+; CHECK-NEXT: extsw 3, 3 -+; CHECK-NEXT: addi 1, 31, 784 -+; CHECK-NEXT: ld 0, 16(1) -+; CHECK-NEXT: lwz 12, 8(1) -+; CHECK-NEXT: ld 31, -8(1) -+; CHECK-NEXT: mtocrf 32, 12 -+; CHECK-NEXT: mtlr 0 -+; CHECK-NEXT: blr -+; CHECK-NEXT: .LBB0_4: # %if.then3 -+; CHECK-NEXT: ld 4, 0(1) -+; CHECK-NEXT: stdu 4, -16(1) -+; CHECK-NEXT: addi 3, 1, 96 -+; CHECK-NEXT: li 4, -1 -+; CHECK-NEXT: stb 4, 0(3) -+; CHECK-NEXT: addi 4, 31, 112 -+; CHECK-NEXT: bl test -+; CHECK-NEXT: nop -+; -+; BE-LABEL: main: -+; BE: # %bb.0: # %entry -+; BE-NEXT: mflr 0 -+; BE-NEXT: std 31, -8(1) -+; BE-NEXT: std 0, 16(1) -+; BE-NEXT: mfcr 12 -+; BE-NEXT: stw 12, 8(1) -+; BE-NEXT: stdu 1, -800(1) -+; BE-NEXT: li 4, 0 -+; BE-NEXT: # kill: def $r3 killed $r3 killed $x3 -+; BE-NEXT: cmpwi 2, 3, 2 -+; BE-NEXT: mr 3, 4 -+; BE-NEXT: mr 31, 1 -+; BE-NEXT: blt 2, .LBB0_3 -+; BE-NEXT: # %bb.1: # %if.end -+; BE-NEXT: addi 3, 31, 128 -+; BE-NEXT: bl _setjmp -+; BE-NEXT: nop -+; BE-NEXT: crmove 20, 10 -+; BE-NEXT: # kill: def $r3 killed $r3 killed $x3 -+; BE-NEXT: cmpwi 3, 0 -+; BE-NEXT: crorc 20, 10, 2 -+; BE-NEXT: crmove 21, 2 -+; BE-NEXT: bc 4, 20, .LBB0_4 -+; BE-NEXT: # %bb.2: # %if.end5 -+; BE-NEXT: addis 3, 2, .L.str@toc@ha -+; BE-NEXT: addi 3, 3, .L.str@toc@l -+; BE-NEXT: bl printf -+; BE-NEXT: nop -+; BE-NEXT: # kill: def $r3 killed $r3 killed $x3 -+; BE-NEXT: .LBB0_3: # %return -+; BE-NEXT: extsw 3, 3 -+; BE-NEXT: addi 1, 31, 800 -+; BE-NEXT: ld 0, 16(1) -+; BE-NEXT: lwz 12, 8(1) -+; BE-NEXT: ld 31, -8(1) -+; BE-NEXT: mtlr 0 -+; BE-NEXT: mtcrf 32, 12 # cr2 -+; BE-NEXT: blr -+; BE-NEXT: .LBB0_4: # %if.then3 -+; BE-NEXT: ld 4, 0(1) -+; BE-NEXT: stdu 4, -16(1) -+; BE-NEXT: addi 3, 1, 112 -+; BE-NEXT: li 4, -1 -+; BE-NEXT: stb 4, 0(3) -+; BE-NEXT: addi 4, 31, 128 -+; BE-NEXT: bl test -+; BE-NEXT: nop -+entry: -+ %env_buffer = alloca [1 x %struct.__jmp_buf_tag], align 16 -+ %cmp = icmp slt i32 %argc, 2 -+ br i1 %cmp, label %return, label %if.end -+ -+if.end: ; preds = %entry -+ %0 = bitcast [1 x %struct.__jmp_buf_tag]* %env_buffer to i8* -+ call void @llvm.lifetime.start.p0i8(i64 656, i8* nonnull %0) #5 -+ %arraydecay = getelementptr inbounds [1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* %env_buffer, i64 0, i64 0 -+ %call = call signext i32 @_setjmp(%struct.__jmp_buf_tag* nonnull %arraydecay) #6 -+ %cmp1 = icmp ne i32 %argc, 2 -+ %cmp2 = icmp eq i32 %call, 0 -+ %or.cond = and i1 %cmp1, %cmp2 -+ br i1 %or.cond, label %if.then3, label %if.end5 -+ -+if.then3: ; preds = %if.end -+ %1 = alloca [8 x i8], align 16 -+ %.sub = getelementptr inbounds [8 x i8], [8 x i8]* %1, i64 0, i64 0 -+ store i8 -1, i8* %.sub, align 16 -+ call void @test(i8* nonnull %.sub, %struct.__jmp_buf_tag* nonnull %arraydecay) #7 -+ unreachable -+ -+if.end5: ; preds = %if.end -+ %call6 = call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([33 x i8], [33 x i8]* @.str, i64 0, i64 0)) -+ call void @llvm.lifetime.end.p0i8(i64 656, i8* nonnull %0) #5 -+ br label %return -+ -+return: ; preds = %entry, %if.end5 -+ %retval.0 = phi i32 [ %call6, %if.end5 ], [ 0, %entry ] -+ ret i32 %retval.0 -+} -+ -+; Function Attrs: argmemonly nofree nosync nounwind willreturn -+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) -+ -+; Function Attrs: nounwind returns_twice -+declare signext i32 @_setjmp(%struct.__jmp_buf_tag*) local_unnamed_addr -+ -+; Function Attrs: noreturn -+declare void @test(i8*, %struct.__jmp_buf_tag*) local_unnamed_addr -+ -+; Function Attrs: nofree nounwind -+declare noundef signext i32 @printf(i8* nocapture noundef readonly, ...) local_unnamed_addr -+ -+; Function Attrs: argmemonly nofree nosync nounwind willreturn -+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) -+ -+attributes #0 = { nounwind } -+attributes #6 = { nounwind returns_twice } --- -2.29.2 diff --git a/deps/patches/llvm-11-D93092-ppc-knownbits.patch b/deps/patches/llvm-11-D93092-ppc-knownbits.patch deleted file mode 100644 index 47e6f743ddefd..0000000000000 --- a/deps/patches/llvm-11-D93092-ppc-knownbits.patch +++ /dev/null @@ -1,106 +0,0 @@ -From b5a0e6ca2b0c6367b082dd9a77b02c26607c8d7d Mon Sep 17 00:00:00 2001 -From: Kai Luo -Date: Tue, 29 Dec 2020 12:11:55 +0000 -Subject: [PATCH 2/4] [PowerPC] Remaining KnownBits should be constant when - performing non-sign comparison - -In `PPCTargetLowering::DAGCombineTruncBoolExt`, when checking if it's correct to perform the transformation for non-sign comparison, as the comment says -``` - // This is neither a signed nor an unsigned comparison, just make sure - // that the high bits are equal. -``` -Origin check -``` - if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One) - return SDValue(); -``` -is not strong enough. For example, -``` -Op1Known = 111x000x; -Op2Known = 111x000x; -``` -Bit 4, besides bit 0, is still unknown and affects the final result. - -This patch fixes https://bugs.llvm.org/show_bug.cgi?id=48388. - -Reviewed By: nemanjai, #powerpc - -Differential Revision: https://reviews.llvm.org/D93092 ---- - llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 10 +++-- - llvm/test/CodeGen/PowerPC/pr48388.ll | 41 +++++++++++++++++++++ - 2 files changed, 47 insertions(+), 4 deletions(-) - create mode 100644 llvm/test/CodeGen/PowerPC/pr48388.ll - -diff --git llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/lib/Target/PowerPC/PPCISelLowering.cpp -index f54f1673526d..38dbff4197b9 100644 ---- llvm/lib/Target/PowerPC/PPCISelLowering.cpp -+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp -@@ -13287,11 +13287,13 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, - KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1)); - - // We don't really care about what is known about the first bit (if -- // anything), so clear it in all masks prior to comparing them. -- Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0); -- Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0); -+ // anything), so pretend that it is known zero for both to ensure they can -+ // be compared as constants. -+ Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0); -+ Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0); - -- if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One) -+ if (!Op1Known.isConstant() || !Op2Known.isConstant() || -+ Op1Known.getConstant() != Op2Known.getConstant()) - return SDValue(); - } - } -diff --git llvm/test/CodeGen/PowerPC/pr48388.ll llvm/test/CodeGen/PowerPC/pr48388.ll -new file mode 100644 -index 000000000000..822e5d852317 ---- /dev/null -+++ llvm/test/CodeGen/PowerPC/pr48388.ll -@@ -0,0 +1,41 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le -ppc-asm-full-reg-names \ -+; RUN: < %s | FileCheck %s -+ -+define i64 @julia_div_i64(i64 %0, i64 %1) local_unnamed_addr #0 { -+; CHECK-LABEL: julia_div_i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: divd r6, r3, r4 -+; CHECK-NEXT: lis r5, -1592 -+; CHECK-NEXT: ori r7, r5, 21321 -+; CHECK-NEXT: ori r5, r5, 65519 -+; CHECK-NEXT: cmpdi r3, 0 -+; CHECK-NEXT: rldic r7, r7, 4, 17 -+; CHECK-NEXT: rldic r5, r5, 4, 17 -+; CHECK-NEXT: iselgt r9, r5, r7 -+; CHECK-NEXT: cmpdi r4, 0 -+; CHECK-NEXT: mulld r8, r6, r4 -+; CHECK-NEXT: iselgt r4, r5, r7 -+; CHECK-NEXT: xor r4, r9, r4 -+; CHECK-NEXT: cntlzd r4, r4 -+; CHECK-NEXT: rldicl r4, r4, 58, 63 -+; CHECK-NEXT: xor r3, r8, r3 -+; CHECK-NEXT: addic r5, r3, -1 -+; CHECK-NEXT: subfe r3, r5, r3 -+; CHECK-NEXT: and r3, r4, r3 -+; CHECK-NEXT: add r3, r6, r3 -+; CHECK-NEXT: blr -+entry: -+ %2 = sdiv i64 %0, %1 -+ %3 = icmp sgt i64 %0, 0 -+ %4 = icmp sgt i64 %1, 0 -+ %5 = select i1 %3, i64 140735820070640, i64 140735819363472 -+ %6 = select i1 %4, i64 140735820070640, i64 140735819363472 -+ %7 = icmp eq i64 %5, %6 -+ %8 = mul i64 %2, %1 -+ %9 = icmp ne i64 %8, %0 -+ %10 = and i1 %7, %9 -+ %11 = zext i1 %10 to i64 -+ %12 = add i64 %2, %11 -+ ret i64 %12 -+} --- -2.30.0 - diff --git a/deps/patches/llvm-11-D93154-globalisel-as.patch b/deps/patches/llvm-11-D93154-globalisel-as.patch deleted file mode 100644 index 7b5575e05c00d..0000000000000 --- a/deps/patches/llvm-11-D93154-globalisel-as.patch +++ /dev/null @@ -1,88 +0,0 @@ -From af809ec100ef60cdeeef776e54c123e4fc8f1071 Mon Sep 17 00:00:00 2001 -From: Jameson Nash -Date: Tue, 15 Dec 2020 10:04:08 -0500 -Subject: [PATCH] GlobalISel: remove assert that memcpy Src and Dst addrspace - must be identical - -The LangRef does not require these arguments to have the same type. - -Differential Revision: https://reviews.llvm.org/D93154 ---- - .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 27 +++++++++++-------- - 1 file changed, 16 insertions(+), 11 deletions(-) - -diff --git llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp -index 79f74a47d83c..7bd6f8f52c8b 100644 ---- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp -+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp -@@ -1240,7 +1240,6 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, - // of that value loaded. This can result in a sequence of loads and stores - // mixed types, depending on what the target specifies as good types to use. - unsigned CurrOffset = 0; -- LLT PtrTy = MRI.getType(Src); - unsigned Size = KnownLen; - for (auto CopyTy : MemOps) { - // Issuing an unaligned load / store pair that overlaps with the previous -@@ -1258,15 +1257,20 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, - Register LoadPtr = Src; - Register Offset; - if (CurrOffset != 0) { -- Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset) -- .getReg(0); -- LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); -+ LLT LoadTy = MRI.getType(Src); -+ Offset = -+ MIB.buildConstant(LLT::scalar(LoadTy.getSizeInBits()), CurrOffset) -+ .getReg(0); -+ LoadPtr = MIB.buildPtrAdd(LoadTy, Src, Offset).getReg(0); - } - auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO); - - // Create the store. -- Register StorePtr = -- CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); -+ Register StorePtr = Dst; -+ if (CurrOffset != 0) { -+ LLT StoreTy = MRI.getType(Dst); -+ StorePtr = MIB.buildPtrAdd(StoreTy, Dst, Offset).getReg(0); -+ } - MIB.buildStore(LdVal, StorePtr, *StoreMMO); - CurrOffset += CopyTy.getSizeInBytes(); - Size -= CopyTy.getSizeInBytes(); -@@ -1343,7 +1347,6 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, - // Apart from that, this loop is pretty much doing the same thing as the - // memcpy codegen function. - unsigned CurrOffset = 0; -- LLT PtrTy = MRI.getType(Src); - SmallVector LoadVals; - for (auto CopyTy : MemOps) { - // Construct MMO for the load. -@@ -1353,9 +1356,10 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, - // Create the load. - Register LoadPtr = Src; - if (CurrOffset != 0) { -+ LLT LoadTy = MRI.getType(Src); - auto Offset = -- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); -- LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); -+ MIB.buildConstant(LLT::scalar(LoadTy.getSizeInBits()), CurrOffset); -+ LoadPtr = MIB.buildPtrAdd(LoadTy, Src, Offset).getReg(0); - } - LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0)); - CurrOffset += CopyTy.getSizeInBytes(); -@@ -1370,9 +1374,10 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, - - Register StorePtr = Dst; - if (CurrOffset != 0) { -+ LLT StoreTy = MRI.getType(Dst); - auto Offset = -- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); -- StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); -+ MIB.buildConstant(LLT::scalar(StoreTy.getSizeInBits()), CurrOffset); -+ StorePtr = MIB.buildPtrAdd(StoreTy, Dst, Offset).getReg(0); - } - MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO); - CurrOffset += CopyTy.getSizeInBytes(); --- -2.29.2 - diff --git a/deps/patches/llvm-11-D94058-sext-atomic-ops.patch b/deps/patches/llvm-11-D94058-sext-atomic-ops.patch deleted file mode 100644 index 732ae2b2b143a..0000000000000 --- a/deps/patches/llvm-11-D94058-sext-atomic-ops.patch +++ /dev/null @@ -1,1201 +0,0 @@ -From b46706e1d5307d98a5a4895380f91380a0987ded Mon Sep 17 00:00:00 2001 -From: Nemanja Ivanovic -Date: Mon, 18 Jan 2021 21:19:11 -0600 -Subject: [PATCH] [PowerPC] Sign extend comparison operand for signed atomic - comparisons - -As of 8dacca943af8a53a23b1caf3142d10fb4a77b645, we sign extend the atomic loaded -operand for signed subword comparisons. However, the assumption that the other -operand is correctly sign extended doesn't always hold. This patch sign extends -the other operand if it needs to be sign extended. - -This is a second fix for https://bugs.llvm.org/show_bug.cgi?id=30451 - -Differential revision: https://reviews.llvm.org/D94058 ---- - llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 79 +++- - .../CodeGen/PowerPC/atomics-regression.ll | 440 ++++++++++-------- - llvm/test/CodeGen/PowerPC/sign-ext-atomics.ll | 105 +++++ - 3 files changed, 418 insertions(+), 206 deletions(-) - create mode 100644 llvm/test/CodeGen/PowerPC/sign-ext-atomics.ll - -diff --git llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/lib/Target/PowerPC/PPCISelLowering.cpp -index f54f1673526d..867ef24ea53b 100644 ---- llvm/lib/Target/PowerPC/PPCISelLowering.cpp -+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp -@@ -11444,17 +11444,88 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, - return BB; - } - -+static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) { -+ switch(MI.getOpcode()) { -+ default: -+ return false; -+ case PPC::COPY: -+ return TII->isSignExtended(MI); -+ case PPC::LHA: -+ case PPC::LHA8: -+ case PPC::LHAU: -+ case PPC::LHAU8: -+ case PPC::LHAUX: -+ case PPC::LHAUX8: -+ case PPC::LHAX: -+ case PPC::LHAX8: -+ case PPC::LWA: -+ case PPC::LWAUX: -+ case PPC::LWAX: -+ case PPC::LWAX_32: -+ case PPC::LWA_32: -+ case PPC::PLHA: -+ case PPC::PLHA8: -+ case PPC::PLHA8pc: -+ case PPC::PLHApc: -+ case PPC::PLWA: -+ case PPC::PLWA8: -+ case PPC::PLWA8pc: -+ case PPC::PLWApc: -+ case PPC::EXTSB: -+ case PPC::EXTSB8: -+ case PPC::EXTSB8_32_64: -+ case PPC::EXTSB8_rec: -+ case PPC::EXTSB_rec: -+ case PPC::EXTSH: -+ case PPC::EXTSH8: -+ case PPC::EXTSH8_32_64: -+ case PPC::EXTSH8_rec: -+ case PPC::EXTSH_rec: -+ case PPC::EXTSW: -+ case PPC::EXTSWSLI: -+ case PPC::EXTSWSLI_32_64: -+ case PPC::EXTSWSLI_32_64_rec: -+ case PPC::EXTSWSLI_rec: -+ case PPC::EXTSW_32: -+ case PPC::EXTSW_32_64: -+ case PPC::EXTSW_32_64_rec: -+ case PPC::EXTSW_rec: -+ case PPC::SRAW: -+ case PPC::SRAWI: -+ case PPC::SRAWI_rec: -+ case PPC::SRAW_rec: -+ return true; -+ } -+ return false; -+} -+ - MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( - MachineInstr &MI, MachineBasicBlock *BB, - bool is8bit, // operation - unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const { -+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. -+ const PPCInstrInfo *TII = Subtarget.getInstrInfo(); -+ -+ // If this is a signed comparison and the value being compared is not known -+ // to be sign extended, sign extend it here. -+ DebugLoc dl = MI.getDebugLoc(); -+ MachineFunction *F = BB->getParent(); -+ MachineRegisterInfo &RegInfo = F->getRegInfo(); -+ Register incr = MI.getOperand(3).getReg(); -+ bool IsSignExtended = Register::isVirtualRegister(incr) && -+ isSignExtended(*RegInfo.getVRegDef(incr), TII); -+ -+ if (CmpOpcode == PPC::CMPW && !IsSignExtended) { -+ Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); -+ BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg) -+ .addReg(MI.getOperand(3).getReg()); -+ MI.getOperand(3).setReg(ValueReg); -+ } - // If we support part-word atomic mnemonics, just use them - if (Subtarget.hasPartwordAtomics()) - return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode, - CmpPred); - -- // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. -- const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - // In 64 bit mode we have to use 64 bits for addresses, even though the - // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address - // registers without caring whether they're 32 or 64, but here we're -@@ -11464,14 +11535,11 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( - unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); -- MachineFunction *F = BB->getParent(); - MachineFunction::iterator It = ++BB->getIterator(); - - Register dest = MI.getOperand(0).getReg(); - Register ptrA = MI.getOperand(1).getReg(); - Register ptrB = MI.getOperand(2).getReg(); -- Register incr = MI.getOperand(3).getReg(); -- DebugLoc dl = MI.getDebugLoc(); - - MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *loop2MBB = -@@ -11485,7 +11553,6 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - -- MachineRegisterInfo &RegInfo = F->getRegInfo(); - const TargetRegisterClass *RC = - is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; -diff --git llvm/test/CodeGen/PowerPC/atomics-regression.ll llvm/test/CodeGen/PowerPC/atomics-regression.ll -index ae79f82e1e06..3b7caeee91e4 100644 ---- llvm/test/CodeGen/PowerPC/atomics-regression.ll -+++ llvm/test/CodeGen/PowerPC/atomics-regression.ll -@@ -4352,16 +4352,17 @@ define i64 @test259(i64* %ptr, i64 %val) { - define i8 @test260(i8* %ptr, i8 %val) { - ; PPC64LE-LABEL: test260: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsb 5, 4 - ; PPC64LE-NEXT: .LBB260_1: --; PPC64LE-NEXT: lbarx 5, 0, 3 --; PPC64LE-NEXT: extsb 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lbarx 4, 0, 3 -+; PPC64LE-NEXT: extsb 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: ble 0, .LBB260_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: stbcx. 4, 0, 3 -+; PPC64LE-NEXT: stbcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB260_1 - ; PPC64LE-NEXT: .LBB260_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: blr - %ret = atomicrmw max i8* %ptr, i8 %val monotonic - ret i8 %ret -@@ -4370,16 +4371,17 @@ define i8 @test260(i8* %ptr, i8 %val) { - define i8 @test261(i8* %ptr, i8 %val) { - ; PPC64LE-LABEL: test261: - ; PPC64LE: # %bb.0: --; PPC64LE-NEXT: mr 5, 3 -+; PPC64LE-NEXT: extsb 5, 4 - ; PPC64LE-NEXT: .LBB261_1: --; PPC64LE-NEXT: lbarx 3, 0, 5 --; PPC64LE-NEXT: extsb 6, 3 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lbarx 4, 0, 3 -+; PPC64LE-NEXT: extsb 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: ble 0, .LBB261_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: stbcx. 4, 0, 5 -+; PPC64LE-NEXT: stbcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB261_1 - ; PPC64LE-NEXT: .LBB261_3: -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw max i8* %ptr, i8 %val acquire -@@ -4389,17 +4391,18 @@ define i8 @test261(i8* %ptr, i8 %val) { - define i8 @test262(i8* %ptr, i8 %val) { - ; PPC64LE-LABEL: test262: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsb 5, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: .LBB262_1: --; PPC64LE-NEXT: lbarx 5, 0, 3 --; PPC64LE-NEXT: extsb 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lbarx 4, 0, 3 -+; PPC64LE-NEXT: extsb 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: ble 0, .LBB262_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: stbcx. 4, 0, 3 -+; PPC64LE-NEXT: stbcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB262_1 - ; PPC64LE-NEXT: .LBB262_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: blr - %ret = atomicrmw max i8* %ptr, i8 %val release - ret i8 %ret -@@ -4408,17 +4411,18 @@ define i8 @test262(i8* %ptr, i8 %val) { - define i8 @test263(i8* %ptr, i8 %val) { - ; PPC64LE-LABEL: test263: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsb 5, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: .LBB263_1: --; PPC64LE-NEXT: lbarx 5, 0, 3 --; PPC64LE-NEXT: extsb 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lbarx 4, 0, 3 -+; PPC64LE-NEXT: extsb 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: ble 0, .LBB263_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: stbcx. 4, 0, 3 -+; PPC64LE-NEXT: stbcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB263_1 - ; PPC64LE-NEXT: .LBB263_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw max i8* %ptr, i8 %val acq_rel -@@ -4428,17 +4432,18 @@ define i8 @test263(i8* %ptr, i8 %val) { - define i8 @test264(i8* %ptr, i8 %val) { - ; PPC64LE-LABEL: test264: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsb 5, 4 - ; PPC64LE-NEXT: sync - ; PPC64LE-NEXT: .LBB264_1: --; PPC64LE-NEXT: lbarx 5, 0, 3 --; PPC64LE-NEXT: extsb 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lbarx 4, 0, 3 -+; PPC64LE-NEXT: extsb 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: ble 0, .LBB264_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: stbcx. 4, 0, 3 -+; PPC64LE-NEXT: stbcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB264_1 - ; PPC64LE-NEXT: .LBB264_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw max i8* %ptr, i8 %val seq_cst -@@ -4448,16 +4453,17 @@ define i8 @test264(i8* %ptr, i8 %val) { - define i16 @test265(i16* %ptr, i16 %val) { - ; PPC64LE-LABEL: test265: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsh 5, 4 - ; PPC64LE-NEXT: .LBB265_1: --; PPC64LE-NEXT: lharx 5, 0, 3 --; PPC64LE-NEXT: extsh 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lharx 4, 0, 3 -+; PPC64LE-NEXT: extsh 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: ble 0, .LBB265_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: sthcx. 4, 0, 3 -+; PPC64LE-NEXT: sthcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB265_1 - ; PPC64LE-NEXT: .LBB265_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: blr - %ret = atomicrmw max i16* %ptr, i16 %val monotonic - ret i16 %ret -@@ -4466,16 +4472,17 @@ define i16 @test265(i16* %ptr, i16 %val) { - define i16 @test266(i16* %ptr, i16 %val) { - ; PPC64LE-LABEL: test266: - ; PPC64LE: # %bb.0: --; PPC64LE-NEXT: mr 5, 3 -+; PPC64LE-NEXT: extsh 5, 4 - ; PPC64LE-NEXT: .LBB266_1: --; PPC64LE-NEXT: lharx 3, 0, 5 --; PPC64LE-NEXT: extsh 6, 3 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lharx 4, 0, 3 -+; PPC64LE-NEXT: extsh 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: ble 0, .LBB266_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: sthcx. 4, 0, 5 -+; PPC64LE-NEXT: sthcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB266_1 - ; PPC64LE-NEXT: .LBB266_3: -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw max i16* %ptr, i16 %val acquire -@@ -4485,17 +4492,18 @@ define i16 @test266(i16* %ptr, i16 %val) { - define i16 @test267(i16* %ptr, i16 %val) { - ; PPC64LE-LABEL: test267: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsh 5, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: .LBB267_1: --; PPC64LE-NEXT: lharx 5, 0, 3 --; PPC64LE-NEXT: extsh 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lharx 4, 0, 3 -+; PPC64LE-NEXT: extsh 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: ble 0, .LBB267_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: sthcx. 4, 0, 3 -+; PPC64LE-NEXT: sthcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB267_1 - ; PPC64LE-NEXT: .LBB267_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: blr - %ret = atomicrmw max i16* %ptr, i16 %val release - ret i16 %ret -@@ -4504,17 +4512,18 @@ define i16 @test267(i16* %ptr, i16 %val) { - define i16 @test268(i16* %ptr, i16 %val) { - ; PPC64LE-LABEL: test268: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsh 5, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: .LBB268_1: --; PPC64LE-NEXT: lharx 5, 0, 3 --; PPC64LE-NEXT: extsh 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lharx 4, 0, 3 -+; PPC64LE-NEXT: extsh 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: ble 0, .LBB268_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: sthcx. 4, 0, 3 -+; PPC64LE-NEXT: sthcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB268_1 - ; PPC64LE-NEXT: .LBB268_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw max i16* %ptr, i16 %val acq_rel -@@ -4524,17 +4533,18 @@ define i16 @test268(i16* %ptr, i16 %val) { - define i16 @test269(i16* %ptr, i16 %val) { - ; PPC64LE-LABEL: test269: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsh 5, 4 - ; PPC64LE-NEXT: sync - ; PPC64LE-NEXT: .LBB269_1: --; PPC64LE-NEXT: lharx 5, 0, 3 --; PPC64LE-NEXT: extsh 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lharx 4, 0, 3 -+; PPC64LE-NEXT: extsh 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: ble 0, .LBB269_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: sthcx. 4, 0, 3 -+; PPC64LE-NEXT: sthcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB269_1 - ; PPC64LE-NEXT: .LBB269_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw max i16* %ptr, i16 %val seq_cst -@@ -4726,16 +4736,17 @@ define i64 @test279(i64* %ptr, i64 %val) { - define i8 @test280(i8* %ptr, i8 %val) { - ; PPC64LE-LABEL: test280: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsb 5, 4 - ; PPC64LE-NEXT: .LBB280_1: --; PPC64LE-NEXT: lbarx 5, 0, 3 --; PPC64LE-NEXT: extsb 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lbarx 4, 0, 3 -+; PPC64LE-NEXT: extsb 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: bge 0, .LBB280_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: stbcx. 4, 0, 3 -+; PPC64LE-NEXT: stbcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB280_1 - ; PPC64LE-NEXT: .LBB280_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: blr - %ret = atomicrmw min i8* %ptr, i8 %val monotonic - ret i8 %ret -@@ -4744,16 +4755,17 @@ define i8 @test280(i8* %ptr, i8 %val) { - define i8 @test281(i8* %ptr, i8 %val) { - ; PPC64LE-LABEL: test281: - ; PPC64LE: # %bb.0: --; PPC64LE-NEXT: mr 5, 3 -+; PPC64LE-NEXT: extsb 5, 4 - ; PPC64LE-NEXT: .LBB281_1: --; PPC64LE-NEXT: lbarx 3, 0, 5 --; PPC64LE-NEXT: extsb 6, 3 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lbarx 4, 0, 3 -+; PPC64LE-NEXT: extsb 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: bge 0, .LBB281_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: stbcx. 4, 0, 5 -+; PPC64LE-NEXT: stbcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB281_1 - ; PPC64LE-NEXT: .LBB281_3: -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw min i8* %ptr, i8 %val acquire -@@ -4763,17 +4775,18 @@ define i8 @test281(i8* %ptr, i8 %val) { - define i8 @test282(i8* %ptr, i8 %val) { - ; PPC64LE-LABEL: test282: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsb 5, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: .LBB282_1: --; PPC64LE-NEXT: lbarx 5, 0, 3 --; PPC64LE-NEXT: extsb 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lbarx 4, 0, 3 -+; PPC64LE-NEXT: extsb 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: bge 0, .LBB282_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: stbcx. 4, 0, 3 -+; PPC64LE-NEXT: stbcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB282_1 - ; PPC64LE-NEXT: .LBB282_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: blr - %ret = atomicrmw min i8* %ptr, i8 %val release - ret i8 %ret -@@ -4782,17 +4795,18 @@ define i8 @test282(i8* %ptr, i8 %val) { - define i8 @test283(i8* %ptr, i8 %val) { - ; PPC64LE-LABEL: test283: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsb 5, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: .LBB283_1: --; PPC64LE-NEXT: lbarx 5, 0, 3 --; PPC64LE-NEXT: extsb 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lbarx 4, 0, 3 -+; PPC64LE-NEXT: extsb 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: bge 0, .LBB283_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: stbcx. 4, 0, 3 -+; PPC64LE-NEXT: stbcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB283_1 - ; PPC64LE-NEXT: .LBB283_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw min i8* %ptr, i8 %val acq_rel -@@ -4802,17 +4816,18 @@ define i8 @test283(i8* %ptr, i8 %val) { - define i8 @test284(i8* %ptr, i8 %val) { - ; PPC64LE-LABEL: test284: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsb 5, 4 - ; PPC64LE-NEXT: sync - ; PPC64LE-NEXT: .LBB284_1: --; PPC64LE-NEXT: lbarx 5, 0, 3 --; PPC64LE-NEXT: extsb 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lbarx 4, 0, 3 -+; PPC64LE-NEXT: extsb 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: bge 0, .LBB284_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: stbcx. 4, 0, 3 -+; PPC64LE-NEXT: stbcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB284_1 - ; PPC64LE-NEXT: .LBB284_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw min i8* %ptr, i8 %val seq_cst -@@ -4822,16 +4837,17 @@ define i8 @test284(i8* %ptr, i8 %val) { - define i16 @test285(i16* %ptr, i16 %val) { - ; PPC64LE-LABEL: test285: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsh 5, 4 - ; PPC64LE-NEXT: .LBB285_1: --; PPC64LE-NEXT: lharx 5, 0, 3 --; PPC64LE-NEXT: extsh 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lharx 4, 0, 3 -+; PPC64LE-NEXT: extsh 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: bge 0, .LBB285_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: sthcx. 4, 0, 3 -+; PPC64LE-NEXT: sthcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB285_1 - ; PPC64LE-NEXT: .LBB285_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: blr - %ret = atomicrmw min i16* %ptr, i16 %val monotonic - ret i16 %ret -@@ -4840,16 +4856,17 @@ define i16 @test285(i16* %ptr, i16 %val) { - define i16 @test286(i16* %ptr, i16 %val) { - ; PPC64LE-LABEL: test286: - ; PPC64LE: # %bb.0: --; PPC64LE-NEXT: mr 5, 3 -+; PPC64LE-NEXT: extsh 5, 4 - ; PPC64LE-NEXT: .LBB286_1: --; PPC64LE-NEXT: lharx 3, 0, 5 --; PPC64LE-NEXT: extsh 6, 3 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lharx 4, 0, 3 -+; PPC64LE-NEXT: extsh 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: bge 0, .LBB286_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: sthcx. 4, 0, 5 -+; PPC64LE-NEXT: sthcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB286_1 - ; PPC64LE-NEXT: .LBB286_3: -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw min i16* %ptr, i16 %val acquire -@@ -4859,17 +4876,18 @@ define i16 @test286(i16* %ptr, i16 %val) { - define i16 @test287(i16* %ptr, i16 %val) { - ; PPC64LE-LABEL: test287: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsh 5, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: .LBB287_1: --; PPC64LE-NEXT: lharx 5, 0, 3 --; PPC64LE-NEXT: extsh 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lharx 4, 0, 3 -+; PPC64LE-NEXT: extsh 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: bge 0, .LBB287_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: sthcx. 4, 0, 3 -+; PPC64LE-NEXT: sthcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB287_1 - ; PPC64LE-NEXT: .LBB287_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: blr - %ret = atomicrmw min i16* %ptr, i16 %val release - ret i16 %ret -@@ -4878,17 +4896,18 @@ define i16 @test287(i16* %ptr, i16 %val) { - define i16 @test288(i16* %ptr, i16 %val) { - ; PPC64LE-LABEL: test288: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsh 5, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: .LBB288_1: --; PPC64LE-NEXT: lharx 5, 0, 3 --; PPC64LE-NEXT: extsh 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lharx 4, 0, 3 -+; PPC64LE-NEXT: extsh 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: bge 0, .LBB288_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: sthcx. 4, 0, 3 -+; PPC64LE-NEXT: sthcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB288_1 - ; PPC64LE-NEXT: .LBB288_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw min i16* %ptr, i16 %val acq_rel -@@ -4898,17 +4917,18 @@ define i16 @test288(i16* %ptr, i16 %val) { - define i16 @test289(i16* %ptr, i16 %val) { - ; PPC64LE-LABEL: test289: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsh 5, 4 - ; PPC64LE-NEXT: sync - ; PPC64LE-NEXT: .LBB289_1: --; PPC64LE-NEXT: lharx 5, 0, 3 --; PPC64LE-NEXT: extsh 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lharx 4, 0, 3 -+; PPC64LE-NEXT: extsh 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: bge 0, .LBB289_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: sthcx. 4, 0, 3 -+; PPC64LE-NEXT: sthcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB289_1 - ; PPC64LE-NEXT: .LBB289_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw min i16* %ptr, i16 %val seq_cst -@@ -8076,16 +8096,17 @@ define i64 @test479(i64* %ptr, i64 %val) { - define i8 @test480(i8* %ptr, i8 %val) { - ; PPC64LE-LABEL: test480: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsb 5, 4 - ; PPC64LE-NEXT: .LBB480_1: --; PPC64LE-NEXT: lbarx 5, 0, 3 --; PPC64LE-NEXT: extsb 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lbarx 4, 0, 3 -+; PPC64LE-NEXT: extsb 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: ble 0, .LBB480_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: stbcx. 4, 0, 3 -+; PPC64LE-NEXT: stbcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB480_1 - ; PPC64LE-NEXT: .LBB480_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: blr - %ret = atomicrmw max i8* %ptr, i8 %val syncscope("singlethread") monotonic - ret i8 %ret -@@ -8094,16 +8115,17 @@ define i8 @test480(i8* %ptr, i8 %val) { - define i8 @test481(i8* %ptr, i8 %val) { - ; PPC64LE-LABEL: test481: - ; PPC64LE: # %bb.0: --; PPC64LE-NEXT: mr 5, 3 -+; PPC64LE-NEXT: extsb 5, 4 - ; PPC64LE-NEXT: .LBB481_1: --; PPC64LE-NEXT: lbarx 3, 0, 5 --; PPC64LE-NEXT: extsb 6, 3 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lbarx 4, 0, 3 -+; PPC64LE-NEXT: extsb 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: ble 0, .LBB481_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: stbcx. 4, 0, 5 -+; PPC64LE-NEXT: stbcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB481_1 - ; PPC64LE-NEXT: .LBB481_3: -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw max i8* %ptr, i8 %val syncscope("singlethread") acquire -@@ -8113,17 +8135,18 @@ define i8 @test481(i8* %ptr, i8 %val) { - define i8 @test482(i8* %ptr, i8 %val) { - ; PPC64LE-LABEL: test482: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsb 5, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: .LBB482_1: --; PPC64LE-NEXT: lbarx 5, 0, 3 --; PPC64LE-NEXT: extsb 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lbarx 4, 0, 3 -+; PPC64LE-NEXT: extsb 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: ble 0, .LBB482_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: stbcx. 4, 0, 3 -+; PPC64LE-NEXT: stbcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB482_1 - ; PPC64LE-NEXT: .LBB482_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: blr - %ret = atomicrmw max i8* %ptr, i8 %val syncscope("singlethread") release - ret i8 %ret -@@ -8132,17 +8155,18 @@ define i8 @test482(i8* %ptr, i8 %val) { - define i8 @test483(i8* %ptr, i8 %val) { - ; PPC64LE-LABEL: test483: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsb 5, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: .LBB483_1: --; PPC64LE-NEXT: lbarx 5, 0, 3 --; PPC64LE-NEXT: extsb 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lbarx 4, 0, 3 -+; PPC64LE-NEXT: extsb 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: ble 0, .LBB483_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: stbcx. 4, 0, 3 -+; PPC64LE-NEXT: stbcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB483_1 - ; PPC64LE-NEXT: .LBB483_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw max i8* %ptr, i8 %val syncscope("singlethread") acq_rel -@@ -8152,17 +8176,18 @@ define i8 @test483(i8* %ptr, i8 %val) { - define i8 @test484(i8* %ptr, i8 %val) { - ; PPC64LE-LABEL: test484: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsb 5, 4 - ; PPC64LE-NEXT: sync - ; PPC64LE-NEXT: .LBB484_1: --; PPC64LE-NEXT: lbarx 5, 0, 3 --; PPC64LE-NEXT: extsb 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lbarx 4, 0, 3 -+; PPC64LE-NEXT: extsb 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: ble 0, .LBB484_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: stbcx. 4, 0, 3 -+; PPC64LE-NEXT: stbcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB484_1 - ; PPC64LE-NEXT: .LBB484_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw max i8* %ptr, i8 %val syncscope("singlethread") seq_cst -@@ -8172,16 +8197,17 @@ define i8 @test484(i8* %ptr, i8 %val) { - define i16 @test485(i16* %ptr, i16 %val) { - ; PPC64LE-LABEL: test485: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsh 5, 4 - ; PPC64LE-NEXT: .LBB485_1: --; PPC64LE-NEXT: lharx 5, 0, 3 --; PPC64LE-NEXT: extsh 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lharx 4, 0, 3 -+; PPC64LE-NEXT: extsh 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: ble 0, .LBB485_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: sthcx. 4, 0, 3 -+; PPC64LE-NEXT: sthcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB485_1 - ; PPC64LE-NEXT: .LBB485_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: blr - %ret = atomicrmw max i16* %ptr, i16 %val syncscope("singlethread") monotonic - ret i16 %ret -@@ -8190,16 +8216,17 @@ define i16 @test485(i16* %ptr, i16 %val) { - define i16 @test486(i16* %ptr, i16 %val) { - ; PPC64LE-LABEL: test486: - ; PPC64LE: # %bb.0: --; PPC64LE-NEXT: mr 5, 3 -+; PPC64LE-NEXT: extsh 5, 4 - ; PPC64LE-NEXT: .LBB486_1: --; PPC64LE-NEXT: lharx 3, 0, 5 --; PPC64LE-NEXT: extsh 6, 3 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lharx 4, 0, 3 -+; PPC64LE-NEXT: extsh 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: ble 0, .LBB486_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: sthcx. 4, 0, 5 -+; PPC64LE-NEXT: sthcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB486_1 - ; PPC64LE-NEXT: .LBB486_3: -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw max i16* %ptr, i16 %val syncscope("singlethread") acquire -@@ -8209,17 +8236,18 @@ define i16 @test486(i16* %ptr, i16 %val) { - define i16 @test487(i16* %ptr, i16 %val) { - ; PPC64LE-LABEL: test487: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsh 5, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: .LBB487_1: --; PPC64LE-NEXT: lharx 5, 0, 3 --; PPC64LE-NEXT: extsh 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lharx 4, 0, 3 -+; PPC64LE-NEXT: extsh 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: ble 0, .LBB487_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: sthcx. 4, 0, 3 -+; PPC64LE-NEXT: sthcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB487_1 - ; PPC64LE-NEXT: .LBB487_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: blr - %ret = atomicrmw max i16* %ptr, i16 %val syncscope("singlethread") release - ret i16 %ret -@@ -8228,17 +8256,18 @@ define i16 @test487(i16* %ptr, i16 %val) { - define i16 @test488(i16* %ptr, i16 %val) { - ; PPC64LE-LABEL: test488: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsh 5, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: .LBB488_1: --; PPC64LE-NEXT: lharx 5, 0, 3 --; PPC64LE-NEXT: extsh 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lharx 4, 0, 3 -+; PPC64LE-NEXT: extsh 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: ble 0, .LBB488_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: sthcx. 4, 0, 3 -+; PPC64LE-NEXT: sthcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB488_1 - ; PPC64LE-NEXT: .LBB488_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw max i16* %ptr, i16 %val syncscope("singlethread") acq_rel -@@ -8248,17 +8277,18 @@ define i16 @test488(i16* %ptr, i16 %val) { - define i16 @test489(i16* %ptr, i16 %val) { - ; PPC64LE-LABEL: test489: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsh 5, 4 - ; PPC64LE-NEXT: sync - ; PPC64LE-NEXT: .LBB489_1: --; PPC64LE-NEXT: lharx 5, 0, 3 --; PPC64LE-NEXT: extsh 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lharx 4, 0, 3 -+; PPC64LE-NEXT: extsh 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: ble 0, .LBB489_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: sthcx. 4, 0, 3 -+; PPC64LE-NEXT: sthcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB489_1 - ; PPC64LE-NEXT: .LBB489_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw max i16* %ptr, i16 %val syncscope("singlethread") seq_cst -@@ -8450,16 +8480,17 @@ define i64 @test499(i64* %ptr, i64 %val) { - define i8 @test500(i8* %ptr, i8 %val) { - ; PPC64LE-LABEL: test500: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsb 5, 4 - ; PPC64LE-NEXT: .LBB500_1: --; PPC64LE-NEXT: lbarx 5, 0, 3 --; PPC64LE-NEXT: extsb 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lbarx 4, 0, 3 -+; PPC64LE-NEXT: extsb 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: bge 0, .LBB500_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: stbcx. 4, 0, 3 -+; PPC64LE-NEXT: stbcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB500_1 - ; PPC64LE-NEXT: .LBB500_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: blr - %ret = atomicrmw min i8* %ptr, i8 %val syncscope("singlethread") monotonic - ret i8 %ret -@@ -8468,16 +8499,17 @@ define i8 @test500(i8* %ptr, i8 %val) { - define i8 @test501(i8* %ptr, i8 %val) { - ; PPC64LE-LABEL: test501: - ; PPC64LE: # %bb.0: --; PPC64LE-NEXT: mr 5, 3 -+; PPC64LE-NEXT: extsb 5, 4 - ; PPC64LE-NEXT: .LBB501_1: --; PPC64LE-NEXT: lbarx 3, 0, 5 --; PPC64LE-NEXT: extsb 6, 3 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lbarx 4, 0, 3 -+; PPC64LE-NEXT: extsb 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: bge 0, .LBB501_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: stbcx. 4, 0, 5 -+; PPC64LE-NEXT: stbcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB501_1 - ; PPC64LE-NEXT: .LBB501_3: -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw min i8* %ptr, i8 %val syncscope("singlethread") acquire -@@ -8487,17 +8519,18 @@ define i8 @test501(i8* %ptr, i8 %val) { - define i8 @test502(i8* %ptr, i8 %val) { - ; PPC64LE-LABEL: test502: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsb 5, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: .LBB502_1: --; PPC64LE-NEXT: lbarx 5, 0, 3 --; PPC64LE-NEXT: extsb 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lbarx 4, 0, 3 -+; PPC64LE-NEXT: extsb 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: bge 0, .LBB502_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: stbcx. 4, 0, 3 -+; PPC64LE-NEXT: stbcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB502_1 - ; PPC64LE-NEXT: .LBB502_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: blr - %ret = atomicrmw min i8* %ptr, i8 %val syncscope("singlethread") release - ret i8 %ret -@@ -8506,17 +8539,18 @@ define i8 @test502(i8* %ptr, i8 %val) { - define i8 @test503(i8* %ptr, i8 %val) { - ; PPC64LE-LABEL: test503: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsb 5, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: .LBB503_1: --; PPC64LE-NEXT: lbarx 5, 0, 3 --; PPC64LE-NEXT: extsb 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lbarx 4, 0, 3 -+; PPC64LE-NEXT: extsb 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: bge 0, .LBB503_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: stbcx. 4, 0, 3 -+; PPC64LE-NEXT: stbcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB503_1 - ; PPC64LE-NEXT: .LBB503_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw min i8* %ptr, i8 %val syncscope("singlethread") acq_rel -@@ -8526,17 +8560,18 @@ define i8 @test503(i8* %ptr, i8 %val) { - define i8 @test504(i8* %ptr, i8 %val) { - ; PPC64LE-LABEL: test504: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsb 5, 4 - ; PPC64LE-NEXT: sync - ; PPC64LE-NEXT: .LBB504_1: --; PPC64LE-NEXT: lbarx 5, 0, 3 --; PPC64LE-NEXT: extsb 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lbarx 4, 0, 3 -+; PPC64LE-NEXT: extsb 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: bge 0, .LBB504_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: stbcx. 4, 0, 3 -+; PPC64LE-NEXT: stbcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB504_1 - ; PPC64LE-NEXT: .LBB504_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw min i8* %ptr, i8 %val syncscope("singlethread") seq_cst -@@ -8546,16 +8581,17 @@ define i8 @test504(i8* %ptr, i8 %val) { - define i16 @test505(i16* %ptr, i16 %val) { - ; PPC64LE-LABEL: test505: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsh 5, 4 - ; PPC64LE-NEXT: .LBB505_1: --; PPC64LE-NEXT: lharx 5, 0, 3 --; PPC64LE-NEXT: extsh 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lharx 4, 0, 3 -+; PPC64LE-NEXT: extsh 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: bge 0, .LBB505_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: sthcx. 4, 0, 3 -+; PPC64LE-NEXT: sthcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB505_1 - ; PPC64LE-NEXT: .LBB505_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: blr - %ret = atomicrmw min i16* %ptr, i16 %val syncscope("singlethread") monotonic - ret i16 %ret -@@ -8564,16 +8600,17 @@ define i16 @test505(i16* %ptr, i16 %val) { - define i16 @test506(i16* %ptr, i16 %val) { - ; PPC64LE-LABEL: test506: - ; PPC64LE: # %bb.0: --; PPC64LE-NEXT: mr 5, 3 -+; PPC64LE-NEXT: extsh 5, 4 - ; PPC64LE-NEXT: .LBB506_1: --; PPC64LE-NEXT: lharx 3, 0, 5 --; PPC64LE-NEXT: extsh 6, 3 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lharx 4, 0, 3 -+; PPC64LE-NEXT: extsh 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: bge 0, .LBB506_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: sthcx. 4, 0, 5 -+; PPC64LE-NEXT: sthcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB506_1 - ; PPC64LE-NEXT: .LBB506_3: -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw min i16* %ptr, i16 %val syncscope("singlethread") acquire -@@ -8583,17 +8620,18 @@ define i16 @test506(i16* %ptr, i16 %val) { - define i16 @test507(i16* %ptr, i16 %val) { - ; PPC64LE-LABEL: test507: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsh 5, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: .LBB507_1: --; PPC64LE-NEXT: lharx 5, 0, 3 --; PPC64LE-NEXT: extsh 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lharx 4, 0, 3 -+; PPC64LE-NEXT: extsh 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: bge 0, .LBB507_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: sthcx. 4, 0, 3 -+; PPC64LE-NEXT: sthcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB507_1 - ; PPC64LE-NEXT: .LBB507_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: blr - %ret = atomicrmw min i16* %ptr, i16 %val syncscope("singlethread") release - ret i16 %ret -@@ -8602,17 +8640,18 @@ define i16 @test507(i16* %ptr, i16 %val) { - define i16 @test508(i16* %ptr, i16 %val) { - ; PPC64LE-LABEL: test508: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsh 5, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: .LBB508_1: --; PPC64LE-NEXT: lharx 5, 0, 3 --; PPC64LE-NEXT: extsh 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lharx 4, 0, 3 -+; PPC64LE-NEXT: extsh 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: bge 0, .LBB508_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: sthcx. 4, 0, 3 -+; PPC64LE-NEXT: sthcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB508_1 - ; PPC64LE-NEXT: .LBB508_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw min i16* %ptr, i16 %val syncscope("singlethread") acq_rel -@@ -8622,17 +8661,18 @@ define i16 @test508(i16* %ptr, i16 %val) { - define i16 @test509(i16* %ptr, i16 %val) { - ; PPC64LE-LABEL: test509: - ; PPC64LE: # %bb.0: -+; PPC64LE-NEXT: extsh 5, 4 - ; PPC64LE-NEXT: sync - ; PPC64LE-NEXT: .LBB509_1: --; PPC64LE-NEXT: lharx 5, 0, 3 --; PPC64LE-NEXT: extsh 6, 5 --; PPC64LE-NEXT: cmpw 4, 6 -+; PPC64LE-NEXT: lharx 4, 0, 3 -+; PPC64LE-NEXT: extsh 6, 4 -+; PPC64LE-NEXT: cmpw 5, 6 - ; PPC64LE-NEXT: bge 0, .LBB509_3 - ; PPC64LE-NEXT: # %bb.2: --; PPC64LE-NEXT: sthcx. 4, 0, 3 -+; PPC64LE-NEXT: sthcx. 5, 0, 3 - ; PPC64LE-NEXT: bne 0, .LBB509_1 - ; PPC64LE-NEXT: .LBB509_3: --; PPC64LE-NEXT: mr 3, 5 -+; PPC64LE-NEXT: mr 3, 4 - ; PPC64LE-NEXT: lwsync - ; PPC64LE-NEXT: blr - %ret = atomicrmw min i16* %ptr, i16 %val syncscope("singlethread") seq_cst -diff --git llvm/test/CodeGen/PowerPC/sign-ext-atomics.ll llvm/test/CodeGen/PowerPC/sign-ext-atomics.ll -new file mode 100644 -index 000000000000..7716dc0cedcc ---- /dev/null -+++ llvm/test/CodeGen/PowerPC/sign-ext-atomics.ll -@@ -0,0 +1,105 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc -mtriple=powerpc64le-linux-gnu < %s | FileCheck %s -+define i16 @SEXTParam(i16 signext %0) #0 { -+; CHECK-LABEL: SEXTParam: -+; CHECK: # %bb.0: # %top -+; CHECK-NEXT: li 4, 0 -+; CHECK-NEXT: sth 4, -4(1) -+; CHECK-NEXT: addi 4, 1, -4 -+; CHECK-NEXT: lwsync -+; CHECK-NEXT: .LBB0_1: # %top -+; CHECK-NEXT: # -+; CHECK-NEXT: lharx 5, 0, 4 -+; CHECK-NEXT: extsh 5, 5 -+; CHECK-NEXT: cmpw 3, 5 -+; CHECK-NEXT: bge 0, .LBB0_3 -+; CHECK-NEXT: # %bb.2: # %top -+; CHECK-NEXT: # -+; CHECK-NEXT: sthcx. 3, 0, 4 -+; CHECK-NEXT: bne 0, .LBB0_1 -+; CHECK-NEXT: .LBB0_3: # %top -+; CHECK-NEXT: lwsync -+; CHECK-NEXT: lhz 3, -4(1) -+; CHECK-NEXT: cmpd 7, 3, 3 -+; CHECK-NEXT: bne- 7, .+4 -+; CHECK-NEXT: isync -+; CHECK-NEXT: blr -+top: -+ %1 = alloca i16, align 4 -+ %2 = bitcast i16* %1 to i8* -+ store i16 0, i16* %1, align 4 -+ %rv.i = atomicrmw min i16* %1, i16 %0 acq_rel -+ %rv.i2 = load atomic i16, i16* %1 acquire, align 16 -+ ret i16 %rv.i2 -+} -+ -+define i16 @noSEXTParam(i16 %0) #0 { -+; CHECK-LABEL: noSEXTParam: -+; CHECK: # %bb.0: # %top -+; CHECK-NEXT: li 4, 0 -+; CHECK-NEXT: extsh 3, 3 -+; CHECK-NEXT: sth 4, -4(1) -+; CHECK-NEXT: addi 4, 1, -4 -+; CHECK-NEXT: lwsync -+; CHECK-NEXT: .LBB1_1: # %top -+; CHECK-NEXT: # -+; CHECK-NEXT: lharx 5, 0, 4 -+; CHECK-NEXT: extsh 5, 5 -+; CHECK-NEXT: cmpw 3, 5 -+; CHECK-NEXT: bge 0, .LBB1_3 -+; CHECK-NEXT: # %bb.2: # %top -+; CHECK-NEXT: # -+; CHECK-NEXT: sthcx. 3, 0, 4 -+; CHECK-NEXT: bne 0, .LBB1_1 -+; CHECK-NEXT: .LBB1_3: # %top -+; CHECK-NEXT: lwsync -+; CHECK-NEXT: lhz 3, -4(1) -+; CHECK-NEXT: cmpd 7, 3, 3 -+; CHECK-NEXT: bne- 7, .+4 -+; CHECK-NEXT: isync -+; CHECK-NEXT: blr -+top: -+ %1 = alloca i16, align 4 -+ %2 = bitcast i16* %1 to i8* -+ store i16 0, i16* %1, align 4 -+ %rv.i = atomicrmw min i16* %1, i16 %0 acq_rel -+ %rv.i2 = load atomic i16, i16* %1 acquire, align 16 -+ ret i16 %rv.i2 -+} -+ -+define i16 @noSEXTLoad(i16 *%p) #0 { -+; CHECK-LABEL: noSEXTLoad: -+; CHECK: # %bb.0: # %top -+; CHECK-NEXT: lhz 5, 0(3) -+; CHECK-NEXT: li 4, 0 -+; CHECK-NEXT: addi 3, 1, -4 -+; CHECK-NEXT: sth 4, -4(1) -+; CHECK-NEXT: extsh 4, 5 -+; CHECK-NEXT: lwsync -+; CHECK-NEXT: .LBB2_1: # %top -+; CHECK-NEXT: # -+; CHECK-NEXT: lharx 5, 0, 3 -+; CHECK-NEXT: extsh 5, 5 -+; CHECK-NEXT: cmpw 4, 5 -+; CHECK-NEXT: bge 0, .LBB2_3 -+; CHECK-NEXT: # %bb.2: # %top -+; CHECK-NEXT: # -+; CHECK-NEXT: sthcx. 4, 0, 3 -+; CHECK-NEXT: bne 0, .LBB2_1 -+; CHECK-NEXT: .LBB2_3: # %top -+; CHECK-NEXT: lwsync -+; CHECK-NEXT: lhz 3, -4(1) -+; CHECK-NEXT: cmpd 7, 3, 3 -+; CHECK-NEXT: bne- 7, .+4 -+; CHECK-NEXT: isync -+; CHECK-NEXT: blr -+top: -+ %0 = load i16, i16* %p, align 2 -+ %1 = alloca i16, align 4 -+ %2 = bitcast i16* %1 to i8* -+ store i16 0, i16* %1, align 4 -+ %rv.i = atomicrmw min i16* %1, i16 %0 acq_rel -+ %rv.i2 = load atomic i16, i16* %1 acquire, align 16 -+ ret i16 %rv.i2 -+} -+attributes #0 = { nounwind } --- -2.30.0 - diff --git a/deps/patches/llvm-11-D94813-mergeicmps.patch b/deps/patches/llvm-11-D94813-mergeicmps.patch deleted file mode 100644 index 5eb98be41cbe5..0000000000000 --- a/deps/patches/llvm-11-D94813-mergeicmps.patch +++ /dev/null @@ -1,111 +0,0 @@ -From 5fda6724d697d428136266a61159a46c5da092f0 Mon Sep 17 00:00:00 2001 -From: Valentin Churavy -Date: Sat, 16 Jan 2021 17:36:09 -0500 -Subject: [PATCH] [MergeICmps] Don't merge icmps derived from pointers with - addressspaces - -IIUC we can't emit `memcmp` between pointers in addressspaces, -doing so will trigger an assertion since the signature of the memcmp -will not match it's arguments (https://bugs.llvm.org/show_bug.cgi?id=48661). - -This PR disables the attempt to merge icmps, -when the pointer is in an addressspace. - -Differential Revision: https://reviews.llvm.org/D94813 ---- - llvm/lib/Transforms/Scalar/MergeICmps.cpp | 4 ++ - .../Transforms/MergeICmps/addressspaces.ll | 67 +++++++++++++++++++ - 2 files changed, 71 insertions(+) - create mode 100644 llvm/test/Transforms/MergeICmps/addressspaces.ll - -diff --git llvm/lib/Transforms/Scalar/MergeICmps.cpp llvm/lib/Transforms/Scalar/MergeICmps.cpp -index 1559e7a41a7c..621c9e504398 100644 ---- llvm/lib/Transforms/Scalar/MergeICmps.cpp -+++ llvm/lib/Transforms/Scalar/MergeICmps.cpp -@@ -154,6 +154,10 @@ BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) { - return {}; - } - Value *const Addr = LoadI->getOperand(0); -+ if (Addr->getType()->getPointerAddressSpace() != 0) { -+ LLVM_DEBUG(dbgs() << "from non-zero AddressSpace\n"); -+ return {}; -+ } - auto *const GEP = dyn_cast(Addr); - if (!GEP) - return {}; -diff --git llvm/test/Transforms/MergeICmps/addressspaces.ll llvm/test/Transforms/MergeICmps/addressspaces.ll -new file mode 100644 -index 000000000000..9a74b4a5b2ca ---- /dev/null -+++ llvm/test/Transforms/MergeICmps/addressspaces.ll -@@ -0,0 +1,67 @@ -+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -+; RUN: opt < %s -mergeicmps -S | FileCheck %s -+ -+source_filename = "==" -+target datalayout = "e-m:e-i64:64-n32:64" -+target triple = "powerpc64le-unknown-linux-gnu" -+ -+define void @juliaAS([2 x [5 x i64]] addrspace(11)* nocapture nonnull readonly align 8 dereferenceable(80) %0, [2 x [5 x i64]] addrspace(11)* nocapture nonnull readonly align 8 dereferenceable(80) %1) { -+; CHECK-LABEL: @juliaAS( -+; CHECK-NEXT: top: -+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* [[TMP0:%.*]], i64 0, i64 1, i64 2 -+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* [[TMP0]], i64 0, i64 1, i64 3 -+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* [[TMP0]], i64 0, i64 1, i64 4 -+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* [[TMP1:%.*]], i64 0, i64 1, i64 2 -+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* [[TMP1]], i64 0, i64 1, i64 3 -+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* [[TMP1]], i64 0, i64 1, i64 4 -+; CHECK-NEXT: [[TMP8:%.*]] = load i64, i64 addrspace(11)* [[TMP2]], align 8 -+; CHECK-NEXT: [[TMP9:%.*]] = load i64, i64 addrspace(11)* [[TMP5]], align 8 -+; CHECK-NEXT: [[DOTNOT17:%.*]] = icmp eq i64 [[TMP8]], [[TMP9]] -+; CHECK-NEXT: br i1 [[DOTNOT17]], label [[L70:%.*]], label [[L90:%.*]] -+; CHECK: L70: -+; CHECK-NEXT: [[TMP10:%.*]] = load i64, i64 addrspace(11)* [[TMP3]], align 8 -+; CHECK-NEXT: [[TMP11:%.*]] = load i64, i64 addrspace(11)* [[TMP6]], align 8 -+; CHECK-NEXT: [[DOTNOT18:%.*]] = icmp eq i64 [[TMP10]], [[TMP11]] -+; CHECK-NEXT: br i1 [[DOTNOT18]], label [[L74:%.*]], label [[L90]] -+; CHECK: L74: -+; CHECK-NEXT: [[TMP12:%.*]] = load i64, i64 addrspace(11)* [[TMP4]], align 8 -+; CHECK-NEXT: [[TMP13:%.*]] = load i64, i64 addrspace(11)* [[TMP7]], align 8 -+; CHECK-NEXT: [[DOTNOT19:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] -+; CHECK-NEXT: br label [[L90]] -+; CHECK: L90: -+; CHECK-NEXT: [[VALUE_PHI2_OFF0:%.*]] = phi i1 [ false, [[TOP:%.*]] ], [ [[DOTNOT19]], [[L74]] ], [ false, [[L70]] ] -+; CHECK-NEXT: ret void -+; -+top: -+ %2 = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* %0, i64 0, i64 1, i64 2 -+ %3 = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* %0, i64 0, i64 1, i64 3 -+ %4 = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* %0, i64 0, i64 1, i64 4 -+ %5 = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* %1, i64 0, i64 1, i64 2 -+ %6 = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* %1, i64 0, i64 1, i64 3 -+ %7 = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* %1, i64 0, i64 1, i64 4 -+ %8 = load i64, i64 addrspace(11)* %2, align 8 -+ %9 = load i64, i64 addrspace(11)* %5, align 8 -+ %.not17 = icmp eq i64 %8, %9 -+ br i1 %.not17, label %L70, label %L90 -+ -+L70: ; preds = %top -+ %10 = load i64, i64 addrspace(11)* %3, align 8 -+ %11 = load i64, i64 addrspace(11)* %6, align 8 -+ %.not18 = icmp eq i64 %10, %11 -+ br i1 %.not18, label %L74, label %L90 -+ -+L74: ; preds = %L70 -+ %12 = load i64, i64 addrspace(11)* %4, align 8 -+ %13 = load i64, i64 addrspace(11)* %7, align 8 -+ %.not19 = icmp eq i64 %12, %13 -+ br label %L90 -+ -+L90: ; preds = %L74, %L70, %top -+ %value_phi2.off0 = phi i1 [ false, %top ], [ %.not19, %L74 ], [ false, %L70 ] -+ ret void -+} -+ -+!llvm.module.flags = !{!0} -+ -+!0 = !{i32 1, !"Debug Info Version", i32 3} -+ --- -2.30.0 - diff --git a/deps/patches/llvm-11-D94980-CTR-half.patch b/deps/patches/llvm-11-D94980-CTR-half.patch deleted file mode 100644 index 64debc43a9e92..0000000000000 --- a/deps/patches/llvm-11-D94980-CTR-half.patch +++ /dev/null @@ -1,398 +0,0 @@ -From 8a3be8c0ff83f2b9d2db4fd581ec014bd3217505 Mon Sep 17 00:00:00 2001 -From: Nemanja Ivanovic -Date: Tue, 19 Jan 2021 19:52:31 -0500 -Subject: [PATCH] [PowerPC] Do not emit HW loop with half precision operations - -If a loop has any operations on half precision values, there will be calls to library functions on Power8. Even on Power9, there is a small subset of instructions that are actually supported for the type. - -This patch disables HW loops whenever any operations on the type are found (other than the handfull of supported ones when compiling for Power9). Fixes a few PR's opened by Julia: - -https://bugs.llvm.org/show_bug.cgi?id=48785 -https://bugs.llvm.org/show_bug.cgi?id=48786 -https://bugs.llvm.org/show_bug.cgi?id=48519 ---- - .../Target/PowerPC/PPCTargetTransformInfo.cpp | 29 +- - llvm/test/CodeGen/PowerPC/pr48519.ll | 296 ++++++++++++++++-- - 2 files changed, 299 insertions(+), 26 deletions(-) - -diff --git llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp -index 49c10fdf8898..adf9f0df82f8 100644 ---- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp -+++ llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp -@@ -276,8 +276,33 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, - return false; - }; - -+ auto supportedHalfPrecisionOp = [](Instruction *Inst) { -+ switch (Inst->getOpcode()) { -+ default: return false; -+ case Instruction::FPTrunc: -+ case Instruction::FPExt: -+ case Instruction::Load: -+ case Instruction::Store: -+ case Instruction::FPToUI: -+ case Instruction::UIToFP: -+ case Instruction::FPToSI: -+ case Instruction::SIToFP: -+ return true; -+ } -+ }; -+ - for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); - J != JE; ++J) { -+ // There are no direct operations on half precision so assume that -+ // anything with that type requires a call except for a few select -+ // operations with Power9. -+ if (Instruction *CurrInst = dyn_cast(J)) { -+ for (const auto &Op : CurrInst->operands()) { -+ if (Op->getType()->getScalarType()->isHalfTy() || -+ CurrInst->getType()->getScalarType()->isHalfTy()) -+ return !(ST->isISA3_0() && supportedHalfPrecisionOp(CurrInst)); -+ } -+ } - if (CallInst *CI = dyn_cast(J)) { - // Inline ASM is okay, unless it clobbers the ctr register. - if (InlineAsm *IA = dyn_cast(CI->getCalledOperand())) { -@@ -441,10 +466,6 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, - isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) || - isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType())) - return true; -- if (!ST->isISA3_0() && -- (CI->getSrcTy()->getScalarType()->isHalfTy() || -- CI->getDestTy()->getScalarType()->isHalfTy())) -- return true; - } else if (isLargeIntegerTy(!TM.isPPC64(), - J->getType()->getScalarType()) && - (J->getOpcode() == Instruction::UDiv || -diff --git llvm/test/CodeGen/PowerPC/pr48519.ll llvm/test/CodeGen/PowerPC/pr48519.ll -index 777874e91c26..50970cb185d8 100644 ---- llvm/test/CodeGen/PowerPC/pr48519.ll -+++ llvm/test/CodeGen/PowerPC/pr48519.ll -@@ -1,9 +1,13 @@ - ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py - ; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ - ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s -+; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ -+; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ -+; RUN: -check-prefix=CHECK-P9 -+ - define void @julia__typed_vcat_20() #0 { - ; CHECK-LABEL: julia__typed_vcat_20: --; CHECK: # %bb.0: # %top -+; CHECK: # %bb.0: # %bb - ; CHECK-NEXT: mflr r0 - ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill - ; CHECK-NEXT: std r0, 16(r1) -@@ -11,7 +15,7 @@ define void @julia__typed_vcat_20() #0 { - ; CHECK-NEXT: li r3, 1 - ; CHECK-NEXT: li r30, 0 - ; CHECK-NEXT: .p2align 4 --; CHECK-NEXT: .LBB0_1: # %L139 -+; CHECK-NEXT: .LBB0_1: # %bb3 - ; CHECK-NEXT: # - ; CHECK-NEXT: addi r3, r3, -1 - ; CHECK-NEXT: mtfprd f0, r3 -@@ -24,32 +28,280 @@ define void @julia__typed_vcat_20() #0 { - ; CHECK-NEXT: li r3, 0 - ; CHECK-NEXT: cmpldi r30, 0 - ; CHECK-NEXT: bne+ cr0, .LBB0_1 --; CHECK-NEXT: # %bb.2: # %pass.1 -+; CHECK-NEXT: # %bb.2: # %bb11 - ; CHECK-NEXT: bl __gnu_f2h_ieee - ; CHECK-NEXT: nop - ; CHECK-NEXT: sth r3, 0(r3) --top: -- %.sroa.6.0.copyload = load i64, i64 addrspace(11)* null, align 8 -- %0 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %.sroa.6.0.copyload, i64 0) -- %1 = extractvalue { i64, i1 } %0, 0 -- br label %L139 -- --L139: ; preds = %L139, %top -- %value_phi21 = phi i64 [ %5, %L139 ], [ 1, %top ] -- %value_phi23 = phi i64 [ 0, %L139 ], [ 1, %top ] -- %2 = add nsw i64 %value_phi23, -1 -- %3 = add i64 %2, 0 -- %4 = sitofp i64 %3 to half -- store half %4, half addrspace(13)* undef, align 2 -- %.not101.not = icmp eq i64 %value_phi21, 0 -- %5 = add i64 %value_phi21, 1 -- br i1 %.not101.not, label %pass.1, label %L139 -- --pass.1: ; preds = %L139 -+; -+; CHECK-P9-LABEL: julia__typed_vcat_20: -+; CHECK-P9: # %bb.0: # %bb -+; CHECK-P9-NEXT: li r3, 0 -+; CHECK-P9-NEXT: mtctr r3 -+; CHECK-P9-NEXT: li r3, 1 -+; CHECK-P9-NEXT: .p2align 4 -+; CHECK-P9-NEXT: .LBB0_1: # %bb3 -+; CHECK-P9-NEXT: # -+; CHECK-P9-NEXT: addi r3, r3, -1 -+; CHECK-P9-NEXT: mtfprd f0, r3 -+; CHECK-P9-NEXT: xscvsxdsp f0, f0 -+; CHECK-P9-NEXT: xscvdphp f0, f0 -+; CHECK-P9-NEXT: mffprwz r3, f0 -+; CHECK-P9-NEXT: mtfprwz f0, r3 -+; CHECK-P9-NEXT: li r3, 0 -+; CHECK-P9-NEXT: xscvhpdp f0, f0 -+; CHECK-P9-NEXT: bdnz .LBB0_1 -+; CHECK-P9-NEXT: # %bb.2: # %bb11 -+; CHECK-P9-NEXT: xscvdphp f0, f0 -+; CHECK-P9-NEXT: stxsihx f0, 0, r3 -+bb: -+ %i = load i64, i64 addrspace(11)* null, align 8 -+ %i1 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %i, i64 0) -+ %i2 = extractvalue { i64, i1 } %i1, 0 -+ br label %bb3 -+ -+bb3: ; preds = %bb3, %bb -+ %i4 = phi i64 [ %i10, %bb3 ], [ 1, %bb ] -+ %i5 = phi i64 [ 0, %bb3 ], [ 1, %bb ] -+ %i6 = add nsw i64 %i5, -1 -+ %i7 = add i64 %i6, 0 -+ %i8 = sitofp i64 %i7 to half -+ store half %i8, half addrspace(13)* undef, align 2 -+ %i9 = icmp eq i64 %i4, 0 -+ %i10 = add i64 %i4, 1 -+ br i1 %i9, label %bb11, label %bb3 -+ -+bb11: ; preds = %bb3 - unreachable - } - --; Function Attrs: nounwind readnone speculatable willreturn - declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) #0 - -+define void @julia__hypot_17() #0 { -+; CHECK-LABEL: julia__hypot_17: -+; CHECK: # %bb.0: # %bb -+; CHECK-NEXT: mflr r0 -+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill -+; CHECK-NEXT: std r0, 16(r1) -+; CHECK-NEXT: stdu r1, -48(r1) -+; CHECK-NEXT: li r30, 3 -+; CHECK-NEXT: .p2align 5 -+; CHECK-NEXT: .LBB1_1: # %bb1 -+; CHECK-NEXT: # -+; CHECK-NEXT: addi r30, r30, -1 -+; CHECK-NEXT: cmpldi r30, 0 -+; CHECK-NEXT: beq cr0, .LBB1_3 -+; CHECK-NEXT: # %bb.2: # %bb3 -+; CHECK-NEXT: # -+; CHECK-NEXT: lhz r3, 0(0) -+; CHECK-NEXT: bl __gnu_h2f_ieee -+; CHECK-NEXT: nop -+; CHECK-NEXT: fcmpu cr0, f1, f1 -+; CHECK-NEXT: bun cr0, .LBB1_1 -+; CHECK-NEXT: .LBB1_3: # %bb9 -+; CHECK-NEXT: addi r1, r1, 48 -+; CHECK-NEXT: ld r0, 16(r1) -+; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -+; CHECK-NEXT: mtlr r0 -+; CHECK-NEXT: blr -+; -+; CHECK-P9-LABEL: julia__hypot_17: -+; CHECK-P9: # %bb.0: # %bb -+; CHECK-P9-NEXT: li r3, 3 -+; CHECK-P9-NEXT: mtctr r3 -+; CHECK-P9-NEXT: li r3, 0 -+; CHECK-P9-NEXT: .p2align 5 -+; CHECK-P9-NEXT: .LBB1_1: # %bb1 -+; CHECK-P9-NEXT: # -+; CHECK-P9-NEXT: bdzlr -+; CHECK-P9-NEXT: # %bb.2: # %bb3 -+; CHECK-P9-NEXT: # -+; CHECK-P9-NEXT: lxsihzx f0, 0, r3 -+; CHECK-P9-NEXT: xscvhpdp f0, f0 -+; CHECK-P9-NEXT: fcmpu cr0, f0, f0 -+; CHECK-P9-NEXT: bun cr0, .LBB1_1 -+; CHECK-P9-NEXT: # %bb.3: # %bb9 -+; CHECK-P9-NEXT: blr -+bb: -+ br label %bb1 -+ -+bb1: ; preds = %bb3, %bb -+ %i = phi i64 [ %i4, %bb3 ], [ 2, %bb ] -+ %i2 = icmp eq i64 %i, 4 -+ br i1 %i2, label %bb9, label %bb3 -+ -+bb3: ; preds = %bb1 -+ %i4 = add nuw nsw i64 %i, 1 -+ %i5 = load half, half* null, align 2 -+ %i6 = fpext half %i5 to float -+ %i7 = fcmp uno float %i6, 0.000000e+00 -+ %i8 = or i1 %i7, false -+ br i1 %i8, label %bb1, label %bb9 -+ -+bb9: ; preds = %bb3, %bb1 -+ ret void -+} -+ -+define void @func_48786() #0 { -+; CHECK-LABEL: func_48786: -+; CHECK: # %bb.0: # %bb -+; CHECK-NEXT: mfocrf r12, 32 -+; CHECK-NEXT: mflr r0 -+; CHECK-NEXT: std r0, 16(r1) -+; CHECK-NEXT: stw r12, 8(r1) -+; CHECK-NEXT: stdu r1, -48(r1) -+; CHECK-NEXT: ld r3, 0(r3) -+; CHECK-NEXT: std r30, 32(r1) # 8-byte Folded Spill -+; CHECK-NEXT: # implicit-def: $x30 -+; CHECK-NEXT: cmpdi r3, 0 -+; CHECK-NEXT: crnot 4*cr2+lt, eq -+; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB2_3 -+; CHECK-NEXT: .p2align 4 -+; CHECK-NEXT: .LBB2_1: # %bb4 -+; CHECK-NEXT: lhz r3, 0(r3) -+; CHECK-NEXT: bl __gnu_h2f_ieee -+; CHECK-NEXT: nop -+; CHECK-NEXT: bc 4, 4*cr2+lt, .LBB2_6 -+; CHECK-NEXT: # %bb.2: # %bb8 -+; CHECK-NEXT: bl __gnu_f2h_ieee -+; CHECK-NEXT: nop -+; CHECK-NEXT: sth r3, 0(0) -+; CHECK-NEXT: .LBB2_3: # %bb10 -+; CHECK-NEXT: # -+; CHECK-NEXT: cmpldi r30, 0 -+; CHECK-NEXT: beq cr0, .LBB2_5 -+; CHECK-NEXT: # %bb.4: # %bb12 -+; CHECK-NEXT: # -+; CHECK-NEXT: addi r30, r30, 1 -+; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB2_1 -+; CHECK-NEXT: b .LBB2_3 -+; CHECK-NEXT: .LBB2_5: # %bb14 -+; CHECK-NEXT: ld r30, 32(r1) # 8-byte Folded Reload -+; CHECK-NEXT: addi r1, r1, 48 -+; CHECK-NEXT: ld r0, 16(r1) -+; CHECK-NEXT: lwz r12, 8(r1) -+; CHECK-NEXT: mtocrf 32, r12 -+; CHECK-NEXT: mtlr r0 -+; CHECK-NEXT: blr -+; CHECK-NEXT: .LBB2_6: # %bb15 -+; -+; CHECK-P9-LABEL: func_48786: -+; CHECK-P9: # %bb.0: # %bb -+; CHECK-P9-NEXT: ld r3, 0(r3) -+; CHECK-P9-NEXT: cmpdi r3, 0 -+; CHECK-P9-NEXT: mtctr r3 -+; CHECK-P9-NEXT: li r3, 0 -+; CHECK-P9-NEXT: crnot 4*cr5+lt, eq -+; CHECK-P9-NEXT: b .LBB2_2 -+; CHECK-P9-NEXT: .p2align 5 -+; CHECK-P9-NEXT: .LBB2_1: # %bb10 -+; CHECK-P9-NEXT: # -+; CHECK-P9-NEXT: bdzlr -+; CHECK-P9-NEXT: .LBB2_2: # %bb2 -+; CHECK-P9-NEXT: # -+; CHECK-P9-NEXT: bc 12, 4*cr5+lt, .LBB2_1 -+; CHECK-P9-NEXT: # %bb.3: # %bb4 -+; CHECK-P9-NEXT: # -+; CHECK-P9-NEXT: lxsihzx f0, 0, r3 -+; CHECK-P9-NEXT: xscvhpdp f0, f0 -+; CHECK-P9-NEXT: bc 4, 4*cr5+lt, .LBB2_5 -+; CHECK-P9-NEXT: # %bb.4: # %bb8 -+; CHECK-P9-NEXT: # -+; CHECK-P9-NEXT: xscvdphp f0, f0 -+; CHECK-P9-NEXT: stxsihx f0, 0, r3 -+; CHECK-P9-NEXT: b .LBB2_1 -+; CHECK-P9-NEXT: .LBB2_5: # %bb15 -+bb: -+ %i = load i64, i64 addrspace(11)* undef, align 8 -+ %i1 = load i64, i64 addrspace(11)* undef, align 8 -+ br label %bb2 -+ -+bb2: ; preds = %bb12, %bb -+ %i3 = phi i64 [ undef, %bb ], [ %i13, %bb12 ] -+ br i1 undef, label %bb10, label %bb4 -+ -+bb4: ; preds = %bb2 -+ switch i32 undef, label %bb9 [ -+ i32 1426063360, label %bb5 -+ i32 1275068416, label %bb5 -+ ] -+ -+bb5: ; preds = %bb4, %bb4 -+ %i6 = load half, half addrspace(13)* undef, align 2 -+ %i7 = icmp ult i64 0, %i1 -+ br i1 %i7, label %bb8, label %bb15 -+ -+bb8: ; preds = %bb5 -+ store half %i6, half addrspace(13)* null, align 2 -+ br label %bb10 -+ -+bb9: ; preds = %bb4 -+ unreachable -+ -+bb10: ; preds = %bb8, %bb2 -+ %i11 = icmp eq i64 %i3, 0 -+ br i1 %i11, label %bb14, label %bb12 -+ -+bb12: ; preds = %bb10 -+ %i13 = add i64 %i3, 1 -+ br label %bb2 -+ -+bb14: ; preds = %bb10 -+ ret void -+ -+bb15: ; preds = %bb5 -+ unreachable -+} -+ -+define void @func_48785(half %arg) #0 { -+; CHECK-LABEL: func_48785: -+; CHECK: # %bb.0: # %bb -+; CHECK-NEXT: mflr r0 -+; CHECK-NEXT: std r29, -32(r1) # 8-byte Folded Spill -+; CHECK-NEXT: std r30, -24(r1) # 8-byte Folded Spill -+; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -+; CHECK-NEXT: std r0, 16(r1) -+; CHECK-NEXT: stdu r1, -64(r1) -+; CHECK-NEXT: fmr f31, f1 -+; CHECK-NEXT: li r30, 0 -+; CHECK-NEXT: .p2align 5 -+; CHECK-NEXT: .LBB3_1: # %bb1 -+; CHECK-NEXT: # -+; CHECK-NEXT: fmr f1, f31 -+; CHECK-NEXT: sldi r29, r30, 1 -+; CHECK-NEXT: bl __gnu_f2h_ieee -+; CHECK-NEXT: nop -+; CHECK-NEXT: addi r30, r30, 12 -+; CHECK-NEXT: sth r3, 0(r29) -+; CHECK-NEXT: cmpldi r30, 0 -+; CHECK-NEXT: bne+ cr0, .LBB3_1 -+; CHECK-NEXT: # %bb.2: # %bb5 -+; -+; CHECK-P9-LABEL: func_48785: -+; CHECK-P9: # %bb.0: # %bb -+; CHECK-P9-NEXT: li r3, 1 -+; CHECK-P9-NEXT: rldic r3, r3, 62, 1 -+; CHECK-P9-NEXT: mtctr r3 -+; CHECK-P9-NEXT: li r3, 0 -+; CHECK-P9-NEXT: .p2align 4 -+; CHECK-P9-NEXT: .LBB3_1: # %bb1 -+; CHECK-P9-NEXT: # -+; CHECK-P9-NEXT: xscvdphp f0, f1 -+; CHECK-P9-NEXT: stxsihx f0, 0, r3 -+; CHECK-P9-NEXT: addi r3, r3, 24 -+; CHECK-P9-NEXT: bdnz .LBB3_1 -+; CHECK-P9-NEXT: # %bb.2: # %bb5 -+bb: -+ br label %bb1 -+ -+bb1: ; preds = %bb1, %bb -+ %i = phi i64 [ 0, %bb ], [ %i3, %bb1 ] -+ %i2 = getelementptr inbounds half, half addrspace(13)* null, i64 %i -+ store half %arg, half addrspace(13)* %i2, align 2 -+ %i3 = add i64 %i, 12 -+ %i4 = icmp eq i64 %i3, 0 -+ br i1 %i4, label %bb5, label %bb1 -+ -+bb5: ; preds = %bb1 -+ unreachable -+} - attributes #0 = { nounwind } --- -2.30.0 - diff --git a/deps/patches/llvm-11-D96283-dagcombine-half.patch b/deps/patches/llvm-11-D96283-dagcombine-half.patch deleted file mode 100644 index 6c0aa9cf414d0..0000000000000 --- a/deps/patches/llvm-11-D96283-dagcombine-half.patch +++ /dev/null @@ -1,175 +0,0 @@ -From a5222aa0858a42660629c410a5b669dee16a4359 Mon Sep 17 00:00:00 2001 -From: Nemanja Ivanovic -Date: Tue, 9 Feb 2021 06:33:48 -0600 -Subject: [PATCH] [DAGCombine] Do not remove masking argument to FP16_TO_FP for - some targets - -As of commit 284f2bffc9bc5, the DAG Combiner gets rid of the masking of the -input to this node if the mask only keeps the bottom 16 bits. This is because -the underlying library function does not use the high order bits. However, on -PowerPC's ELFv2 ABI, it is the caller that is responsible for clearing the bits -from the register. Therefore, the library implementation of __gnu_h2f_ieee will -return an incorrect result if the bits aren't cleared. - -This combine is desired for ARM (and possibly other targets) so this patch adds -a query to Target Lowering to check if this zeroing needs to be kept. - -Fixes: https://bugs.llvm.org/show_bug.cgi?id=49092 - -Differential revision: https://reviews.llvm.org/D96283 ---- - llvm/include/llvm/CodeGen/TargetLowering.h | 4 ++ - llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- - llvm/lib/Target/PowerPC/PPCISelLowering.h | 3 ++ - .../PowerPC/handle-f16-storage-type.ll | 4 ++ - llvm/test/CodeGen/PowerPC/pr48519.ll | 2 + - llvm/test/CodeGen/PowerPC/pr49092.ll | 39 +++++++++++++++++++ - 6 files changed, 53 insertions(+), 1 deletion(-) - create mode 100644 llvm/test/CodeGen/PowerPC/pr49092.ll - -diff --git llvm/include/llvm/CodeGen/TargetLowering.h llvm/include/llvm/CodeGen/TargetLowering.h -index a8abd2973587..6c1cb1c54d05 100644 ---- llvm/include/llvm/CodeGen/TargetLowering.h -+++ llvm/include/llvm/CodeGen/TargetLowering.h -@@ -2789,6 +2789,10 @@ public: - return false; - } - -+ /// Does this target require the clearing of high-order bits in a register -+ /// passed to the fp16 to fp conversion library function. -+ virtual bool shouldKeepZExtForFP16Conv() const { return false; } -+ - //===--------------------------------------------------------------------===// - // Runtime Library hooks - // -diff --git llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp -index a17ac6fe06a2..762f58427649 100644 ---- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp -+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp -@@ -21182,7 +21182,7 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { - SDValue N0 = N->getOperand(0); - - // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) -- if (N0->getOpcode() == ISD::AND) { -+ if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) { - ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1)); - if (AndConst && AndConst->getAPIntValue() == 0xffff) { - return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), -diff --git llvm/lib/Target/PowerPC/PPCISelLowering.h llvm/lib/Target/PowerPC/PPCISelLowering.h -index f1f3cb9e31d9..61e97e6b82d9 100644 ---- llvm/lib/Target/PowerPC/PPCISelLowering.h -+++ llvm/lib/Target/PowerPC/PPCISelLowering.h -@@ -987,6 +987,9 @@ namespace llvm { - shouldExpandBuildVectorWithShuffles(EVT VT, - unsigned DefinedValues) const override; - -+ // Keep the zero-extensions for arguments to libcalls. -+ bool shouldKeepZExtForFP16Conv() const override { return true; } -+ - /// createFastISel - This method returns a target-specific FastISel object, - /// or null if the target does not support "fast" instruction selection. - FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, -diff --git llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll -index 9977b6b33560..ab19afa2beb5 100644 ---- llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll -+++ llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll -@@ -1156,6 +1156,7 @@ define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 { - ; P8-NEXT: xscvsxdsp f1, f0 - ; P8-NEXT: bl __gnu_f2h_ieee - ; P8-NEXT: nop -+; P8-NEXT: clrldi r3, r3, 48 - ; P8-NEXT: bl __gnu_h2f_ieee - ; P8-NEXT: nop - ; P8-NEXT: xsaddsp f1, f31, f1 -@@ -1175,6 +1176,7 @@ define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 { - ; CHECK-NEXT: xscvhpdp f0, f0 - ; CHECK-NEXT: xscvdphp f1, f1 - ; CHECK-NEXT: mffprwz r3, f1 -+; CHECK-NEXT: clrlwi r3, r3, 16 - ; CHECK-NEXT: mtfprwz f1, r3 - ; CHECK-NEXT: xscvhpdp f1, f1 - ; CHECK-NEXT: xsaddsp f1, f0, f1 -@@ -1225,6 +1227,7 @@ define half @PR40273(half) #0 { - ; P8-NEXT: stdu r1, -32(r1) - ; P8-NEXT: bl __gnu_f2h_ieee - ; P8-NEXT: nop -+; P8-NEXT: clrldi r3, r3, 48 - ; P8-NEXT: bl __gnu_h2f_ieee - ; P8-NEXT: nop - ; P8-NEXT: xxlxor f0, f0, f0 -@@ -1245,6 +1248,7 @@ define half @PR40273(half) #0 { - ; CHECK-NEXT: xscvdphp f0, f1 - ; CHECK-NEXT: xxlxor f1, f1, f1 - ; CHECK-NEXT: mffprwz r3, f0 -+; CHECK-NEXT: clrlwi r3, r3, 16 - ; CHECK-NEXT: mtfprwz f0, r3 - ; CHECK-NEXT: xscvhpdp f0, f0 - ; CHECK-NEXT: fcmpu cr0, f0, f1 -diff --git llvm/test/CodeGen/PowerPC/pr48519.ll llvm/test/CodeGen/PowerPC/pr48519.ll -index 50970cb185d8..035cc49b93e6 100644 ---- llvm/test/CodeGen/PowerPC/pr48519.ll -+++ llvm/test/CodeGen/PowerPC/pr48519.ll -@@ -22,6 +22,7 @@ define void @julia__typed_vcat_20() #0 { - ; CHECK-NEXT: xscvsxdsp f1, f0 - ; CHECK-NEXT: bl __gnu_f2h_ieee - ; CHECK-NEXT: nop -+; CHECK-NEXT: clrldi r3, r3, 48 - ; CHECK-NEXT: bl __gnu_h2f_ieee - ; CHECK-NEXT: nop - ; CHECK-NEXT: addi r30, r30, -1 -@@ -46,6 +47,7 @@ define void @julia__typed_vcat_20() #0 { - ; CHECK-P9-NEXT: xscvsxdsp f0, f0 - ; CHECK-P9-NEXT: xscvdphp f0, f0 - ; CHECK-P9-NEXT: mffprwz r3, f0 -+; CHECK-P9-NEXT: clrlwi r3, r3, 16 - ; CHECK-P9-NEXT: mtfprwz f0, r3 - ; CHECK-P9-NEXT: li r3, 0 - ; CHECK-P9-NEXT: xscvhpdp f0, f0 -diff --git llvm/test/CodeGen/PowerPC/pr49092.ll llvm/test/CodeGen/PowerPC/pr49092.ll -new file mode 100644 -index 000000000000..2fce58418515 ---- /dev/null -+++ llvm/test/CodeGen/PowerPC/pr49092.ll -@@ -0,0 +1,39 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ -+; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s -+; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ -+; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ -+; RUN: -check-prefix=CHECK-P9 -+ -+define dso_local half @test2(i64 %a, i64 %b) local_unnamed_addr #0 { -+; CHECK-LABEL: test2: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: mflr r0 -+; CHECK-NEXT: std r0, 16(r1) -+; CHECK-NEXT: stdu r1, -32(r1) -+; CHECK-NEXT: add r3, r4, r3 -+; CHECK-NEXT: addi r3, r3, 11 -+; CHECK-NEXT: clrlwi r3, r3, 16 -+; CHECK-NEXT: bl __gnu_h2f_ieee -+; CHECK-NEXT: nop -+; CHECK-NEXT: addi r1, r1, 32 -+; CHECK-NEXT: ld r0, 16(r1) -+; CHECK-NEXT: mtlr r0 -+; CHECK-NEXT: blr -+; -+; CHECK-P9-LABEL: test2: -+; CHECK-P9: # %bb.0: # %entry -+; CHECK-P9-NEXT: add r3, r4, r3 -+; CHECK-P9-NEXT: addi r3, r3, 11 -+; CHECK-P9-NEXT: clrlwi r3, r3, 16 -+; CHECK-P9-NEXT: mtfprwz f0, r3 -+; CHECK-P9-NEXT: xscvhpdp f1, f0 -+; CHECK-P9-NEXT: blr -+entry: -+ %add = add i64 %b, %a -+ %0 = trunc i64 %add to i16 -+ %conv = add i16 %0, 11 -+ %call = bitcast i16 %conv to half -+ ret half %call -+} -+attributes #0 = { nounwind } --- -2.30.0 - diff --git a/deps/patches/llvm-11-D97435-AArch64-movaddrreg.patch b/deps/patches/llvm-11-D97435-AArch64-movaddrreg.patch deleted file mode 100644 index d33658f493711..0000000000000 --- a/deps/patches/llvm-11-D97435-AArch64-movaddrreg.patch +++ /dev/null @@ -1,484 +0,0 @@ -From 4103660e5362b2ab73256740c28e4f0e042f96d4 Mon Sep 17 00:00:00 2001 -From: Keno Fischer -Date: Mon, 1 Mar 2021 16:37:03 -0500 -Subject: [PATCH 1/4] [Aarch64] Correct register class for pseudo instructions - -This constrains the Mov* and similar pseudo instruction to take -GPR64common register classes rather than GPR64. GPR64 includs XZR -which is invalid here, because this pseudo instructions expands -into an adrp/add pair sharing a destination register. XZR is invalid -on add and attempting to encode it will instead increment the stack -pointer causing crashes (downstream report at [1]). The test case -there reproduces on LLVM11, but I do not have a test case that -reaches this code path on main, since it is being masked by -improved dead code elimination introduced in D91513. Nevertheless, -this seems like a good thing to fix in case there are other cases -that dead code elimination doesn't clean up (e.g. if `optnone` is -used and the optimization is skipped). - -I think it would be worth auditing uses of GPR64 in pseudo -instructions to see if there are any similar issues, but I do not -have a high enough view of the backend or knowledge of the -Aarch64 architecture to do this quickly. - -[1] https://github.com/JuliaLang/julia/issues/39818 - -Reviewed By: t.p.northover - -Differential Revision: https://reviews.llvm.org/D97435 ---- - .../AArch64/AArch64ExpandPseudoInsts.cpp | 1 + - llvm/lib/Target/AArch64/AArch64InstrInfo.td | 32 +- - .../GlobalISel/select-blockaddress.mir | 5 +- - .../select-jump-table-brjt-constrain.mir | 2 +- - .../GlobalISel/select-jump-table-brjt.mir | 2 +- - .../CodeGen/AArch64/GlobalISel/select.mir | 312 ------------------ - 6 files changed, 22 insertions(+), 332 deletions(-) - delete mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/select.mir - -diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp -index 9e65ad2e18f9..3497a15a7574 100644 ---- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp -+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp -@@ -844,6 +844,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, - case AArch64::MOVaddrEXT: { - // Expand into ADRP + ADD. - Register DstReg = MI.getOperand(0).getReg(); -+ assert(DstReg != AArch64::XZR); - MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg) - .add(MI.getOperand(1)); -diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td -index f4a5f639e497..9e69934a97e2 100644 ---- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td -+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td -@@ -630,40 +630,40 @@ let isReMaterializable = 1, isCodeGenOnly = 1 in { - // removed, along with the AArch64Wrapper node. - - let AddedComplexity = 10 in --def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr), -- [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>, -+def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr), -+ [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>, - Sched<[WriteLDAdr]>; - - // The MOVaddr instruction should match only when the add is not folded - // into a load or store address. - def MOVaddr -- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), -- [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi), -+ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), -+ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi), - tglobaladdr:$low))]>, - Sched<[WriteAdrAdr]>; - def MOVaddrJT -- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), -- [(set GPR64:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi), -+ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), -+ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi), - tjumptable:$low))]>, - Sched<[WriteAdrAdr]>; - def MOVaddrCP -- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), -- [(set GPR64:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi), -+ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), -+ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi), - tconstpool:$low))]>, - Sched<[WriteAdrAdr]>; - def MOVaddrBA -- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), -- [(set GPR64:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi), -+ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), -+ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi), - tblockaddress:$low))]>, - Sched<[WriteAdrAdr]>; - def MOVaddrTLS -- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), -- [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi), -+ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), -+ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi), - tglobaltlsaddr:$low))]>, - Sched<[WriteAdrAdr]>; - def MOVaddrEXT -- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), -- [(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi), -+ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), -+ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi), - texternalsym:$low))]>, - Sched<[WriteAdrAdr]>; - // Normally AArch64addlow either gets folded into a following ldr/str, -@@ -671,8 +671,8 @@ def MOVaddrEXT - // might appear without either of them, so allow lowering it into a plain - // add. - def ADDlowTLS -- : Pseudo<(outs GPR64:$dst), (ins GPR64:$src, i64imm:$low), -- [(set GPR64:$dst, (AArch64addlow GPR64:$src, -+ : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low), -+ [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src, - tglobaltlsaddr:$low))]>, - Sched<[WriteAdr]>; - -diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir -index bd5ee80d5841..10732660d34a 100644 ---- a/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir -+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir -@@ -30,9 +30,10 @@ registers: - body: | - ; CHECK-LABEL: name: test_blockaddress - ; CHECK: bb.0 (%ir-block.0): -- ; CHECK: [[MOVaddrBA:%[0-9]+]]:gpr64 = MOVaddrBA target-flags(aarch64-page) blockaddress(@test_blockaddress, %ir-block.block), target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block) -+ ; CHECK: [[MOVaddrBA:%[0-9]+]]:gpr64common = MOVaddrBA target-flags(aarch64-page) blockaddress(@test_blockaddress, %ir-block.block), target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block) - ; CHECK: [[MOVaddr:%[0-9]+]]:gpr64common = MOVaddr target-flags(aarch64-page) @addr, target-flags(aarch64-pageoff, aarch64-nc) @addr -- ; CHECK: STRXui [[MOVaddrBA]], [[MOVaddr]], 0 :: (store 8 into @addr) -+ ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY [[MOVaddrBA]] -+ ; CHECK: STRXui [[COPY]], [[MOVaddr]], 0 :: (store 8 into @addr) - ; CHECK: BR [[MOVaddrBA]] - ; CHECK: bb.1.block (address-taken): - ; CHECK: RET_ReallyLR -diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir -index 082bf43061da..6f3540a0768b 100644 ---- a/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir -+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir -@@ -30,7 +30,7 @@ body: | - ; CHECK: Bcc 8, %bb.3, implicit $nzcv - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) -- ; CHECK: [[MOVaddrJT:%[0-9]+]]:gpr64 = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0 -+ ; CHECK: [[MOVaddrJT:%[0-9]+]]:gpr64common = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0 - ; CHECK: early-clobber %6:gpr64, early-clobber %7:gpr64sp = JumpTableDest32 [[MOVaddrJT]], [[SUBREG_TO_REG]], %jump-table.0 - ; CHECK: BR %6 - ; CHECK: bb.2: -diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir -index ae7d90769f99..80631eadb5e8 100644 ---- a/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir -+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir -@@ -65,7 +65,7 @@ body: | - ; CHECK: bb.1.entry: - ; CHECK: successors: %bb.3(0x2aaaaaab), %bb.4(0x2aaaaaab), %bb.2(0x2aaaaaab) - ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr -- ; CHECK: [[MOVaddrJT:%[0-9]+]]:gpr64 = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0 -+ ; CHECK: [[MOVaddrJT:%[0-9]+]]:gpr64common = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0 - ; CHECK: early-clobber %18:gpr64, early-clobber %19:gpr64sp = JumpTableDest32 [[MOVaddrJT]], [[SUBREG_TO_REG]], %jump-table.0 - ; CHECK: BR %18 - ; CHECK: bb.2.sw.bb: -diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir -deleted file mode 100644 -index 112aee8d552c..000000000000 ---- a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir -+++ /dev/null -@@ -1,312 +0,0 @@ --# RUN: llc -O0 -mtriple=aarch64-apple-ios -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=IOS --# RUN: llc -O0 -mtriple=aarch64-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LINUX-DEFAULT --# RUN: llc -O0 -mtriple=aarch64-linux-gnu -relocation-model=pic -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LINUX-PIC -- ----- | -- target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" -- -- define void @frame_index() { -- %ptr0 = alloca i64 -- ret void -- } -- -- define i8* @ptr_mask(i8* %in) { ret i8* undef } -- -- @var_local = global i8 0 -- define i8* @global_local() { ret i8* undef } -- -- @var_got = external global i8 -- define i8* @global_got() { ret i8* undef } -- -- define void @icmp() { ret void } -- define void @fcmp() { ret void } -- -- define void @phi() { ret void } -- -- define void @select() { ret void } --... -- ----- --# CHECK-LABEL: name: frame_index --name: frame_index --legalized: true --regBankSelected: true -- --# CHECK: registers: --# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } --registers: -- - { id: 0, class: gpr } -- --stack: -- - { id: 0, name: ptr0, offset: 0, size: 8, alignment: 8 } -- --# CHECK: body: --# CHECK: %0:gpr64sp = ADDXri %stack.0.ptr0, 0, 0 --body: | -- bb.0: -- %0(p0) = G_FRAME_INDEX %stack.0.ptr0 -- $x0 = COPY %0(p0) --... -- ----- -- ----- --# CHECK-LABEL: name: ptr_mask --name: ptr_mask --legalized: true --regBankSelected: true -- --# CHECK: body: --# CHECK: %2:gpr64sp = ANDXri %0, 8060 --body: | -- bb.0: -- liveins: $x0 -- %0:gpr(p0) = COPY $x0 -- %const:gpr(s64) = G_CONSTANT i64 -8 -- %1:gpr(p0) = G_PTRMASK %0, %const -- $x0 = COPY %1(p0) --... -- ----- --# Global defined in the same linkage unit so no GOT is needed --# CHECK-LABEL: name: global_local --name: global_local --legalized: true --regBankSelected: true --registers: -- - { id: 0, class: gpr } -- --# CHECK: body: --# IOS: %0:gpr64 = MOVaddr target-flags(aarch64-page) @var_local, target-flags(aarch64-pageoff, aarch64-nc) @var_local --# LINUX-DEFAULT: %0:gpr64 = MOVaddr target-flags(aarch64-page) @var_local, target-flags(aarch64-pageoff, aarch64-nc) @var_local --# LINUX-PIC: %0:gpr64 = LOADgot target-flags(aarch64-got) @var_local --body: | -- bb.0: -- %0(p0) = G_GLOBAL_VALUE @var_local -- $x0 = COPY %0(p0) --... -- ----- --# CHECK-LABEL: name: global_got --name: global_got --legalized: true --regBankSelected: true --registers: -- - { id: 0, class: gpr } -- --# CHECK: body: --# IOS: %0:gpr64 = LOADgot target-flags(aarch64-got) @var_got --# LINUX-DEFAULT: %0:gpr64 = MOVaddr target-flags(aarch64-page) @var_got, target-flags(aarch64-pageoff, aarch64-nc) @var_got --# LINUX-PIC: %0:gpr64 = LOADgot target-flags(aarch64-got) @var_got --body: | -- bb.0: -- %0(p0) = G_GLOBAL_VALUE @var_got -- $x0 = COPY %0(p0) --... -- ----- --# CHECK-LABEL: name: icmp --name: icmp --legalized: true --regBankSelected: true -- --# CHECK: registers: --# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } --# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } --# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } --# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' } --# CHECK-NEXT: - { id: 4, class: gpr64, preferred-register: '' } --# CHECK-NEXT: - { id: 5, class: gpr32, preferred-register: '' } --registers: -- - { id: 0, class: gpr } -- - { id: 1, class: gpr } -- - { id: 2, class: gpr } -- - { id: 3, class: gpr } -- - { id: 4, class: gpr } -- - { id: 5, class: gpr } -- - { id: 6, class: gpr } -- - { id: 7, class: gpr } -- - { id: 8, class: gpr } -- - { id: 9, class: gpr } -- - { id: 10, class: gpr } -- - { id: 11, class: gpr } -- --# CHECK: body: --# CHECK: SUBSWrr %0, %0, implicit-def $nzcv --# CHECK: %1:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv -- --# CHECK: SUBSXrr %2, %2, implicit-def $nzcv --# CHECK: %3:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv -- --# CHECK: SUBSXrr %4, %4, implicit-def $nzcv --# CHECK: %5:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv -- --body: | -- bb.0: -- liveins: $w0, $x0 -- -- %0(s32) = COPY $w0 -- %1(s32) = G_ICMP intpred(eq), %0, %0 -- %6(s1) = G_TRUNC %1(s32) -- %9(s32) = G_ANYEXT %6 -- $w0 = COPY %9(s32) -- -- %2(s64) = COPY $x0 -- %3(s32) = G_ICMP intpred(uge), %2, %2 -- %7(s1) = G_TRUNC %3(s32) -- %10(s32) = G_ANYEXT %7 -- $w0 = COPY %10(s32) -- -- %4(p0) = COPY $x0 -- %5(s32) = G_ICMP intpred(ne), %4, %4 -- %8(s1) = G_TRUNC %5(s32) -- %11(s32) = G_ANYEXT %8 -- $w0 = COPY %11(s32) --... -- ----- --# CHECK-LABEL: name: fcmp --name: fcmp --legalized: true --regBankSelected: true -- --# CHECK: registers: --# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } --# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } --# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' } --# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' } --# CHECK-NEXT: - { id: 4, class: gpr32, preferred-register: '' } --# CHECK-NEXT: - { id: 5, class: gpr32, preferred-register: '' } --registers: -- - { id: 0, class: fpr } -- - { id: 1, class: gpr } -- - { id: 2, class: fpr } -- - { id: 3, class: gpr } -- - { id: 4, class: gpr } -- - { id: 5, class: gpr } -- - { id: 6, class: gpr } -- - { id: 7, class: gpr } -- --# CHECK: body: --# CHECK: FCMPSrr %0, %0, implicit-def $nzcv --# CHECK: [[TST_MI:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv --# CHECK: [[TST_GT:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv --# CHECK: %1:gpr32 = ORRWrr [[TST_MI]], [[TST_GT]] -- --# CHECK: FCMPDrr %2, %2, implicit-def $nzcv --# CHECK: %3:gpr32 = CSINCWr $wzr, $wzr, 4, implicit $nzcv -- --body: | -- bb.0: -- liveins: $w0, $x0 -- -- %0(s32) = COPY $s0 -- %1(s32) = G_FCMP floatpred(one), %0, %0 -- %4(s1) = G_TRUNC %1(s32) -- %6(s32) = G_ANYEXT %4 -- $w0 = COPY %6(s32) -- -- %2(s64) = COPY $d0 -- %3(s32) = G_FCMP floatpred(uge), %2, %2 -- %5(s1) = G_TRUNC %3(s32) -- %7(s32) = G_ANYEXT %5 -- $w0 = COPY %7(s32) -- --... -- ----- --# CHECK-LABEL: name: phi --name: phi --legalized: true --regBankSelected: true --tracksRegLiveness: true -- --# CHECK: registers: --# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } --# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } --# CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' } --registers: -- - { id: 0, class: fpr } -- - { id: 1, class: gpr } -- - { id: 2, class: fpr } -- --# CHECK: body: --# CHECK: bb.1: --# CHECK: %2:fpr32 = PHI %0, %bb.0, %2, %bb.1 -- --body: | -- bb.0: -- liveins: $s0, $w0 -- successors: %bb.1 -- %0(s32) = COPY $s0 -- %3:gpr(s32) = COPY $w0 -- %1(s1) = G_TRUNC %3 -- -- bb.1: -- successors: %bb.1, %bb.2 -- %2(s32) = PHI %0, %bb.0, %2, %bb.1 -- G_BRCOND %1, %bb.1 -- -- bb.2: -- $s0 = COPY %2 -- RET_ReallyLR implicit $s0 --... -- ----- --# CHECK-LABEL: name: select --name: select --legalized: true --regBankSelected: true --tracksRegLiveness: true -- --# CHECK: registers: --# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } --# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } --# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } --# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' } --# CHECK-NEXT: - { id: 4, class: gpr64, preferred-register: '' } --# CHECK-NEXT: - { id: 5, class: gpr64, preferred-register: '' } --# CHECK-NEXT: - { id: 6, class: gpr64, preferred-register: '' } --# CHECK-NEXT: - { id: 7, class: gpr64, preferred-register: '' } --# CHECK-NEXT: - { id: 8, class: gpr64, preferred-register: '' } --# CHECK-NEXT: - { id: 9, class: gpr64, preferred-register: '' } --registers: -- - { id: 0, class: gpr } -- - { id: 1, class: gpr } -- - { id: 2, class: gpr } -- - { id: 3, class: gpr } -- - { id: 4, class: gpr } -- - { id: 5, class: gpr } -- - { id: 6, class: gpr } -- - { id: 7, class: gpr } -- - { id: 8, class: gpr } -- - { id: 9, class: gpr } -- --# CHECK: body: --# CHECK: $wzr = ANDSWri %10, 0, implicit-def $nzcv --# CHECK: %3:gpr32 = CSELWr %1, %2, 1, implicit $nzcv --# CHECK: $wzr = ANDSWri %10, 0, implicit-def $nzcv --# CHECK: %6:gpr64 = CSELXr %4, %5, 1, implicit $nzcv --# CHECK: $wzr = ANDSWri %10, 0, implicit-def $nzcv --# CHECK: %9:gpr64 = CSELXr %7, %8, 1, implicit $nzcv --body: | -- bb.0: -- liveins: $w0, $w1, $w2 -- %10:gpr(s32) = COPY $w0 -- %0(s1) = G_TRUNC %10 -- -- %1(s32) = COPY $w1 -- %2(s32) = COPY $w2 -- %3(s32) = G_SELECT %0, %1, %2 -- $w0 = COPY %3(s32) -- -- %4(s64) = COPY $x0 -- %5(s64) = COPY $x1 -- %6(s64) = G_SELECT %0, %4, %5 -- $x0 = COPY %6(s64) -- -- %7(p0) = COPY $x0 -- %8(p0) = COPY $x1 -- %9(p0) = G_SELECT %0, %7, %8 -- $x0 = COPY %9(p0) --... --- -2.25.1 - diff --git a/deps/patches/llvm-11-D97571-AArch64-loh.patch b/deps/patches/llvm-11-D97571-AArch64-loh.patch deleted file mode 100644 index 352a9504652a2..0000000000000 --- a/deps/patches/llvm-11-D97571-AArch64-loh.patch +++ /dev/null @@ -1,101 +0,0 @@ -From 84eff6b2defb7f2d847d5bac165d6a44472b9ddf Mon Sep 17 00:00:00 2001 -From: Keno Fischer -Date: Mon, 1 Mar 2021 16:38:19 -0500 -Subject: [PATCH 2/4] [AArch64] Fix emitting an AdrpAddLdr LOH when there's a - potential clobber of the def of the adrp before the ldr. - -Apparently this pass used to have liveness analysis but it was removed for scompile time reasons. This workaround prevents the LOH from being emitted unless the ADD and LDR are adjacent. - -Fixes https://github.com/JuliaLang/julia/issues/39820 - -Reviewed By: loladiro, qcolombet - -Differential Revision: https://reviews.llvm.org/D97571 ---- - llvm/lib/Target/AArch64/AArch64CollectLOH.cpp | 26 +++++++++++++++- - .../AArch64/loh-adrp-add-ldr-clobber.mir | 30 +++++++++++++++++++ - 2 files changed, 55 insertions(+), 1 deletion(-) - create mode 100644 llvm/test/CodeGen/AArch64/loh-adrp-add-ldr-clobber.mir - -diff --git a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp -index efdb1131abc9..ac243347b24d 100644 ---- a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp -+++ b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp -@@ -419,13 +419,37 @@ static void handleADRP(const MachineInstr &MI, AArch64FunctionInfo &AFI, - ++NumADRPToLDR; - } - break; -- case MCLOH_AdrpAddLdr: -+ case MCLOH_AdrpAddLdr: { -+ // There is a possibility that the linker may try to rewrite: -+ // adrp x0, @sym@PAGE -+ // add x1, x0, @sym@PAGEOFF -+ // [x0 = some other def] -+ // ldr x2, [x1] -+ // ...into... -+ // adrp x0, @sym -+ // nop -+ // [x0 = some other def] -+ // ldr x2, [x0] -+ // ...if the offset to the symbol won't fit within a literal load. -+ // This causes the load to use the result of the adrp, which in this -+ // case has already been clobbered. -+ // FIXME: Implement proper liveness tracking for all registers. For now, -+ // don't emit the LOH if there are any instructions between the add and -+ // the ldr. -+ MachineInstr *AddMI = const_cast(Info.MI1); -+ const MachineInstr *LdrMI = Info.MI0; -+ auto AddIt = MachineBasicBlock::iterator(AddMI); -+ auto EndIt = AddMI->getParent()->end(); -+ if (AddMI->getIterator() == EndIt || LdrMI != &*next_nodbg(AddIt, EndIt)) -+ break; -+ - LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAddLdr:\n" - << '\t' << MI << '\t' << *Info.MI1 << '\t' - << *Info.MI0); - AFI.addLOHDirective(MCLOH_AdrpAddLdr, {&MI, Info.MI1, Info.MI0}); - ++NumADDToLDR; - break; -+ } - case MCLOH_AdrpAddStr: - if (Info.MI1 != nullptr) { - LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAddStr:\n" -diff --git a/llvm/test/CodeGen/AArch64/loh-adrp-add-ldr-clobber.mir b/llvm/test/CodeGen/AArch64/loh-adrp-add-ldr-clobber.mir -new file mode 100644 -index 000000000000..e1e893c6383a ---- /dev/null -+++ b/llvm/test/CodeGen/AArch64/loh-adrp-add-ldr-clobber.mir -@@ -0,0 +1,30 @@ -+# RUN: llc -o /dev/null %s -mtriple=aarch64-apple-ios -run-pass=aarch64-collect-loh -debug-only=aarch64-collect-loh 2>&1 | FileCheck %s -+--- | -+ @sym2 = local_unnamed_addr global [10000000 x i32] zeroinitializer, align 8 -+ @sym = local_unnamed_addr global i32 zeroinitializer, align 8 -+ -+ define i32 @main() { -+ ret i32 0 -+ } -+ -+... -+--- -+name: main -+alignment: 4 -+tracksRegLiveness: true -+liveins: -+ - { reg: '$x22', virtual-reg: '' } -+ - { reg: '$x21', virtual-reg: '' } -+body: | -+ bb.0: -+ liveins: $x21, $x22 -+ ; Check we don't emit an loh here because there's a clobbering def of x8 before the ldr. -+ ; CHECK-LABEL: main -+ ; CHECK-NOT: MCLOH_AdrpAddLdr -+ renamable $x8 = ADRP target-flags(aarch64-page) @sym -+ renamable $x9 = ADDXri killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @sym, 0 -+ renamable $x8 = ADDXri killed renamable $x22, 1, 0 -+ $x9 = LDRXui $x9, 0 -+ RET undef $lr -+ -+... --- -2.25.1 - diff --git a/deps/patches/llvm-11-PR48458-X86ISelDAGToDAG.patch b/deps/patches/llvm-11-PR48458-X86ISelDAGToDAG.patch deleted file mode 100644 index 8b7fe974d5409..0000000000000 --- a/deps/patches/llvm-11-PR48458-X86ISelDAGToDAG.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 2c8b03616a3e033b0067ac506e6287970cfd424e Mon Sep 17 00:00:00 2001 -From: Craig Topper -Date: Wed, 9 Dec 2020 10:21:40 -0800 -Subject: [PATCH] [X86] Use APInt::isSignedIntN instead of isIntN for 64-bit - ANDs in X86DAGToDAGISel::IsProfitableToFold - -Pretty sure we meant to be checking signed 32 immediates here -rather than unsigned 32 bit. I suspect I messed this up because -in MathExtras.h we have isIntN and isUIntN so isIntN differs in -signedness depending on whether you're using APInt or plain integers. - -This fixes a case where we didn't fold a constant created -by shrinkAndImmediate. Since shrinkAndImmediate doesn't topologically -sort constants it creates, we can fail to convert the Constant -to a TargetConstant. This leads to very strange behavior later. - -Fixes PR48458. ---- - llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 2 +- - llvm/test/CodeGen/X86/pr48458.ll | 17 +++++++++++++++++ - 2 files changed, 18 insertions(+), 1 deletion(-) - create mode 100644 llvm/test/CodeGen/X86/pr48458.ll - -diff --git llvm/lib/Target/X86/X86ISelDAGToDAG.cpp llvm/lib/Target/X86/X86ISelDAGToDAG.cpp -index 3cd80cb04ab8..f6aaef215432 100644 ---- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp -+++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp -@@ -611,7 +611,7 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { - // best of both worlds. - if (U->getOpcode() == ISD::AND && - Imm->getAPIntValue().getBitWidth() == 64 && -- Imm->getAPIntValue().isIntN(32)) -+ Imm->getAPIntValue().isSignedIntN(32)) - return false; - - // If this really a zext_inreg that can be represented with a movzx -diff --git llvm/test/CodeGen/X86/pr48458.ll llvm/test/CodeGen/X86/pr48458.ll -new file mode 100644 -index 000000000000..bca355961611 ---- /dev/null -+++ llvm/test/CodeGen/X86/pr48458.ll -@@ -0,0 +1,17 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -+ -+define i1 @foo(i64* %0) { -+; CHECK-LABEL: foo: -+; CHECK: # %bb.0: # %top -+; CHECK-NEXT: movq (%rdi), %rax -+; CHECK-NEXT: andq $-2147483648, %rax # imm = 0x80000000 -+; CHECK-NEXT: sete %al -+; CHECK-NEXT: retq -+top: -+ %1 = load i64, i64* %0, !range !0 -+ %2 = icmp ult i64 %1, 2147483648 -+ ret i1 %2 -+} -+ -+!0 = !{i64 0, i64 10000000000} --- -2.29.2 diff --git a/deps/patches/llvm-11-aarch64-addrspace.patch b/deps/patches/llvm-11-aarch64-addrspace.patch deleted file mode 100644 index f046a5000ede4..0000000000000 --- a/deps/patches/llvm-11-aarch64-addrspace.patch +++ /dev/null @@ -1,31 +0,0 @@ -From a1178fdd072b7addcf56c2f6e2298165263040bb Mon Sep 17 00:00:00 2001 -From: Tim Northover -Date: Thu, 25 Feb 2021 10:13:59 +0000 -Subject: [PATCH 3/4] AArch64: relax address-space assertion in FastISel. - -Some people are using alternative address spaces to track GC data, but -otherwise they behave exactly the same. This is the only place in the backend -we even try to care about it so it's really not achieving anything. ---- - llvm/lib/Target/AArch64/AArch64FastISel.cpp | 5 +---- - 1 file changed, 1 insertion(+), 4 deletions(-) - -diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp -index 0f63f4ca62e5..cf3ebed6ef19 100644 ---- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp -+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp -@@ -526,10 +526,7 @@ unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) { - MVT VT = CEVT.getSimpleVT(); - // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that, - // 'null' pointers need to have a somewhat special treatment. -- if (const auto *CPN = dyn_cast(C)) { -- (void)CPN; -- assert(CPN->getType()->getPointerAddressSpace() == 0 && -- "Unexpected address space"); -+ if (isa(C)) { - assert(VT == MVT::i64 && "Expected 64-bit pointers"); - return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT); - } --- -2.25.1 - diff --git a/deps/patches/llvm-11-ppc-half-ctr.patch b/deps/patches/llvm-11-ppc-half-ctr.patch deleted file mode 100644 index e9a9b9a4d5f86..0000000000000 --- a/deps/patches/llvm-11-ppc-half-ctr.patch +++ /dev/null @@ -1,96 +0,0 @@ -From 79a73d6388790cfec9bd76b1790f0f5551a9df8c Mon Sep 17 00:00:00 2001 -From: Nemanja Ivanovic -Date: Mon, 28 Dec 2020 22:51:51 -0600 -Subject: [PATCH 1/4] [PowerPC] Disable CTR loops containing operations on - half-precision - -On subtargets prior to Power9, conversions to/from half precision -are lowered to libcalls. This makes loops containing such operations -invalid candidates for HW loops. - -Fixes: https://bugs.llvm.org/show_bug.cgi?id=48519 ---- - .../Target/PowerPC/PPCTargetTransformInfo.cpp | 4 ++ - llvm/test/CodeGen/PowerPC/pr48519.ll | 55 +++++++++++++++++++ - 2 files changed, 59 insertions(+) - create mode 100644 llvm/test/CodeGen/PowerPC/pr48519.ll - -diff --git llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp -index 53556ffc267d..49c10fdf8898 100644 ---- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp -+++ llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp -@@ -441,6 +441,10 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, - isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) || - isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType())) - return true; -+ if (!ST->isISA3_0() && -+ (CI->getSrcTy()->getScalarType()->isHalfTy() || -+ CI->getDestTy()->getScalarType()->isHalfTy())) -+ return true; - } else if (isLargeIntegerTy(!TM.isPPC64(), - J->getType()->getScalarType()) && - (J->getOpcode() == Instruction::UDiv || -diff --git llvm/test/CodeGen/PowerPC/pr48519.ll llvm/test/CodeGen/PowerPC/pr48519.ll -new file mode 100644 -index 000000000000..777874e91c26 ---- /dev/null -+++ llvm/test/CodeGen/PowerPC/pr48519.ll -@@ -0,0 +1,55 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ -+; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s -+define void @julia__typed_vcat_20() #0 { -+; CHECK-LABEL: julia__typed_vcat_20: -+; CHECK: # %bb.0: # %top -+; CHECK-NEXT: mflr r0 -+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill -+; CHECK-NEXT: std r0, 16(r1) -+; CHECK-NEXT: stdu r1, -48(r1) -+; CHECK-NEXT: li r3, 1 -+; CHECK-NEXT: li r30, 0 -+; CHECK-NEXT: .p2align 4 -+; CHECK-NEXT: .LBB0_1: # %L139 -+; CHECK-NEXT: # -+; CHECK-NEXT: addi r3, r3, -1 -+; CHECK-NEXT: mtfprd f0, r3 -+; CHECK-NEXT: xscvsxdsp f1, f0 -+; CHECK-NEXT: bl __gnu_f2h_ieee -+; CHECK-NEXT: nop -+; CHECK-NEXT: bl __gnu_h2f_ieee -+; CHECK-NEXT: nop -+; CHECK-NEXT: addi r30, r30, -1 -+; CHECK-NEXT: li r3, 0 -+; CHECK-NEXT: cmpldi r30, 0 -+; CHECK-NEXT: bne+ cr0, .LBB0_1 -+; CHECK-NEXT: # %bb.2: # %pass.1 -+; CHECK-NEXT: bl __gnu_f2h_ieee -+; CHECK-NEXT: nop -+; CHECK-NEXT: sth r3, 0(r3) -+top: -+ %.sroa.6.0.copyload = load i64, i64 addrspace(11)* null, align 8 -+ %0 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %.sroa.6.0.copyload, i64 0) -+ %1 = extractvalue { i64, i1 } %0, 0 -+ br label %L139 -+ -+L139: ; preds = %L139, %top -+ %value_phi21 = phi i64 [ %5, %L139 ], [ 1, %top ] -+ %value_phi23 = phi i64 [ 0, %L139 ], [ 1, %top ] -+ %2 = add nsw i64 %value_phi23, -1 -+ %3 = add i64 %2, 0 -+ %4 = sitofp i64 %3 to half -+ store half %4, half addrspace(13)* undef, align 2 -+ %.not101.not = icmp eq i64 %value_phi21, 0 -+ %5 = add i64 %value_phi21, 1 -+ br i1 %.not101.not, label %pass.1, label %L139 -+ -+pass.1: ; preds = %L139 -+ unreachable -+} -+ -+; Function Attrs: nounwind readnone speculatable willreturn -+declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) #0 -+ -+attributes #0 = { nounwind } --- -2.30.0 - diff --git a/deps/patches/llvm-11-ppc-sp-from-bp.patch b/deps/patches/llvm-11-ppc-sp-from-bp.patch deleted file mode 100644 index 014cfb237a303..0000000000000 --- a/deps/patches/llvm-11-ppc-sp-from-bp.patch +++ /dev/null @@ -1,621 +0,0 @@ -From 646760460fa06f8577d35282cde5faf8f0ed8499 Mon Sep 17 00:00:00 2001 -From: Nemanja Ivanovic -Date: Tue, 22 Dec 2020 05:43:33 -0600 -Subject: [PATCH 4/4] [PowerPC] Restore stack ptr from base ptr when available - -On subtargets that have a red zone, we will copy the stack pointer to the base -pointer in the prologue prior to updating the stack pointer. There are no other -updates to the base pointer after that. This suggests that we should be able to -restore the stack pointer from the base pointer rather than loading it from the -back chain or adding the frame size back to either the stack pointer or the -frame pointer. -This came about because functions that call setjmp need to restore the SP from -the FP because the back chain might have been clobbered -(see https://reviews.llvm.org/D92906). However, if the stack is realigned, the -restored SP might be incorrect (which is what caused the failures in the two -ASan test cases). - -This patch was tested quite extensivelly both with sanitizer runtimes and -general code. - -Differential revision: https://reviews.llvm.org/D93327 ---- - llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 9 +- - llvm/test/CodeGen/PowerPC/aix-base-pointer.ll | 8 +- - llvm/test/CodeGen/PowerPC/pr46759.ll | 2 +- - .../CodeGen/PowerPC/stack-clash-prologue.ll | 498 ++++++++++++++++++ - llvm/test/CodeGen/PowerPC/stack-realign.ll | 4 +- - 5 files changed, 513 insertions(+), 8 deletions(-) - -diff --git llvm/lib/Target/PowerPC/PPCFrameLowering.cpp llvm/lib/Target/PowerPC/PPCFrameLowering.cpp -index 66db0f199e15..80cbaa475184 100644 ---- llvm/lib/Target/PowerPC/PPCFrameLowering.cpp -+++ llvm/lib/Target/PowerPC/PPCFrameLowering.cpp -@@ -1704,11 +1704,18 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, - // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red - // zone add this offset back now. - -+ // If the function has a base pointer, the stack pointer has been copied -+ // to it so we can restore it by copying in the other direction. -+ if (HasRedZone && HasBP) { -+ BuildMI(MBB, MBBI, dl, OrInst, RBReg). -+ addReg(BPReg). -+ addReg(BPReg); -+ } - // If this function contained a fastcc call and GuaranteedTailCallOpt is - // enabled (=> hasFastCall()==true) the fastcc call might contain a tail - // call which invalidates the stack pointer value in SP(0). So we use the - // value of R31 in this case. Similar situation exists with setjmp. -- if (FI->hasFastCall() || MF.exposesReturnsTwice()) { -+ else if (FI->hasFastCall() || MF.exposesReturnsTwice()) { - assert(HasFP && "Expecting a valid frame pointer."); - if (!HasRedZone) - RBReg = FPReg; -diff --git llvm/test/CodeGen/PowerPC/aix-base-pointer.ll llvm/test/CodeGen/PowerPC/aix-base-pointer.ll -index 2566e31c025d..5141fd9e4222 100644 ---- llvm/test/CodeGen/PowerPC/aix-base-pointer.ll -+++ llvm/test/CodeGen/PowerPC/aix-base-pointer.ll -@@ -27,8 +27,8 @@ declare void @callee(i32*) - ; 32BIT: stwux 1, 1, 0 - ; 32BIT: addi 3, 1, 64 - ; 32BIT: bl .callee --; 32BIT: lwz 1, 0(1) --; 32BIT: lwz 30, -8(1) -+; 32BIT: mr 1, 30 -+; 32BIT: lwz 30, -16(1) - - ; 64BIT-LABEL: .caller: - ; 64BIT: std 30, -16(1) -@@ -38,5 +38,5 @@ declare void @callee(i32*) - ; 64BIT: stdux 1, 1, 0 - ; 64BIT: addi 3, 1, 128 - ; 64BIT: bl .callee --; 64BIT: ld 1, 0(1) --; 64BIT: ld 30, -16(1) -+; 64BIT: mr 1, 30 -+; 64BIT: ld 30, -24(1) -diff --git llvm/test/CodeGen/PowerPC/pr46759.ll llvm/test/CodeGen/PowerPC/pr46759.ll -index d1d68a5db7e3..92f2c64bc06a 100644 ---- llvm/test/CodeGen/PowerPC/pr46759.ll -+++ llvm/test/CodeGen/PowerPC/pr46759.ll -@@ -45,7 +45,7 @@ define void @foo(i32 %vla_size) #0 { - ; CHECK-LE-NEXT: .LBB0_2: # %entry - ; CHECK-LE-NEXT: addi r3, r1, 2048 - ; CHECK-LE-NEXT: lbz r3, 0(r3) --; CHECK-LE-NEXT: ld r1, 0(r1) -+; CHECK-LE-NEXT: mr r1, r30 - ; CHECK-LE-NEXT: ld r31, -8(r1) - ; CHECK-LE-NEXT: ld r30, -16(r1) - ; CHECK-LE-NEXT: blr -diff --git llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll -index cb513be9128c..6443059c9704 100644 ---- llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll -+++ llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll -@@ -528,4 +528,502 @@ entry: - ret i8 %c - } - -+; alloca + align < probe_size -+define i32 @f8(i64 %i) local_unnamed_addr #0 { -+; CHECK-LE-LABEL: f8: -+; CHECK-LE: # %bb.0: -+; CHECK-LE-NEXT: clrldi r0, r1, 58 -+; CHECK-LE-NEXT: std r30, -16(r1) -+; CHECK-LE-NEXT: mr r30, r1 -+; CHECK-LE-NEXT: subfic r0, r0, -896 -+; CHECK-LE-NEXT: stdux r1, r1, r0 -+; CHECK-LE-NEXT: .cfi_def_cfa_register r30 -+; CHECK-LE-NEXT: .cfi_offset r30, -16 -+; CHECK-LE-NEXT: addi r4, r1, 64 -+; CHECK-LE-NEXT: sldi r3, r3, 2 -+; CHECK-LE-NEXT: li r5, 1 -+; CHECK-LE-NEXT: stwx r5, r4, r3 -+; CHECK-LE-NEXT: lwz r3, 64(r1) -+; CHECK-LE-NEXT: mr r1, r30 -+; CHECK-LE-NEXT: ld r30, -16(r1) -+; CHECK-LE-NEXT: blr -+; -+; CHECK-BE-LABEL: f8: -+; CHECK-BE: # %bb.0: -+; CHECK-BE-NEXT: clrldi r0, r1, 58 -+; CHECK-BE-NEXT: std r30, -16(r1) -+; CHECK-BE-NEXT: mr r30, r1 -+; CHECK-BE-NEXT: subfic r0, r0, -896 -+; CHECK-BE-NEXT: stdux r1, r1, r0 -+; CHECK-BE-NEXT: .cfi_def_cfa_register r30 -+; CHECK-BE-NEXT: .cfi_offset r30, -16 -+; CHECK-BE-NEXT: addi r4, r1, 64 -+; CHECK-BE-NEXT: li r5, 1 -+; CHECK-BE-NEXT: sldi r3, r3, 2 -+; CHECK-BE-NEXT: stwx r5, r4, r3 -+; CHECK-BE-NEXT: lwz r3, 64(r1) -+; CHECK-BE-NEXT: mr r1, r30 -+; CHECK-BE-NEXT: ld r30, -16(r1) -+; CHECK-BE-NEXT: blr -+; -+; CHECK-32-LABEL: f8: -+; CHECK-32: # %bb.0: -+; CHECK-32-NEXT: clrlwi r0, r1, 26 -+; CHECK-32-NEXT: subfic r0, r0, -896 -+; CHECK-32-NEXT: stwux r1, r1, r0 -+; CHECK-32-NEXT: sub r0, r1, r0 -+; CHECK-32-NEXT: addic r0, r0, -8 -+; CHECK-32-NEXT: stwx r30, 0, r0 -+; CHECK-32-NEXT: addic r30, r0, 8 -+; CHECK-32-NEXT: .cfi_def_cfa_register r30 -+; CHECK-32-NEXT: .cfi_offset r30, -8 -+; CHECK-32-NEXT: addi r3, r1, 64 -+; CHECK-32-NEXT: li r5, 1 -+; CHECK-32-NEXT: slwi r4, r4, 2 -+; CHECK-32-NEXT: stwx r5, r3, r4 -+; CHECK-32-NEXT: mr r0, r31 -+; CHECK-32-NEXT: lwz r3, 64(r1) -+; CHECK-32-NEXT: lwz r31, 0(r1) -+; CHECK-32-NEXT: lwz r30, -8(r31) -+; CHECK-32-NEXT: mr r1, r31 -+; CHECK-32-NEXT: mr r31, r0 -+; CHECK-32-NEXT: blr -+ %a = alloca i32, i32 200, align 64 -+ %b = getelementptr inbounds i32, i32* %a, i64 %i -+ store volatile i32 1, i32* %b -+ %c = load volatile i32, i32* %a -+ ret i32 %c -+} -+ -+; alloca > probe_size, align > probe_size -+define i32 @f9(i64 %i) local_unnamed_addr #0 { -+; CHECK-LE-LABEL: f9: -+; CHECK-LE: # %bb.0: -+; CHECK-LE-NEXT: std r30, -16(r1) -+; CHECK-LE-NEXT: mr r30, r1 -+; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 -+; CHECK-LE-NEXT: clrldi r0, r30, 53 -+; CHECK-LE-NEXT: subc r12, r30, r0 -+; CHECK-LE-NEXT: clrldi r0, r0, 52 -+; CHECK-LE-NEXT: cmpdi r0, 0 -+; CHECK-LE-NEXT: beq cr0, .LBB9_2 -+; CHECK-LE-NEXT: # %bb.1: -+; CHECK-LE-NEXT: neg r0, r0 -+; CHECK-LE-NEXT: stdux r30, r1, r0 -+; CHECK-LE-NEXT: .LBB9_2: -+; CHECK-LE-NEXT: li r0, -4096 -+; CHECK-LE-NEXT: cmpd r1, r12 -+; CHECK-LE-NEXT: beq cr0, .LBB9_4 -+; CHECK-LE-NEXT: .LBB9_3: -+; CHECK-LE-NEXT: stdux r30, r1, r0 -+; CHECK-LE-NEXT: cmpd r1, r12 -+; CHECK-LE-NEXT: bne cr0, .LBB9_3 -+; CHECK-LE-NEXT: .LBB9_4: -+; CHECK-LE-NEXT: mr r12, r30 -+; CHECK-LE-NEXT: stdu r12, -2048(r1) -+; CHECK-LE-NEXT: stdu r12, -4096(r1) -+; CHECK-LE-NEXT: stdu r12, -4096(r1) -+; CHECK-LE-NEXT: .cfi_def_cfa_register r1 -+; CHECK-LE-NEXT: .cfi_def_cfa_register r30 -+; CHECK-LE-NEXT: .cfi_offset r30, -16 -+; CHECK-LE-NEXT: addi r4, r1, 2048 -+; CHECK-LE-NEXT: sldi r3, r3, 2 -+; CHECK-LE-NEXT: li r5, 1 -+; CHECK-LE-NEXT: stwx r5, r4, r3 -+; CHECK-LE-NEXT: lwz r3, 2048(r1) -+; CHECK-LE-NEXT: mr r1, r30 -+; CHECK-LE-NEXT: ld r30, -16(r1) -+; CHECK-LE-NEXT: blr -+; -+; CHECK-BE-LABEL: f9: -+; CHECK-BE: # %bb.0: -+; CHECK-BE-NEXT: std r30, -16(r1) -+; CHECK-BE-NEXT: mr r30, r1 -+; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 -+; CHECK-BE-NEXT: clrldi r0, r30, 53 -+; CHECK-BE-NEXT: subc r12, r30, r0 -+; CHECK-BE-NEXT: clrldi r0, r0, 52 -+; CHECK-BE-NEXT: cmpdi r0, 0 -+; CHECK-BE-NEXT: beq cr0, .LBB9_2 -+; CHECK-BE-NEXT: # %bb.1: -+; CHECK-BE-NEXT: neg r0, r0 -+; CHECK-BE-NEXT: stdux r30, r1, r0 -+; CHECK-BE-NEXT: .LBB9_2: -+; CHECK-BE-NEXT: li r0, -4096 -+; CHECK-BE-NEXT: cmpd r1, r12 -+; CHECK-BE-NEXT: beq cr0, .LBB9_4 -+; CHECK-BE-NEXT: .LBB9_3: -+; CHECK-BE-NEXT: stdux r30, r1, r0 -+; CHECK-BE-NEXT: cmpd r1, r12 -+; CHECK-BE-NEXT: bne cr0, .LBB9_3 -+; CHECK-BE-NEXT: .LBB9_4: -+; CHECK-BE-NEXT: mr r12, r30 -+; CHECK-BE-NEXT: stdu r12, -2048(r1) -+; CHECK-BE-NEXT: stdu r12, -4096(r1) -+; CHECK-BE-NEXT: stdu r12, -4096(r1) -+; CHECK-BE-NEXT: .cfi_def_cfa_register r1 -+; CHECK-BE-NEXT: .cfi_def_cfa_register r30 -+; CHECK-BE-NEXT: .cfi_offset r30, -16 -+; CHECK-BE-NEXT: addi r4, r1, 2048 -+; CHECK-BE-NEXT: li r5, 1 -+; CHECK-BE-NEXT: sldi r3, r3, 2 -+; CHECK-BE-NEXT: stwx r5, r4, r3 -+; CHECK-BE-NEXT: lwz r3, 2048(r1) -+; CHECK-BE-NEXT: mr r1, r30 -+; CHECK-BE-NEXT: ld r30, -16(r1) -+; CHECK-BE-NEXT: blr -+; -+; CHECK-32-LABEL: f9: -+; CHECK-32: # %bb.0: -+; CHECK-32-NEXT: mr r12, r1 -+; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -+; CHECK-32-NEXT: clrlwi r0, r12, 21 -+; CHECK-32-NEXT: subc r1, r1, r0 -+; CHECK-32-NEXT: stwu r12, -2048(r1) -+; CHECK-32-NEXT: stwu r12, -4096(r1) -+; CHECK-32-NEXT: stwu r12, -4096(r1) -+; CHECK-32-NEXT: .cfi_def_cfa_register r1 -+; CHECK-32-NEXT: sub r0, r1, r12 -+; CHECK-32-NEXT: sub r0, r1, r0 -+; CHECK-32-NEXT: addic r0, r0, -8 -+; CHECK-32-NEXT: stwx r30, 0, r0 -+; CHECK-32-NEXT: addic r30, r0, 8 -+; CHECK-32-NEXT: .cfi_def_cfa_register r30 -+; CHECK-32-NEXT: .cfi_offset r30, -8 -+; CHECK-32-NEXT: addi r3, r1, 2048 -+; CHECK-32-NEXT: li r5, 1 -+; CHECK-32-NEXT: slwi r4, r4, 2 -+; CHECK-32-NEXT: stwx r5, r3, r4 -+; CHECK-32-NEXT: mr r0, r31 -+; CHECK-32-NEXT: lwz r3, 2048(r1) -+; CHECK-32-NEXT: lwz r31, 0(r1) -+; CHECK-32-NEXT: lwz r30, -8(r31) -+; CHECK-32-NEXT: mr r1, r31 -+; CHECK-32-NEXT: mr r31, r0 -+; CHECK-32-NEXT: blr -+ %a = alloca i32, i32 2000, align 2048 -+ %b = getelementptr inbounds i32, i32* %a, i64 %i -+ store volatile i32 1, i32* %b -+ %c = load volatile i32, i32* %a -+ ret i32 %c -+} -+ -+; alloca < probe_size, align < probe_size, alloca + align > probe_size -+define i32 @f10(i64 %i) local_unnamed_addr #0 { -+; CHECK-LE-LABEL: f10: -+; CHECK-LE: # %bb.0: -+; CHECK-LE-NEXT: std r30, -16(r1) -+; CHECK-LE-NEXT: mr r30, r1 -+; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 -+; CHECK-LE-NEXT: clrldi r0, r30, 54 -+; CHECK-LE-NEXT: subc r12, r30, r0 -+; CHECK-LE-NEXT: clrldi r0, r0, 52 -+; CHECK-LE-NEXT: cmpdi r0, 0 -+; CHECK-LE-NEXT: beq cr0, .LBB10_2 -+; CHECK-LE-NEXT: # %bb.1: -+; CHECK-LE-NEXT: neg r0, r0 -+; CHECK-LE-NEXT: stdux r30, r1, r0 -+; CHECK-LE-NEXT: .LBB10_2: -+; CHECK-LE-NEXT: li r0, -4096 -+; CHECK-LE-NEXT: cmpd r1, r12 -+; CHECK-LE-NEXT: beq cr0, .LBB10_4 -+; CHECK-LE-NEXT: .LBB10_3: -+; CHECK-LE-NEXT: stdux r30, r1, r0 -+; CHECK-LE-NEXT: cmpd r1, r12 -+; CHECK-LE-NEXT: bne cr0, .LBB10_3 -+; CHECK-LE-NEXT: .LBB10_4: -+; CHECK-LE-NEXT: mr r12, r30 -+; CHECK-LE-NEXT: stdu r12, -1024(r1) -+; CHECK-LE-NEXT: stdu r12, -4096(r1) -+; CHECK-LE-NEXT: .cfi_def_cfa_register r1 -+; CHECK-LE-NEXT: .cfi_def_cfa_register r30 -+; CHECK-LE-NEXT: .cfi_offset r30, -16 -+; CHECK-LE-NEXT: addi r4, r1, 1024 -+; CHECK-LE-NEXT: sldi r3, r3, 2 -+; CHECK-LE-NEXT: li r5, 1 -+; CHECK-LE-NEXT: stwx r5, r4, r3 -+; CHECK-LE-NEXT: lwz r3, 1024(r1) -+; CHECK-LE-NEXT: mr r1, r30 -+; CHECK-LE-NEXT: ld r30, -16(r1) -+; CHECK-LE-NEXT: blr -+; -+; CHECK-BE-LABEL: f10: -+; CHECK-BE: # %bb.0: -+; CHECK-BE-NEXT: std r30, -16(r1) -+; CHECK-BE-NEXT: mr r30, r1 -+; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 -+; CHECK-BE-NEXT: clrldi r0, r30, 54 -+; CHECK-BE-NEXT: subc r12, r30, r0 -+; CHECK-BE-NEXT: clrldi r0, r0, 52 -+; CHECK-BE-NEXT: cmpdi r0, 0 -+; CHECK-BE-NEXT: beq cr0, .LBB10_2 -+; CHECK-BE-NEXT: # %bb.1: -+; CHECK-BE-NEXT: neg r0, r0 -+; CHECK-BE-NEXT: stdux r30, r1, r0 -+; CHECK-BE-NEXT: .LBB10_2: -+; CHECK-BE-NEXT: li r0, -4096 -+; CHECK-BE-NEXT: cmpd r1, r12 -+; CHECK-BE-NEXT: beq cr0, .LBB10_4 -+; CHECK-BE-NEXT: .LBB10_3: -+; CHECK-BE-NEXT: stdux r30, r1, r0 -+; CHECK-BE-NEXT: cmpd r1, r12 -+; CHECK-BE-NEXT: bne cr0, .LBB10_3 -+; CHECK-BE-NEXT: .LBB10_4: -+; CHECK-BE-NEXT: mr r12, r30 -+; CHECK-BE-NEXT: stdu r12, -1024(r1) -+; CHECK-BE-NEXT: stdu r12, -4096(r1) -+; CHECK-BE-NEXT: .cfi_def_cfa_register r1 -+; CHECK-BE-NEXT: .cfi_def_cfa_register r30 -+; CHECK-BE-NEXT: .cfi_offset r30, -16 -+; CHECK-BE-NEXT: addi r4, r1, 1024 -+; CHECK-BE-NEXT: li r5, 1 -+; CHECK-BE-NEXT: sldi r3, r3, 2 -+; CHECK-BE-NEXT: stwx r5, r4, r3 -+; CHECK-BE-NEXT: lwz r3, 1024(r1) -+; CHECK-BE-NEXT: mr r1, r30 -+; CHECK-BE-NEXT: ld r30, -16(r1) -+; CHECK-BE-NEXT: blr -+; -+; CHECK-32-LABEL: f10: -+; CHECK-32: # %bb.0: -+; CHECK-32-NEXT: mr r12, r1 -+; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -+; CHECK-32-NEXT: clrlwi r0, r12, 22 -+; CHECK-32-NEXT: subc r1, r1, r0 -+; CHECK-32-NEXT: stwu r12, -1024(r1) -+; CHECK-32-NEXT: stwu r12, -4096(r1) -+; CHECK-32-NEXT: .cfi_def_cfa_register r1 -+; CHECK-32-NEXT: sub r0, r1, r12 -+; CHECK-32-NEXT: sub r0, r1, r0 -+; CHECK-32-NEXT: addic r0, r0, -8 -+; CHECK-32-NEXT: stwx r30, 0, r0 -+; CHECK-32-NEXT: addic r30, r0, 8 -+; CHECK-32-NEXT: .cfi_def_cfa_register r30 -+; CHECK-32-NEXT: .cfi_offset r30, -8 -+; CHECK-32-NEXT: addi r3, r1, 1024 -+; CHECK-32-NEXT: li r5, 1 -+; CHECK-32-NEXT: slwi r4, r4, 2 -+; CHECK-32-NEXT: stwx r5, r3, r4 -+; CHECK-32-NEXT: mr r0, r31 -+; CHECK-32-NEXT: lwz r3, 1024(r1) -+; CHECK-32-NEXT: lwz r31, 0(r1) -+; CHECK-32-NEXT: lwz r30, -8(r31) -+; CHECK-32-NEXT: mr r1, r31 -+; CHECK-32-NEXT: mr r31, r0 -+; CHECK-32-NEXT: blr -+ %a = alloca i32, i32 1000, align 1024 -+ %b = getelementptr inbounds i32, i32* %a, i64 %i -+ store volatile i32 1, i32* %b -+ %c = load volatile i32, i32* %a -+ ret i32 %c -+} -+ -+define void @f11(i32 %vla_size, i64 %i) #0 { -+; CHECK-LE-LABEL: f11: -+; CHECK-LE: # %bb.0: -+; CHECK-LE-NEXT: std r31, -8(r1) -+; CHECK-LE-NEXT: std r30, -16(r1) -+; CHECK-LE-NEXT: mr r30, r1 -+; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 -+; CHECK-LE-NEXT: clrldi r0, r30, 49 -+; CHECK-LE-NEXT: subc r12, r30, r0 -+; CHECK-LE-NEXT: clrldi r0, r0, 52 -+; CHECK-LE-NEXT: cmpdi r0, 0 -+; CHECK-LE-NEXT: beq cr0, .LBB11_2 -+; CHECK-LE-NEXT: # %bb.1: -+; CHECK-LE-NEXT: neg r0, r0 -+; CHECK-LE-NEXT: stdux r30, r1, r0 -+; CHECK-LE-NEXT: .LBB11_2: -+; CHECK-LE-NEXT: li r0, -4096 -+; CHECK-LE-NEXT: cmpd r1, r12 -+; CHECK-LE-NEXT: beq cr0, .LBB11_4 -+; CHECK-LE-NEXT: .LBB11_3: -+; CHECK-LE-NEXT: stdux r30, r1, r0 -+; CHECK-LE-NEXT: cmpd r1, r12 -+; CHECK-LE-NEXT: bne cr0, .LBB11_3 -+; CHECK-LE-NEXT: .LBB11_4: -+; CHECK-LE-NEXT: mr r12, r30 -+; CHECK-LE-NEXT: li r0, 24 -+; CHECK-LE-NEXT: mtctr r0 -+; CHECK-LE-NEXT: .LBB11_5: -+; CHECK-LE-NEXT: stdu r12, -4096(r1) -+; CHECK-LE-NEXT: bdnz .LBB11_5 -+; CHECK-LE-NEXT: # %bb.6: -+; CHECK-LE-NEXT: .cfi_def_cfa_register r1 -+; CHECK-LE-NEXT: .cfi_def_cfa_register r30 -+; CHECK-LE-NEXT: .cfi_offset r31, -8 -+; CHECK-LE-NEXT: .cfi_offset r30, -16 -+; CHECK-LE-NEXT: clrldi r3, r3, 32 -+; CHECK-LE-NEXT: lis r5, 1 -+; CHECK-LE-NEXT: mr r31, r1 -+; CHECK-LE-NEXT: li r6, 1 -+; CHECK-LE-NEXT: addi r3, r3, 15 -+; CHECK-LE-NEXT: ori r5, r5, 0 -+; CHECK-LE-NEXT: rldicl r3, r3, 60, 4 -+; CHECK-LE-NEXT: sldi r4, r4, 2 -+; CHECK-LE-NEXT: add r5, r31, r5 -+; CHECK-LE-NEXT: rldicl r3, r3, 4, 31 -+; CHECK-LE-NEXT: stwx r6, r5, r4 -+; CHECK-LE-NEXT: li r4, -32768 -+; CHECK-LE-NEXT: neg r7, r3 -+; CHECK-LE-NEXT: ld r3, 0(r1) -+; CHECK-LE-NEXT: and r4, r7, r4 -+; CHECK-LE-NEXT: mr r7, r4 -+; CHECK-LE-NEXT: li r4, -4096 -+; CHECK-LE-NEXT: divd r5, r7, r4 -+; CHECK-LE-NEXT: mulld r4, r5, r4 -+; CHECK-LE-NEXT: sub r5, r7, r4 -+; CHECK-LE-NEXT: add r4, r1, r7 -+; CHECK-LE-NEXT: stdux r3, r1, r5 -+; CHECK-LE-NEXT: cmpd r1, r4 -+; CHECK-LE-NEXT: beq cr0, .LBB11_8 -+; CHECK-LE-NEXT: .LBB11_7: -+; CHECK-LE-NEXT: stdu r3, -4096(r1) -+; CHECK-LE-NEXT: cmpd r1, r4 -+; CHECK-LE-NEXT: bne cr0, .LBB11_7 -+; CHECK-LE-NEXT: .LBB11_8: -+; CHECK-LE-NEXT: addi r3, r1, -32768 -+; CHECK-LE-NEXT: lbz r3, 0(r3) -+; CHECK-LE-NEXT: mr r1, r30 -+; CHECK-LE-NEXT: ld r31, -8(r1) -+; CHECK-LE-NEXT: ld r30, -16(r1) -+; CHECK-LE-NEXT: blr -+; -+; CHECK-BE-LABEL: f11: -+; CHECK-BE: # %bb.0: -+; CHECK-BE-NEXT: std r31, -8(r1) -+; CHECK-BE-NEXT: std r30, -16(r1) -+; CHECK-BE-NEXT: mr r30, r1 -+; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 -+; CHECK-BE-NEXT: clrldi r0, r30, 49 -+; CHECK-BE-NEXT: subc r12, r30, r0 -+; CHECK-BE-NEXT: clrldi r0, r0, 52 -+; CHECK-BE-NEXT: cmpdi r0, 0 -+; CHECK-BE-NEXT: beq cr0, .LBB11_2 -+; CHECK-BE-NEXT: # %bb.1: -+; CHECK-BE-NEXT: neg r0, r0 -+; CHECK-BE-NEXT: stdux r30, r1, r0 -+; CHECK-BE-NEXT: .LBB11_2: -+; CHECK-BE-NEXT: li r0, -4096 -+; CHECK-BE-NEXT: cmpd r1, r12 -+; CHECK-BE-NEXT: beq cr0, .LBB11_4 -+; CHECK-BE-NEXT: .LBB11_3: -+; CHECK-BE-NEXT: stdux r30, r1, r0 -+; CHECK-BE-NEXT: cmpd r1, r12 -+; CHECK-BE-NEXT: bne cr0, .LBB11_3 -+; CHECK-BE-NEXT: .LBB11_4: -+; CHECK-BE-NEXT: mr r12, r30 -+; CHECK-BE-NEXT: li r0, 24 -+; CHECK-BE-NEXT: mtctr r0 -+; CHECK-BE-NEXT: .LBB11_5: -+; CHECK-BE-NEXT: stdu r12, -4096(r1) -+; CHECK-BE-NEXT: bdnz .LBB11_5 -+; CHECK-BE-NEXT: # %bb.6: -+; CHECK-BE-NEXT: .cfi_def_cfa_register r1 -+; CHECK-BE-NEXT: .cfi_def_cfa_register r30 -+; CHECK-BE-NEXT: .cfi_offset r31, -8 -+; CHECK-BE-NEXT: .cfi_offset r30, -16 -+; CHECK-BE-NEXT: clrldi r3, r3, 32 -+; CHECK-BE-NEXT: lis r5, 1 -+; CHECK-BE-NEXT: addi r3, r3, 15 -+; CHECK-BE-NEXT: mr r31, r1 -+; CHECK-BE-NEXT: ori r5, r5, 0 -+; CHECK-BE-NEXT: rldicl r3, r3, 60, 4 -+; CHECK-BE-NEXT: add r5, r31, r5 -+; CHECK-BE-NEXT: sldi r4, r4, 2 -+; CHECK-BE-NEXT: li r6, 1 -+; CHECK-BE-NEXT: rldicl r3, r3, 4, 31 -+; CHECK-BE-NEXT: stwx r6, r5, r4 -+; CHECK-BE-NEXT: neg r7, r3 -+; CHECK-BE-NEXT: li r4, -32768 -+; CHECK-BE-NEXT: and r4, r7, r4 -+; CHECK-BE-NEXT: ld r3, 0(r1) -+; CHECK-BE-NEXT: mr r7, r4 -+; CHECK-BE-NEXT: li r4, -4096 -+; CHECK-BE-NEXT: divd r5, r7, r4 -+; CHECK-BE-NEXT: mulld r4, r5, r4 -+; CHECK-BE-NEXT: sub r5, r7, r4 -+; CHECK-BE-NEXT: add r4, r1, r7 -+; CHECK-BE-NEXT: stdux r3, r1, r5 -+; CHECK-BE-NEXT: cmpd r1, r4 -+; CHECK-BE-NEXT: beq cr0, .LBB11_8 -+; CHECK-BE-NEXT: .LBB11_7: -+; CHECK-BE-NEXT: stdu r3, -4096(r1) -+; CHECK-BE-NEXT: cmpd r1, r4 -+; CHECK-BE-NEXT: bne cr0, .LBB11_7 -+; CHECK-BE-NEXT: .LBB11_8: -+; CHECK-BE-NEXT: addi r3, r1, -32768 -+; CHECK-BE-NEXT: lbz r3, 0(r3) -+; CHECK-BE-NEXT: mr r1, r30 -+; CHECK-BE-NEXT: ld r31, -8(r1) -+; CHECK-BE-NEXT: ld r30, -16(r1) -+; CHECK-BE-NEXT: blr -+; -+; CHECK-32-LABEL: f11: -+; CHECK-32: # %bb.0: -+; CHECK-32-NEXT: mr r12, r1 -+; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -+; CHECK-32-NEXT: clrlwi r0, r12, 17 -+; CHECK-32-NEXT: subc r1, r1, r0 -+; CHECK-32-NEXT: li r0, 24 -+; CHECK-32-NEXT: mtctr r0 -+; CHECK-32-NEXT: .LBB11_1: -+; CHECK-32-NEXT: stwu r12, -4096(r1) -+; CHECK-32-NEXT: bdnz .LBB11_1 -+; CHECK-32-NEXT: # %bb.2: -+; CHECK-32-NEXT: .cfi_def_cfa_register r1 -+; CHECK-32-NEXT: sub r0, r1, r12 -+; CHECK-32-NEXT: sub r0, r1, r0 -+; CHECK-32-NEXT: addic r0, r0, -4 -+; CHECK-32-NEXT: stwx r31, 0, r0 -+; CHECK-32-NEXT: addic r0, r0, -4 -+; CHECK-32-NEXT: stwx r30, 0, r0 -+; CHECK-32-NEXT: addic r30, r0, 8 -+; CHECK-32-NEXT: .cfi_def_cfa_register r30 -+; CHECK-32-NEXT: .cfi_offset r31, -4 -+; CHECK-32-NEXT: .cfi_offset r30, -8 -+; CHECK-32-NEXT: lis r4, 1 -+; CHECK-32-NEXT: mr r31, r1 -+; CHECK-32-NEXT: ori r4, r4, 0 -+; CHECK-32-NEXT: addi r3, r3, 15 -+; CHECK-32-NEXT: add r4, r31, r4 -+; CHECK-32-NEXT: li r5, 1 -+; CHECK-32-NEXT: slwi r6, r6, 2 -+; CHECK-32-NEXT: rlwinm r3, r3, 0, 0, 27 -+; CHECK-32-NEXT: neg r7, r3 -+; CHECK-32-NEXT: stwx r5, r4, r6 -+; CHECK-32-NEXT: li r4, -32768 -+; CHECK-32-NEXT: and r4, r7, r4 -+; CHECK-32-NEXT: lwz r3, 0(r1) -+; CHECK-32-NEXT: mr r7, r4 -+; CHECK-32-NEXT: li r4, -4096 -+; CHECK-32-NEXT: divw r5, r7, r4 -+; CHECK-32-NEXT: mullw r4, r5, r4 -+; CHECK-32-NEXT: sub r5, r7, r4 -+; CHECK-32-NEXT: add r4, r1, r7 -+; CHECK-32-NEXT: stwux r3, r1, r5 -+; CHECK-32-NEXT: cmpw r1, r4 -+; CHECK-32-NEXT: beq cr0, .LBB11_4 -+; CHECK-32-NEXT: .LBB11_3: -+; CHECK-32-NEXT: stwu r3, -4096(r1) -+; CHECK-32-NEXT: cmpw r1, r4 -+; CHECK-32-NEXT: bne cr0, .LBB11_3 -+; CHECK-32-NEXT: .LBB11_4: -+; CHECK-32-NEXT: addi r3, r1, -32768 -+; CHECK-32-NEXT: lbz r3, 0(r3) -+; CHECK-32-NEXT: lwz r31, 0(r1) -+; CHECK-32-NEXT: lwz r0, -4(r31) -+; CHECK-32-NEXT: lwz r30, -8(r31) -+; CHECK-32-NEXT: mr r1, r31 -+; CHECK-32-NEXT: mr r31, r0 -+; CHECK-32-NEXT: blr -+ %a = alloca i32, i32 4096, align 32768 -+ %b = getelementptr inbounds i32, i32* %a, i64 %i -+ store volatile i32 1, i32* %b -+ %1 = zext i32 %vla_size to i64 -+ %vla = alloca i8, i64 %1, align 2048 -+ %2 = load volatile i8, i8* %vla, align 2048 -+ ret void -+} -+ - attributes #0 = { "probe-stack"="inline-asm" } -diff --git llvm/test/CodeGen/PowerPC/stack-realign.ll llvm/test/CodeGen/PowerPC/stack-realign.ll -index ea3603b9ce20..640bfb81709a 100644 ---- llvm/test/CodeGen/PowerPC/stack-realign.ll -+++ llvm/test/CodeGen/PowerPC/stack-realign.ll -@@ -43,7 +43,7 @@ entry: - - ; CHECK: std 3, 48(30) - --; CHECK: ld 1, 0(1) -+; CHECK: mr 1, 30 - ; CHECK-DAG: ld [[SR:[0-9]+]], 16(1) - ; CHECK-DAG: ld 30, -16(1) - ; CHECK-DAG: mtlr [[SR]] -@@ -69,7 +69,7 @@ entry: - - ; CHECK-FP: std 3, 48(30) - --; CHECK-FP: ld 1, 0(1) -+; CHECK-FP: mr 1, 30 - ; CHECK-FP-DAG: ld [[SR:[0-9]+]], 16(1) - ; CHECK-FP-DAG: ld 31, -8(1) - ; CHECK-FP-DAG: ld 30, -16(1) --- -2.30.0 - diff --git a/deps/patches/llvm-6.0-DISABLE_ABI_CHECKS.patch b/deps/patches/llvm-6.0-DISABLE_ABI_CHECKS.patch deleted file mode 100644 index d537c2579166f..0000000000000 --- a/deps/patches/llvm-6.0-DISABLE_ABI_CHECKS.patch +++ /dev/null @@ -1,39 +0,0 @@ -From d793ba4bacae51ae25be19c1636fcf38707938fd Mon Sep 17 00:00:00 2001 -From: Valentin Churavy -Date: Fri, 1 Jun 2018 17:43:55 -0400 -Subject: [PATCH] fix LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING - ---- - cmake/modules/HandleLLVMOptions.cmake | 2 +- - include/llvm/Config/abi-breaking.h.cmake | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake -index 3d2dd48018c..b67ee6a896e 100644 ---- a/cmake/modules/HandleLLVMOptions.cmake -+++ b/cmake/modules/HandleLLVMOptions.cmake -@@ -572,7 +572,7 @@ if (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL)) - - if (LLVM_ENABLE_PEDANTIC AND LLVM_COMPILER_IS_GCC_COMPATIBLE) - append("-pedantic" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) -- append("-Wno-long-long" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) -+ append("-Wno-long-long -Wundef" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) - endif() - - add_flag_if_supported("-Wcovered-switch-default" COVERED_SWITCH_DEFAULT_FLAG) -diff --git a/include/llvm/Config/abi-breaking.h.cmake b/include/llvm/Config/abi-breaking.h.cmake -index 7ae401e5b8a..d52c4609101 100644 ---- a/include/llvm/Config/abi-breaking.h.cmake -+++ b/include/llvm/Config/abi-breaking.h.cmake -@@ -20,7 +20,7 @@ - - /* Allow selectively disabling link-time mismatch checking so that header-only - ADT content from LLVM can be used without linking libSupport. */ --#if !LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING -+#ifndef LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING - - // ABI_BREAKING_CHECKS protection: provides link-time failure when clients build - // mismatch with LLVM --- -2.17.0 - diff --git a/deps/patches/llvm-7.0-D44650.patch b/deps/patches/llvm-7.0-D44650.patch deleted file mode 100644 index 09b5b27149aa8..0000000000000 --- a/deps/patches/llvm-7.0-D44650.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/tools/llvm-cfi-verify/CMakeLists.txt b/tools/llvm-cfi-verify/CMakeLists.txt -index ae12bec5e80..9ffbe4e070d 100644 ---- a/tools/llvm-cfi-verify/CMakeLists.txt -+++ b/tools/llvm-cfi-verify/CMakeLists.txt -@@ -11,7 +11,7 @@ set(LLVM_LINK_COMPONENTS - Symbolize - ) - --add_llvm_tool(llvm-cfi-verify -+add_llvm_tool(llvm-cfi-verify DISABLE_LLVM_LINK_LLVM_DYLIB - llvm-cfi-verify.cpp - ) - diff --git a/deps/patches/llvm-D27629-AArch64-large_model_6.0.1.patch b/deps/patches/llvm-D27629-AArch64-large_model_6.0.1.patch deleted file mode 100644 index 89beefdd157e5..0000000000000 --- a/deps/patches/llvm-D27629-AArch64-large_model_6.0.1.patch +++ /dev/null @@ -1,53 +0,0 @@ -From f76abe65e6d07fea5e838c4f8c9a9421c16debb0 Mon Sep 17 00:00:00 2001 -From: Valentin Churavy -Date: Thu, 5 Jul 2018 12:37:50 -0400 -Subject: [PATCH] Fix unwind info relocation with large code model on AArch64 - ---- - lib/MC/MCObjectFileInfo.cpp | 2 ++ - .../AArch64/ELF_ARM64_large-relocations.s | 20 +++++++++++++++++++ - 2 files changed, 22 insertions(+) - create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s - -diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp -index 328f000f37c..938b35f20d1 100644 ---- a/lib/MC/MCObjectFileInfo.cpp -+++ b/lib/MC/MCObjectFileInfo.cpp -@@ -291,6 +291,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) { - break; - case Triple::ppc64: - case Triple::ppc64le: -+ case Triple::aarch64: -+ case Triple::aarch64_be: - case Triple::x86_64: - FDECFIEncoding = dwarf::DW_EH_PE_pcrel | - (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); -diff --git a/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s b/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s -new file mode 100644 -index 00000000000..66f28dabd79 ---- /dev/null -+++ b/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s -@@ -0,0 +1,20 @@ -+# RUN: llvm-mc -triple=arm64-none-linux-gnu -large-code-model -filetype=obj -o %T/large-reloc.o %s -+# RUN: llvm-rtdyld -triple=arm64-none-linux-gnu -verify -map-section large-reloc.o,.eh_frame=0x10000 -map-section large-reloc.o,.text=0xffff000000000000 -check=%s %T/large-reloc.o -+# RUN-BE: llvm-mc -triple=aarch64_be-none-linux-gnu -large-code-model -filetype=obj -o %T/be-large-reloc.o %s -+# RUN-BE: llvm-rtdyld -triple=aarch64_be-none-linux-gnu -verify -map-section be-large-reloc.o,.eh_frame=0x10000 -map-section be-large-reloc.o,.text=0xffff000000000000 -check=%s %T/be-large-reloc.o -+ -+ .text -+ .globl g -+ .p2align 2 -+ .type g,@function -+g: -+ .cfi_startproc -+ mov x0, xzr -+ ret -+ .Lfunc_end0: -+ .size g, .Lfunc_end0-g -+ .cfi_endproc -+ -+# Skip the CIE and load the 8 bytes PC begin pointer. -+# Assuming the CIE and the FDE length are both 4 bytes. -+# rtdyld-check: *{8}(section_addr(large-reloc.o, .eh_frame) + (*{4}(section_addr(large-reloc.o, .eh_frame))) + 0xc) = g - (section_addr(large-reloc.o, .eh_frame) + (*{4}(section_addr(large-reloc.o, .eh_frame))) + 0xc) --- -2.18.0 - diff --git a/deps/patches/llvm-D80101.patch b/deps/patches/llvm-D80101.patch deleted file mode 100644 index f8b3ff7c643ad..0000000000000 --- a/deps/patches/llvm-D80101.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp -index 9d4fdc6b624..14b52e0ca33 100644 ---- a/lib/CodeGen/StackColoring.cpp -+++ b/lib/CodeGen/StackColoring.cpp -@@ -913,6 +913,8 @@ void StackColoring::remapInstructions(DenseMap &SlotRemap) { - assert(To && From && "Invalid allocation object"); - Allocas[From] = To; - -+ const_cast(To)->moveBefore(const_cast(&*To->getParent()->getFirstInsertionPt())); -+ - // AA might be used later for instruction scheduling, and we need it to be - // able to deduce the correct aliasing releationships between pointers - // derived from the alloca being remapped and the target of that remapping. diff --git a/deps/patches/llvm-D84031.patch b/deps/patches/llvm-D84031.patch deleted file mode 100644 index 60fea4e41b959..0000000000000 --- a/deps/patches/llvm-D84031.patch +++ /dev/null @@ -1,10 +0,0 @@ ---- a/lib/CodeGen/CodeGenPrepare.cpp -+++ b/lib/CodeGen/CodeGenPrepare.cpp -@@ -540,6 +540,7 @@ - LargeOffsetGEPID.clear(); - } - -+ NewGEPBases.clear(); - SunkAddrs.clear(); - - if (!DisableBranchOpts) { diff --git a/deps/patches/llvm-D88630-clang-cmake.patch b/deps/patches/llvm-D88630-clang-cmake.patch deleted file mode 100644 index 947b46ebb4e42..0000000000000 --- a/deps/patches/llvm-D88630-clang-cmake.patch +++ /dev/null @@ -1,29 +0,0 @@ -commit 9c682bac219e28afc73e25e7089b553ecd1ca0f6 -Author: Keno Fischer -Date: Wed Sep 30 22:15:40 2020 -0400 - - [clang/CMake] Respect LLVM_TOOLS_INSTALL_DIR - - Otherwise clang installs all of its tools into `bin/` while - LLVM installs its tools into (LLVM_TOOLS_INSTALL_DIR). - I could swear this used to work (and in fact the julia build system - assumes it), but I can't pin down a specific commit that would - have broken this, and julia has been relying on pre-compiled binaries - for a while now (that don't use this setting), so it may have been - broken for quite a while. - - Differential Revision: https://reviews.llvm.org/D88630 - -diff --git a/tools/clang/cmake/modules/AddClang.cmake b/tools/clang/cmake/modules/AddClang.cmake -index 704278a0e93..a455365cabf 100644 ---- a/tools/clang/cmake/modules/AddClang.cmake -+++ b/tools/clang/cmake/modules/AddClang.cmake -@@ -170,7 +170,7 @@ macro(add_clang_tool name) - - install(TARGETS ${name} - ${export_to_clangtargets} -- RUNTIME DESTINATION bin -+ RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR} - COMPONENT ${name}) - - if(NOT LLVM_ENABLE_IDE) diff --git a/deps/patches/llvm-invalid-addrspacecast-sink.patch b/deps/patches/llvm-invalid-addrspacecast-sink.patch deleted file mode 100644 index 5e6dd196cd3d6..0000000000000 --- a/deps/patches/llvm-invalid-addrspacecast-sink.patch +++ /dev/null @@ -1,116 +0,0 @@ -commit 291f2c3c30ab667b7dd39fbf162b737d64f56492 -Author: Tim Besard -Date: Fri Nov 27 09:19:40 2020 +0100 - - Don't sink ptrtoint/inttoptr sequences into non-noop addrspacecasts. - - Differential Revision: https://reviews.llvm.org/D92210 - -diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp -index 40e92b9e0a..0b90c9c0fc 100644 ---- a/lib/CodeGen/CodeGenPrepare.cpp -+++ b/lib/CodeGen/CodeGenPrepare.cpp -@@ -5041,18 +5041,29 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, - WeakTrackingVH SunkAddrVH = SunkAddrs[Addr]; - - Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; -+ Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); - if (SunkAddr) { - LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode - << " for " << *MemoryInst << "\n"); -- if (SunkAddr->getType() != Addr->getType()) -- SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); -+ if (SunkAddr->getType() != Addr->getType()) { -+ // Even though we only considered no-op addrspacecasts, -+ // semantically-meaningful conversions may still be present due to -+ // ptrtoint/inttoptr sequences. -+ if (SunkAddr->getType()->getPointerAddressSpace() != -+ Addr->getType()->getPointerAddressSpace() && -+ !DL->isNonIntegralPointerType(Addr->getType())) { -+ SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr"); -+ SunkAddr = -+ Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr"); -+ } else -+ SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); -+ } - } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && - SubtargetInfo->addrSinkUsingGEPs())) { - // By default, we use the GEP-based method when AA is used later. This - // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. - LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode - << " for " << *MemoryInst << "\n"); -- Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); - Value *ResultPtr = nullptr, *ResultIndex = nullptr; - - // First, find the pointer. -@@ -5181,8 +5192,19 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, - : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); - } - -- if (SunkAddr->getType() != Addr->getType()) -- SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); -+ if (SunkAddr->getType() != Addr->getType()) { -+ // Even though we only considered no-op addrspacecasts, -+ // semantically-meaningful conversions may still be present due to -+ // ptrtoint/inttoptr sequences. -+ if (SunkAddr->getType()->getPointerAddressSpace() != -+ Addr->getType()->getPointerAddressSpace() && -+ !DL->isNonIntegralPointerType(Addr->getType())) { -+ SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr"); -+ SunkAddr = -+ Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr"); -+ } else -+ SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); -+ } - } - } else { - // We'd require a ptrtoint/inttoptr down the line, which we can't do for -diff --git a/test/Transforms/CodeGenPrepare/NVPTX/dont-introduce-addrspacecast.ll b/test/Transforms/CodeGenPrepare/NVPTX/dont-introduce-addrspacecast.ll -new file mode 100644 -index 0000000000..39e50241c9 ---- /dev/null -+++ b/test/Transforms/CodeGenPrepare/NVPTX/dont-introduce-addrspacecast.ll -@@ -0,0 +1,43 @@ -+; RUN: opt -S -codegenprepare < %s | FileCheck %s -+ -+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" -+target triple = "nvptx64-nvidia-cuda" -+ -+ -+; ptrtoint/inttoptr combinations can introduce semantically-meaningful address space casts -+; which we can't sink into an addrspacecast -+ -+; CHECK-LABEL: @test -+define void @test(i8* %input_ptr) { -+ ; CHECK-LABEL: l1: -+ ; CHECK-NOT: addrspacecast -+ %intptr = ptrtoint i8* %input_ptr to i64 -+ %ptr = inttoptr i64 %intptr to i32 addrspace(3)* -+ -+ br label %l1 -+l1: -+ -+ store atomic i32 1, i32 addrspace(3)* %ptr unordered, align 4 -+ ret void -+} -+ -+ -+; we still should be able to look through multiple sequences of inttoptr/ptrtoint -+ -+; CHECK-LABEL: @test2 -+define void @test2(i8* %input_ptr) { -+ ; CHECK-LABEL: l2: -+ ; CHECK: bitcast -+ ; CHECK-NEXT: store -+ %intptr = ptrtoint i8* %input_ptr to i64 -+ %ptr = inttoptr i64 %intptr to i32 addrspace(3)* -+ -+ %intptr2 = ptrtoint i32 addrspace(3)* %ptr to i64 -+ %ptr2 = inttoptr i64 %intptr2 to i32* -+ -+ br label %l2 -+l2: -+ -+ store atomic i32 1, i32* %ptr2 unordered, align 4 -+ ret void -+} diff --git a/deps/patches/llvm-julia-tsan-custom-as.patch b/deps/patches/llvm-julia-tsan-custom-as.patch deleted file mode 100644 index a6f8a42ad2e32..0000000000000 --- a/deps/patches/llvm-julia-tsan-custom-as.patch +++ /dev/null @@ -1,28 +0,0 @@ -From bd41be423127b8946daea805290ad2eb19e66be4 Mon Sep 17 00:00:00 2001 -From: Valentin Churavy -Date: Sat, 19 May 2018 11:56:55 -0400 -Subject: [PATCH] [TSAN] Allow for custom address spaces - -Julia uses addressspaces for GC and we want these to be sanitized as well. ---- - lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp -index ec6904486e1..9d673353f43 100644 ---- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp -+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp -@@ -296,7 +296,9 @@ static bool shouldInstrumentReadWriteFromAddress(const Module *M, Value *Addr) { - // with them. - if (Addr) { - Type *PtrTy = cast(Addr->getType()->getScalarType()); -- if (PtrTy->getPointerAddressSpace() != 0) -+ auto AS = PtrTy->getPointerAddressSpace(); -+ // Allow for custom addresspaces -+ if (AS != 0 && AS < 10) - return false; - } - --- -2.17.0 - diff --git a/deps/patches/llvm-rGb498303066a6-gcc11-header-fix.patch b/deps/patches/llvm-rGb498303066a6-gcc11-header-fix.patch deleted file mode 100644 index a1683c91c5b29..0000000000000 --- a/deps/patches/llvm-rGb498303066a6-gcc11-header-fix.patch +++ /dev/null @@ -1,21 +0,0 @@ -From b498303066a63a203d24f739b2d2e0e56dca70d1 Mon Sep 17 00:00:00 2001 -From: serge-sans-paille -Date: Tue, 10 Nov 2020 14:55:25 +0100 -Subject: [PATCH] [nfc] Fix missing include - ---- - llvm/utils/benchmark/src/benchmark_register.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/llvm/utils/benchmark/src/benchmark_register.h b/llvm/utils/benchmark/src/benchmark_register.h -index 0705e219f2fa..4caa5ad4da07 100644 ---- a/utils/benchmark/src/benchmark_register.h -+++ b/utils/benchmark/src/benchmark_register.h -@@ -1,6 +1,7 @@ - #ifndef BENCHMARK_REGISTER_H - #define BENCHMARK_REGISTER_H - -+#include - #include - - #include "check.h" diff --git a/deps/patches/llvm7-revert-D44485.patch b/deps/patches/llvm7-revert-D44485.patch deleted file mode 100644 index 121e1974f5f4f..0000000000000 --- a/deps/patches/llvm7-revert-D44485.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 4370214628487ac8495f963ae05960b5ecc31103 Mon Sep 17 00:00:00 2001 -From: Jameson Nash -Date: Thu, 12 Sep 2019 11:45:07 -0400 -Subject: [PATCH] Revert "[MC] Always emit relocations for same-section - function references" - -This reverts commit 9232972575cafac29c3e4817c8714c9aca0e8585. ---- - lib/MC/WinCOFFObjectWriter.cpp | 12 +++++------- - test/MC/COFF/diff.s | 25 ++++++++----------------- - 2 files changed, 13 insertions(+), 24 deletions(-) - -diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp -index 9ffecd99df6..0214161e03c 100644 ---- a/lib/MC/WinCOFFObjectWriter.cpp -+++ b/lib/MC/WinCOFFObjectWriter.cpp -@@ -690,14 +690,12 @@ void WinCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm, - bool WinCOFFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( - const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB, - bool InSet, bool IsPCRel) const { -- // Don't drop relocations between functions, even if they are in the same text -- // section. Multiple Visual C++ linker features depend on having the -- // relocations present. The /INCREMENTAL flag will cause these relocations to -- // point to thunks, and the /GUARD:CF flag assumes that it can use relocations -- // to approximate the set of all address taken functions. LLD's implementation -- // of /GUARD:CF also relies on the existance of these relocations. -+ // MS LINK expects to be able to replace all references to a function with a -+ // thunk to implement their /INCREMENTAL feature. Make sure we don't optimize -+ // away any relocations to functions. - uint16_t Type = cast(SymA).getType(); -- if ((Type >> COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION) -+ if (Asm.isIncrementalLinkerCompatible() && -+ (Type >> COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION) - return false; - return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, SymA, FB, - InSet, IsPCRel); -diff --git a/test/MC/COFF/diff.s b/test/MC/COFF/diff.s -index f89e4ed8901..d68e628577b 100644 ---- a/test/MC/COFF/diff.s -+++ b/test/MC/COFF/diff.s -@@ -1,14 +1,19 @@ - // RUN: llvm-mc -filetype=obj -triple i686-pc-mingw32 %s | llvm-readobj -s -sr -sd | FileCheck %s - --// COFF resolves differences between labels in the same section, unless that --// label is declared with function type. -- - .section baz, "xr" -+ .def X -+ .scl 2; -+ .type 32; -+ .endef - .globl X - X: - mov Y-X+42, %eax - retl - -+ .def Y -+ .scl 2; -+ .type 32; -+ .endef - .globl Y - Y: - retl -@@ -25,11 +30,6 @@ _foobar: # @foobar - # %bb.0: - ret - -- .globl _baz --_baz: -- calll _foobar -- retl -- - .data - .globl _rust_crate # @rust_crate - .align 4 -@@ -39,15 +39,6 @@ _rust_crate: - .long _foobar-_rust_crate - .long _foobar-_rust_crate - --// Even though _baz and _foobar are in the same .text section, we keep the --// relocation for compatibility with the VC linker's /guard:cf and /incremental --// flags, even on mingw. -- --// CHECK: Name: .text --// CHECK: Relocations [ --// CHECK-NEXT: 0x12 IMAGE_REL_I386_REL32 _foobar --// CHECK-NEXT: ] -- - // CHECK: Name: .data - // CHECK: Relocations [ - // CHECK-NEXT: 0x4 IMAGE_REL_I386_DIR32 _foobar --- -2.17.1 - diff --git a/deps/patches/llvm8-D34078-vectorize-fdiv.patch b/deps/patches/llvm8-D34078-vectorize-fdiv.patch deleted file mode 100644 index c386d04b85376..0000000000000 --- a/deps/patches/llvm8-D34078-vectorize-fdiv.patch +++ /dev/null @@ -1,42 +0,0 @@ -diff --git a/lib/Analysis/IVDescriptors.cpp b/lib/Analysis/IVDescriptors.cpp -index aaebc4a481e..91fe4c0003c 100644 ---- a/lib/Analysis/IVDescriptors.cpp -+++ b/lib/Analysis/IVDescriptors.cpp -@@ -571,6 +571,7 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, - return InstDesc(Kind == RK_IntegerOr, I); - case Instruction::Xor: - return InstDesc(Kind == RK_IntegerXor, I); -+ case Instruction::FDiv: - case Instruction::FMul: - return InstDesc(Kind == RK_FloatMult, I, UAI); - case Instruction::FSub: -diff --git a/test/Transforms/LoopVectorize/float-reduction.ll b/test/Transforms/LoopVectorize/float-reduction.ll -index f3b95d0ead7..669c54d55a2 100644 ---- a/test/Transforms/LoopVectorize/float-reduction.ll -+++ b/test/Transforms/LoopVectorize/float-reduction.ll -@@ -44,3 +44,25 @@ for.body: ; preds = %for.body, %entry - for.end: ; preds = %for.body - ret float %sub - } -+ -+;CHECK-LABEL: @foodiv( -+;CHECK: fdiv fast <4 x float> -+;CHECK: ret -+define float @foodiv(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp { -+entry: -+ br label %for.body -+ -+for.body: ; preds = %for.body, %entry -+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] -+ %sum.04 = phi float [ 1.000000e+00, %entry ], [ %sub, %for.body ] -+ %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv -+ %0 = load float, float* %arrayidx, align 4 -+ %sub = fdiv fast float %sum.04, %0 -+ %indvars.iv.next = add i64 %indvars.iv, 1 -+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32 -+ %exitcond = icmp eq i32 %lftr.wideiv, 200 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: ; preds = %for.body -+ ret float %sub -+} diff --git a/deps/patches/llvm9-D50010-VNCoercion-ni.patch b/deps/patches/llvm9-D50010-VNCoercion-ni.patch deleted file mode 100644 index 988d669fe08fc..0000000000000 --- a/deps/patches/llvm9-D50010-VNCoercion-ni.patch +++ /dev/null @@ -1,64 +0,0 @@ -diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp ---- a/lib/Transforms/Utils/VNCoercion.cpp -+++ b/lib/Transforms/Utils/VNCoercion.cpp -@@ -34,17 +34,22 @@ - if (StoreSize < DL.getTypeSizeInBits(LoadTy)) - return false; - -+ bool StoredNI = DL.isNonIntegralPointerType(StoredTy->getScalarType()); -+ bool LoadNI = DL.isNonIntegralPointerType(LoadTy->getScalarType()); - // Don't coerce non-integral pointers to integers or vice versa. -- if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) != -- DL.isNonIntegralPointerType(LoadTy->getScalarType())) { -+ if (StoredNI != LoadNI) { - // As a special case, allow coercion of memset used to initialize - // an array w/null. Despite non-integral pointers not generally having a - // specific bit pattern, we do assume null is zero. - if (auto *CI = dyn_cast(StoredVal)) - return CI->isNullValue(); - return false; -+ } else if (StoredNI && LoadNI && -+ cast(StoredTy)->getAddressSpace() != -+ cast(LoadTy)->getAddressSpace()) { -+ return false; - } -- -+ - return true; - } - -diff --git a/test/Transforms/GVN/non-integral-pointers.ll b/test/Transforms/GVN/non-integral-pointers.ll ---- a/test/Transforms/GVN/non-integral-pointers.ll -+++ b/test/Transforms/GVN/non-integral-pointers.ll -@@ -1,7 +1,7 @@ - ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py - ; RUN: opt -gvn -S < %s | FileCheck %s - --target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4" -+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4:5" - target triple = "x86_64-unknown-linux-gnu" - - define void @f0(i1 %alwaysFalse, i64 %val, i64* %loc) { -@@ -285,3 +285,21 @@ - %ref = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc.off - ret i8 addrspace(4)* %ref - } -+ -+ define i8 addrspace(5)* @multini(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) { -+ ; CHECK-LABEL: @multini( -+ ; CHECK-NOT: inttoptr -+ ; CHECK-NOT: ptrtoint -+ ; CHECK-NOT: addrspacecast -+ entry: -+ store i8 addrspace(4)* %val, i8 addrspace(4)** %loc -+ br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken -+ -+ neverTaken: -+ %loc.bc = bitcast i8 addrspace(4)** %loc to i8 addrspace(5)** -+ %differentas = load i8 addrspace(5)*, i8 addrspace(5)** %loc.bc -+ ret i8 addrspace(5)* %differentas -+ -+ alwaysTaken: -+ ret i8 addrspace(5)* null -+ } - From 85365223548bd07a35755cd341602cb6d17f02da Mon Sep 17 00:00:00 2001 From: David Widmann Date: Mon, 17 Jan 2022 00:05:17 +0100 Subject: [PATCH 02/10] Use `GlobalRef` of `Core.CodeInfo` in `@generated` (#43823) Co-authored-by: Simeon Schaub --- base/expr.jl | 5 +++-- test/syntax.jl | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/base/expr.jl b/base/expr.jl index 2fb3559d1b9f6..7719eff3b334b 100644 --- a/base/expr.jl +++ b/base/expr.jl @@ -574,6 +574,7 @@ macro generated(f) if isa(f, Expr) && (f.head === :function || is_short_function_def(f)) body = f.args[2] lno = body.args[1] + tmp = gensym("tmp") return Expr(:escape, Expr(f.head, f.args[1], Expr(:block, @@ -581,8 +582,8 @@ macro generated(f) Expr(:if, Expr(:generated), # https://github.com/JuliaLang/julia/issues/25678 Expr(:block, - :(local tmp = $body), - :(if tmp isa Core.CodeInfo; return tmp; else tmp; end)), + :(local $tmp = $body), + :(if $tmp isa $(GlobalRef(Core, :CodeInfo)); return $tmp; else $tmp; end)), Expr(:block, Expr(:meta, :generated_only), Expr(:return, nothing)))))) diff --git a/test/syntax.jl b/test/syntax.jl index 354f014733c7e..0bfafa1355c19 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -2981,6 +2981,21 @@ end @generated g25678(x) = return :x @test g25678(7) === 7 +# issue 25678: module of name `Core` +# https://github.com/JuliaLang/julia/pull/40778/files#r784416018 +@test @eval Module() begin + Core = 1 + @generated f() = 1 + f() == 1 +end + +# issue 25678: argument of name `tmp` +# https://github.com/JuliaLang/julia/pull/43823#discussion_r785365312 +@test @eval Module() begin + @generated f(tmp) = tmp + f(1) === Int +end + # issue #19012 @test Meta.parse("\U2200", raise=false) == Symbol("∀") @test Meta.parse("\U2203", raise=false) == Symbol("∃") From 75a1d0ff6bea2c38bb524339e9feaaffe1b90aad Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Sun, 16 Jan 2022 18:05:41 -0500 Subject: [PATCH 03/10] [LLVM] Add support for building experimental targets (#43822) --- deps/llvm.mk | 2 ++ 1 file changed, 2 insertions(+) diff --git a/deps/llvm.mk b/deps/llvm.mk index 518e764dcde04..0f88e95266af8 100644 --- a/deps/llvm.mk +++ b/deps/llvm.mk @@ -61,6 +61,7 @@ LLVM_LIB_FILE := libLLVMCodeGen.a # Figure out which targets to build LLVM_TARGETS := host;NVPTX;AMDGPU;WebAssembly;BPF +LLVM_EXPERIMENTAL_TARGETS := LLVM_CFLAGS := LLVM_CXXFLAGS := @@ -83,6 +84,7 @@ LLVM_CXXFLAGS += $(CXXFLAGS) LLVM_CPPFLAGS += $(CPPFLAGS) LLVM_LDFLAGS += $(LDFLAGS) LLVM_CMAKE += -DLLVM_TARGETS_TO_BUILD:STRING="$(LLVM_TARGETS)" -DCMAKE_BUILD_TYPE="$(LLVM_CMAKE_BUILDTYPE)" +LLVM_CMAKE += -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD:STRING="$(LLVM_EXPERIMENTAL_TARGETS)" LLVM_CMAKE += -DLLVM_ENABLE_LIBXML2=OFF -DLLVM_HOST_TRIPLE="$(or $(XC_HOST),$(BUILD_MACHINE))" LLVM_CMAKE += -DLLVM_ENABLE_ZLIB=ON -DZLIB_LIBRARY="$(build_prefix)/lib" LLVM_CMAKE += -DCOMPILER_RT_ENABLE_IOS=OFF -DCOMPILER_RT_ENABLE_WATCHOS=OFF -DCOMPILER_RT_ENABLE_TVOS=OFF From dd0c14ba1d0add2ce89524a26684a1194a83312c Mon Sep 17 00:00:00 2001 From: Lionel Zoubritzky Date: Mon, 17 Jan 2022 03:11:59 +0100 Subject: [PATCH 04/10] Fix getindex and setindex! on 0-dimensional reinterpretarray (#43819) --- base/reinterpretarray.jl | 22 +++++++++++++--- test/reinterpretarray.jl | 57 ++++++++++++++++++++++++---------------- 2 files changed, 53 insertions(+), 26 deletions(-) diff --git a/base/reinterpretarray.jl b/base/reinterpretarray.jl index 499c56b4f9bc9..070f9563d6ddd 100644 --- a/base/reinterpretarray.jl +++ b/base/reinterpretarray.jl @@ -328,8 +328,15 @@ axes(a::NonReshapedReinterpretArray{T,0}) where {T} = () elsize(::Type{<:ReinterpretArray{T}}) where {T} = sizeof(T) unsafe_convert(::Type{Ptr{T}}, a::ReinterpretArray{T,N,S} where N) where {T,S} = Ptr{T}(unsafe_convert(Ptr{S},a.parent)) -@inline @propagate_inbounds getindex(a::NonReshapedReinterpretArray{T,0}) where {T} = reinterpret(T, a.parent[]) -@inline @propagate_inbounds getindex(a::ReinterpretArray) = a[1] +@inline @propagate_inbounds function getindex(a::NonReshapedReinterpretArray{T,0,S}) where {T,S} + if isprimitivetype(T) && isprimitivetype(S) + reinterpret(T, a.parent[]) + else + a[firstindex(a)] + end +end + +@inline @propagate_inbounds getindex(a::ReinterpretArray) = a[firstindex(a)] @inline @propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, inds::Vararg{Int, N}) where {T,N,S} check_readable(a) @@ -462,8 +469,15 @@ end return t[][i1] end -@inline @propagate_inbounds setindex!(a::NonReshapedReinterpretArray{T,0,S} where T, v) where {S} = (a.parent[] = reinterpret(S, v)) -@inline @propagate_inbounds setindex!(a::ReinterpretArray, v) = (a[1] = v) +@inline @propagate_inbounds function setindex!(a::NonReshapedReinterpretArray{T,0,S}, v) where {T,S} + if isprimitivetype(S) && isprimitivetype(T) + a.parent[] = reinterpret(S, v) + return a + end + setindex!(a, v, firstindex(a)) +end + +@inline @propagate_inbounds setindex!(a::ReinterpretArray, v) = setindex!(a, v, firstindex(a)) @inline @propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, inds::Vararg{Int, N}) where {T,N,S} check_writable(a) diff --git a/test/reinterpretarray.jl b/test/reinterpretarray.jl index c1c2c64c9ccce..ad3cdd9b1b4a8 100644 --- a/test/reinterpretarray.jl +++ b/test/reinterpretarray.jl @@ -67,29 +67,29 @@ for (_A, Ar, _B) in ((A, Ars, B), (As, Arss, Bs)) reinterpret(NTuple{3, Int64}, Bc)[2] = (4,5,6) @test Bc == Complex{Int64}[5+6im, 7+4im, 5+6im] B2 = reinterpret(NTuple{3, Int64}, Bc) - @test setindex!(B2, (1,2,3), 1) == B2 + @test setindex!(B2, (1,2,3), 1) === B2 @test Bc == Complex{Int64}[1+2im, 3+4im, 5+6im] Bc = copy(_B) Brrs = reinterpret(reshape, Int64, Bc) - @test setindex!(Brrs, -5, 2, 3) == Brrs + @test setindex!(Brrs, -5, 2, 3) === Brrs @test Bc == Complex{Int64}[5+6im, 7+8im, 9-5im] Brrs[last(eachindex(Brrs))] = 22 @test Bc == Complex{Int64}[5+6im, 7+8im, 9+22im] A1 = reinterpret(Float64, _A) A2 = reinterpret(ComplexF64, _A) - @test setindex!(A1, 1.0, 1) == A1 + @test setindex!(A1, 1.0, 1) === A1 @test real(A2[1]) == 1.0 A1 = reinterpret(reshape, Float64, _A) - A1[1] = 2.5 + @test setindex!(A1, 2.5, 1) === A1 @test reinterpret(Float64, _A[1]) == 2.5 A1rs = reinterpret(Float64, Ar) A2rs = reinterpret(ComplexF64, Ar) - A1rs[1, 1] = 1.0 + @test setindex!(A1rs, 1.0, 1, 1) === A1rs @test real(A2rs[1]) == 1.0 A1rs = reinterpret(reshape, Float64, Ar) A2rs = reinterpret(reshape, ComplexF64, Ar) - @test setindex!(A1rs, 2.5, 1, 1) == A1rs + @test setindex!(A1rs, 2.5, 1, 1) === A1rs @test real(A2rs[1]) == 2.5 end end @@ -107,14 +107,14 @@ A3r[CartesianIndex(1,2)] = 300+400im @test A3[2,1,2] == 400 # same-size reinterpret where one of the types is non-primitive -let a = NTuple{4,UInt8}[(0x01,0x02,0x03,0x04)] - @test reinterpret(Float32, a)[1] == reinterpret(Float32, 0x04030201) - reinterpret(Float32, a)[1] = 2.0 +let a = NTuple{4,UInt8}[(0x01,0x02,0x03,0x04)], ra = reinterpret(Float32, a) + @test ra[1] == reinterpret(Float32, 0x04030201) + @test setindex!(ra, 2.0) === ra @test reinterpret(Float32, a)[1] == 2.0 end -let a = NTuple{4,UInt8}[(0x01,0x02,0x03,0x04)] - @test reinterpret(reshape, Float32, a)[1] == reinterpret(Float32, 0x04030201) - reinterpret(reshape, Float32, a)[1] = 2.0 +let a = NTuple{4,UInt8}[(0x01,0x02,0x03,0x04)], ra = reinterpret(reshape, Float32, a) + @test ra[1] == reinterpret(Float32, 0x04030201) + @test setindex!(ra, 2.0) === ra @test reinterpret(reshape, Float32, a)[1] == 2.0 end @@ -198,7 +198,7 @@ let a = fill(1.0, 5, 3) @test_throws BoundsError r[badinds...] = -2 end for goodinds in (1, 15, (1,1), (5,3)) - r[goodinds...] = -2 + @test setindex!(r, -2, goodinds...) === r @test r[goodinds...] == -2 end r = reinterpret(Int32, a) @@ -211,7 +211,7 @@ let a = fill(1.0, 5, 3) @test_throws BoundsError r[badinds...] = -3 end for goodinds in (1, 30, (1,1), (10,3)) - r[goodinds...] = -3 + @test setindex!(r, -3, goodinds...) === r @test r[goodinds...] == -3 end r = reinterpret(Int64, view(a, 1:2:5, :)) @@ -224,7 +224,7 @@ let a = fill(1.0, 5, 3) @test_throws BoundsError r[badinds...] = -4 end for goodinds in (1, 9, (1,1), (3,3)) - r[goodinds...] = -4 + @test setindex!(r, -4, goodinds...) === r @test r[goodinds...] == -4 end r = reinterpret(Int32, view(a, 1:2:5, :)) @@ -237,7 +237,7 @@ let a = fill(1.0, 5, 3) @test_throws BoundsError r[badinds...] = -5 end for goodinds in (1, 18, (1,1), (6,3)) - r[goodinds...] = -5 + @test setindex!(r, -5, goodinds...) === r @test r[goodinds...] == -5 end @@ -318,14 +318,25 @@ end # Test 0-dimensional Arrays A = zeros(UInt32) -B = reinterpret(Int32,A) -Brs = reinterpret(reshape,Int32,A) -@test size(B) == size(Brs) == () -@test axes(B) == axes(Brs) == () -B[] = Int32(5) +B = reinterpret(Int32, A) +Brs = reinterpret(reshape,Int32, A) +C = reinterpret(Tuple{UInt32}, A) # non-primitive type +Crs = reinterpret(reshape, Tuple{UInt32}, A) # non-primitive type +@test size(B) == size(Brs) == size(C) == size(Crs) == () +@test axes(B) == axes(Brs) == axes(C) == axes(Crs) == () +@test setindex!(B, Int32(5)) === B @test B[] === Int32(5) @test Brs[] === Int32(5) +@test C[] === (UInt32(5),) +@test Crs[] === (UInt32(5),) @test A[] === UInt32(5) +@test setindex!(Brs, Int32(12)) === Brs +@test A[] === UInt32(12) +@test setindex!(C, (UInt32(7),)) === C +@test A[] === UInt32(7) +@test setindex!(Crs, (UInt32(3),)) === Crs +@test A[] === UInt32(3) + a = [(1.0,2.0)] af = @inferred(reinterpret(reshape, Float64, a)) @@ -413,13 +424,15 @@ end z = reinterpret(Tuple{}, fill(missing, ())) @test z == fill((), ()) @test z == reinterpret(reshape, Tuple{}, fill(nothing, ())) + @test z[] == () + @test setindex!(z, ()) === z @test_throws BoundsError z[2] @test_throws BoundsError z[3] = () @test_throws ArgumentError reinterpret(UInt8, fill(nothing, ())) @test_throws ArgumentError reinterpret(Missing, fill(1f0, ())) @test_throws ArgumentError reinterpret(reshape, Float64, fill(nothing, ())) @test_throws ArgumentError reinterpret(reshape, Nothing, fill(17, ())) - + @test_throws MethodError z[] = nothing @test @inferred(ndims(reinterpret(reshape, SomeSingleton, t))) == 2 @test @inferred(axes(reinterpret(reshape, Tuple{}, t))) == (Base.OneTo(3),Base.OneTo(5)) From 8667272ec19c73c24477644c7fca33a82b3f8e12 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Mon, 17 Jan 2022 02:28:15 -0500 Subject: [PATCH 05/10] Small test cleanup (#43831) Deconflict some identifiers, make test run properly in Main module. --- test/compiler/codegen.jl | 12 +++++++----- test/compiler/inline.jl | 14 +++++++------- test/compiler/irpasses.jl | 4 ++-- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl index 41d59832917bc..7469dc74c8156 100644 --- a/test/compiler/codegen.jl +++ b/test/compiler/codegen.jl @@ -364,16 +364,16 @@ macro aliasscope(body) end) end -struct Const{T<:Array} +struct ConstAliasScope{T<:Array} a::T end -@eval Base.getindex(A::Const, i1::Int) = Core.const_arrayref($(Expr(:boundscheck)), A.a, i1) -@eval Base.getindex(A::Const, i1::Int, i2::Int, I::Int...) = (@inline; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1, i2, I...)) +@eval Base.getindex(A::ConstAliasScope, i1::Int) = Core.const_arrayref($(Expr(:boundscheck)), A.a, i1) +@eval Base.getindex(A::ConstAliasScope, i1::Int, i2::Int, I::Int...) = (@inline; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1, i2, I...)) function foo31018!(a, b) @aliasscope for i in eachindex(a, b) - a[i] = Const(b)[i] + a[i] = ConstAliasScope(b)[i] end end io = IOBuffer() @@ -588,7 +588,9 @@ struct A40855 b::Union{Nothing, Int} end g() = string(A40855(X40855, 1)) -@test g() == "$(@__MODULE__).A40855($(@__MODULE__).X40855, 1)" +let mod_prefix = (@__MODULE__) == Core.Main ? "" : "$(@__MODULE__)." + @test g() == "$(mod_prefix)A40855($(mod_prefix)X40855, 1)" +end # issue #40612 f40612(a, b) = a|b === a|b diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl index 68bb1245d4303..a5991b5436e70 100644 --- a/test/compiler/inline.jl +++ b/test/compiler/inline.jl @@ -667,7 +667,7 @@ begin end @noinline a::Point +ₚ b::Point = Point(a.x + b.x, a.y + b.y) - function compute(n) + function compute_idem_n(n) a = Point(1.5, 2.5) b = Point(2.25, 4.75) for i in 0:(n-1) @@ -675,11 +675,11 @@ begin end return a.x, a.y end - let src = code_typed1(compute, (Int,)) + let src = code_typed1(compute_idem_n, (Int,)) @test count(isinvoke(:+ₚ), src.code) == 0 # successful inlining end - function compute(n) + function compute_idem_n(n) a = Point(1.5, 2.5) b = Point(2.25, 4.75) for i in 0:(n-1) @@ -687,13 +687,13 @@ begin end return a.x, a.y end - let src = code_typed1(compute, (Int,)) + let src = code_typed1(compute_idem_n, (Int,)) @test count(isinvoke(:+ₚ), src.code) == 2 # no inlining end - compute(42) # this execution should discard the cache of `+ₚ` since it's declared as `@noinline` + compute_idem_n(42) # this execution should discard the cache of `+ₚ` since it's declared as `@noinline` - function compute(n) + function compute_idem_n(n) a = Point(1.5, 2.5) b = Point(2.25, 4.75) for i in 0:(n-1) @@ -701,7 +701,7 @@ begin end return a.x, a.y end - let src = code_typed1(compute, (Int,)) + let src = code_typed1(compute_idem_n, (Int,)) @test count(isinvoke(:+ₚ), src.code) == 0 # no inlining !? end end diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl index 9465c4d9acf1e..f355a42d7b08c 100644 --- a/test/compiler/irpasses.jl +++ b/test/compiler/irpasses.jl @@ -284,7 +284,7 @@ struct Point y::Float64 end #=@inline=# add(a::Point, b::Point) = Point(a.x + b.x, a.y + b.y) -function compute() +function compute_points() a = Point(1.5, 2.5) b = Point(2.25, 4.75) for i in 0:(100000000-1) @@ -292,7 +292,7 @@ function compute() end a.x, a.y end -let src = code_typed1(compute) +let src = code_typed1(compute_points) @test !any(isnew, src.code) end From f03e8392db501f291badc5df5a580c5819bc677a Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Mon, 17 Jan 2022 03:23:18 -0500 Subject: [PATCH 06/10] Fix splatnew test (#43835) The function being tested currently throws: ``` julia> _construct_structwithsplatnew() ERROR: TypeError: in new, expected Int64, got a value of type String Stacktrace: [1] StructWithSplatNew @ ./REPL[15]:3 [inlined] [2] _construct_structwithsplatnew() @ Main ./REPL[16]:1 [3] top-level scope @ REPL[18]:1 ``` As a result, compiler precision improvements can cause the test to fail, which is not intended. Fix the fieldtype of the struct to make sure it doesn't throw. --- test/compiler/inference.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl index a42ffbbe84db0..9025306ae3e2c 100644 --- a/test/compiler/inference.jl +++ b/test/compiler/inference.jl @@ -3137,11 +3137,12 @@ _use_unstable_kw_2() = _unstable_kw(x = 2, y = rand()) @test Base.return_types(_use_unstable_kw_1) == Any[String] @test Base.return_types(_use_unstable_kw_2) == Any[String] @eval struct StructWithSplatNew - x::Int + x::String StructWithSplatNew(t) = $(Expr(:splatnew, :StructWithSplatNew, :t)) end _construct_structwithsplatnew() = StructWithSplatNew(("",)) @test Base.return_types(_construct_structwithsplatnew) == Any[StructWithSplatNew] +@test isa(_construct_structwithsplatnew(), StructWithSplatNew) # case where a call cycle can be broken by constant propagation struct NotQRSparse From 591f0664e2c222242c41935547f7ccab46a94a9a Mon Sep 17 00:00:00 2001 From: "Viral B. Shah" Date: Mon, 17 Jan 2022 08:15:05 -0500 Subject: [PATCH 07/10] Move sparse matrix tests to the new SparseArrays.jl repo (#43832) * Move sparse matrix tests to the new SparseArrays.jl repo --- test/abstractarray.jl | 21 +-------------------- test/arrayops.jl | 37 ------------------------------------- test/core.jl | 4 ++-- test/hashing.jl | 2 +- test/show.jl | 24 +----------------------- 5 files changed, 5 insertions(+), 83 deletions(-) diff --git a/test/abstractarray.jl b/test/abstractarray.jl index 32e9eef52ecd7..38ee4a31a02ec 100644 --- a/test/abstractarray.jl +++ b/test/abstractarray.jl @@ -1,6 +1,6 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -using Random, LinearAlgebra, SparseArrays +using Random, LinearAlgebra A = rand(5,4,3) @testset "Bounds checking" begin @@ -832,24 +832,6 @@ A = TSlowNIndexes(rand(2,2)) @test @inferred(axes(rand(3,2), 3)) == 1:1 end -@testset "#17088" begin - n = 10 - M = rand(n, n) - @testset "vector of vectors" begin - v = [[M]; [M]] # using vcat - @test size(v) == (2,) - @test !issparse(v) - end - @testset "matrix of vectors" begin - m1 = [[M] [M]] # using hcat - m2 = [[M] [M];] # using hvcat - @test m1 == m2 - @test size(m1) == (1,2) - @test !issparse(m1) - @test !issparse(m2) - end -end - @testset "isinteger and isreal" begin @test all(isinteger, Diagonal(rand(1:5,5))) @test isreal(Diagonal(rand(5))) @@ -1022,7 +1004,6 @@ end s = Vector([1, 2]) for a = ([1], UInt[1], [3, 4, 5], UInt[3, 4, 5]) @test s === copy!(s, Vector(a)) == Vector(a) - @test s === copy!(s, SparseVector(a)) == Vector(a) end # issue #35649 s = [1, 2, 3, 4] diff --git a/test/arrayops.jl b/test/arrayops.jl index abdf61e654c01..1cb8d667bef36 100644 --- a/test/arrayops.jl +++ b/test/arrayops.jl @@ -6,8 +6,6 @@ using .Main.OffsetArrays isdefined(@__MODULE__, :T24Linear) || include("testhelpers/arrayindexingtypes.jl") -using SparseArrays - using Random, LinearAlgebra using Dates @@ -1201,9 +1199,6 @@ end m = mapslices(x->tuple(x), [1 2; 3 4], dims=1) @test m[1,1] == ([1,3],) @test m[1,2] == ([2,4],) - - # issue #21123 - @test mapslices(nnz, sparse(1.0I, 3, 3), dims=1) == [1 1 1] end @testset "single multidimensional index" begin @@ -1954,13 +1949,6 @@ end @test isless(CartesianIndex((2,1)), CartesianIndex((1,2))) @test !isless(CartesianIndex((1,2)), CartesianIndex((2,1))) - a = spzeros(2,3) - @test CartesianIndices(size(a)) == eachindex(a) - a[CartesianIndex{2}(2,3)] = 5 - @test a[2,3] == 5 - b = view(a, 1:2, 2:3) - b[CartesianIndex{2}(1,1)] = 7 - @test a[1,2] == 7 @test 2*CartesianIndex{3}(1,2,3) == CartesianIndex{3}(2,4,6) @test CartesianIndex{3}(1,2,3)*2 == CartesianIndex{3}(2,4,6) @test_throws ErrorException iterate(CartesianIndex{3}(1,2,3)) @@ -2013,16 +2001,6 @@ end y = iterate(itr, y[2]) @test y === nothing @test r[val] == 3 - r = sparse(2:3:8) - itr = eachindex(r) - y = iterate(itr) - @test y !== nothing - y = iterate(itr, y[2]) - y = iterate(itr, y[2]) - @test y !== nothing - val, state = y - @test r[val] == 8 - @test iterate(itr, state) == nothing end R = CartesianIndices((1,3)) @@ -2964,18 +2942,3 @@ end @test c + zero(c) == c end end - -@testset "Allow assignment of singleton array to sparse array #43644" begin - K = spzeros(3,3) - b = zeros(3,3) - b[3,:] = [1,2,3] - K[3,1:3] += [1.0 2.0 3.0]' - @test K == b - K[3:3,1:3] += zeros(1, 3) - @test K == b - K[3,1:3] += zeros(3) - @test K == b - K[3,:] += zeros(3,1) - @test K == b - @test_throws DimensionMismatch K[3,1:2] += [1.0 2.0 3.0]' -end diff --git a/test/core.jl b/test/core.jl index 2473bdba2c164..391a13e3784f2 100644 --- a/test/core.jl +++ b/test/core.jl @@ -2,7 +2,7 @@ # test core language features -using Random, SparseArrays, InteractiveUtils +using Random, InteractiveUtils const Bottom = Union{} @@ -3659,7 +3659,7 @@ f12092(x::Int, y::Int...) = 2 # NOTE: should have > MAX_TUPLETYPE_LEN arguments f12063(tt, g, p, c, b, v, cu::T, d::AbstractArray{T, 2}, ve) where {T} = 1 f12063(args...) = 2 -g12063() = f12063(0, 0, 0, 0, 0, 0, 0.0, spzeros(0,0), Int[]) +g12063() = f12063(0, 0, 0, 0, 0, 0, 0.0, zeros(0,0), Int[]) @test g12063() == 1 # issue #11587 diff --git a/test/hashing.jl b/test/hashing.jl index e5223ed407062..9bd076554962f 100644 --- a/test/hashing.jl +++ b/test/hashing.jl @@ -283,4 +283,4 @@ end @test isequal(a, b) == (Base.hash_32_32(a) == Base.hash_32_32(b)) end end -end \ No newline at end of file +end diff --git a/test/show.jl b/test/show.jl index e5ad38e3b2e1f..52909069eaf89 100644 --- a/test/show.jl +++ b/test/show.jl @@ -1,6 +1,6 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -using LinearAlgebra, SparseArrays +using LinearAlgebra # For curmod_* include("testenv.jl") @@ -730,28 +730,6 @@ let filename = tempname() rm(filename) end -# issue #12960 -mutable struct T12960 end -import Base.zero -Base.zero(::Type{T12960}) = T12960() -Base.zero(x::T12960) = T12960() -let - A = sparse(1.0I, 3, 3) - B = similar(A, T12960) - @test repr(B) == "sparse([1, 2, 3], [1, 2, 3], $T12960[#undef, #undef, #undef], 3, 3)" - @test occursin( - "\n #undef ⋅ ⋅ \n ⋅ #undef ⋅ \n ⋅ ⋅ #undef", - repr(MIME("text/plain"), B), - ) - - B[1,2] = T12960() - @test repr(B) == "sparse([1, 1, 2, 3], [1, 2, 2, 3], $T12960[#undef, $T12960(), #undef, #undef], 3, 3)" - @test occursin( - "\n #undef T12960() ⋅ \n ⋅ #undef ⋅ \n ⋅ ⋅ #undef", - repr(MIME("text/plain"), B), - ) -end - # issue #13127 function f13127() buf = IOBuffer() From 4b036f07fddf38cb58ad8214755129848b3dbc34 Mon Sep 17 00:00:00 2001 From: Fredrik Ekre Date: Mon, 17 Jan 2022 17:43:12 +0100 Subject: [PATCH 08/10] Update Documenter to latest version. (#43843) --- doc/Manifest.toml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/Manifest.toml b/doc/Manifest.toml index 3d45af2116c50..f38f11a7778bb 100644 --- a/doc/Manifest.toml +++ b/doc/Manifest.toml @@ -1,6 +1,6 @@ # This file is machine-generated - editing it directly is not advised -julia_version = "1.8.0-DEV.1110" +julia_version = "1.8.0-DEV.1335" manifest_format = "2.0" project_hash = "e0c77beb18dc1f6cce661ebd60658c0c1a77390f" @@ -24,9 +24,9 @@ version = "0.8.6" [[deps.Documenter]] deps = ["ANSIColoredPrinters", "Base64", "Dates", "DocStringExtensions", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"] -git-tree-sha1 = "f425293f7e0acaf9144de6d731772de156676233" +git-tree-sha1 = "75c6cf9d99e0efc79b724f5566726ad3ad010a01" uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -version = "0.27.10" +version = "0.27.12" [[deps.IOCapture]] deps = ["Logging", "Random"] @@ -64,9 +64,9 @@ version = "1.2.0" [[deps.Parsers]] deps = ["Dates"] -git-tree-sha1 = "ae4bbcadb2906ccc085cf52ac286dc1377dceccc" +git-tree-sha1 = "92f91ba9e5941fc781fecf5494ac1da87bdac775" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "2.1.2" +version = "2.2.0" [[deps.Printf]] deps = ["Unicode"] From d6cd37af702e23f0edec4f703f3277bfac090b29 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Tue, 18 Jan 2022 02:31:46 +0900 Subject: [PATCH 09/10] InteractiveUtils: add const annotations (#43846) --- stdlib/InteractiveUtils/src/codeview.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stdlib/InteractiveUtils/src/codeview.jl b/stdlib/InteractiveUtils/src/codeview.jl index 4e2164cb161a8..c5d57d5a3b345 100644 --- a/stdlib/InteractiveUtils/src/codeview.jl +++ b/stdlib/InteractiveUtils/src/codeview.jl @@ -1,13 +1,13 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license # highlighting settings -highlighting = Dict{Symbol, Bool}( +const highlighting = Dict{Symbol, Bool}( :warntype => true, :llvm => true, :native => true, ) -llstyle = Dict{Symbol, Tuple{Bool, Union{Symbol, Int}}}( +const llstyle = Dict{Symbol, Tuple{Bool, Union{Symbol, Int}}}( :default => (false, :normal), # e.g. comma, equal sign, unknown token :comment => (false, :light_black), :label => (false, :light_red), From 45acb76a008b2291df3e272e33a7773f3dd79bc6 Mon Sep 17 00:00:00 2001 From: DilumAluthgeBot <43731525+DilumAluthgeBot@users.noreply.github.com> Date: Mon, 17 Jan 2022 13:56:28 -0500 Subject: [PATCH 10/10] =?UTF-8?q?=F0=9F=A4=96=20Bump=20the=20SparseArrays?= =?UTF-8?q?=20stdlib=20from=20205b770=20to=2016dd9bd=20(#43848)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Dilum Aluthge --- .../md5 | 1 + .../sha512 | 1 + .../md5 | 1 - .../sha512 | 1 - stdlib/SparseArrays.version | 2 +- 5 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 deps/checksums/SparseArrays-16dd9bdfe51260dc4219c748eadbead89be5c046.tar.gz/md5 create mode 100644 deps/checksums/SparseArrays-16dd9bdfe51260dc4219c748eadbead89be5c046.tar.gz/sha512 delete mode 100644 deps/checksums/SparseArrays-205b7703b91477e6c43d8c125a0f2f486ab30cfd.tar.gz/md5 delete mode 100644 deps/checksums/SparseArrays-205b7703b91477e6c43d8c125a0f2f486ab30cfd.tar.gz/sha512 diff --git a/deps/checksums/SparseArrays-16dd9bdfe51260dc4219c748eadbead89be5c046.tar.gz/md5 b/deps/checksums/SparseArrays-16dd9bdfe51260dc4219c748eadbead89be5c046.tar.gz/md5 new file mode 100644 index 0000000000000..a8ca532f348c9 --- /dev/null +++ b/deps/checksums/SparseArrays-16dd9bdfe51260dc4219c748eadbead89be5c046.tar.gz/md5 @@ -0,0 +1 @@ +2b8de7d0df945b8f1f5cb1fee37633ca diff --git a/deps/checksums/SparseArrays-16dd9bdfe51260dc4219c748eadbead89be5c046.tar.gz/sha512 b/deps/checksums/SparseArrays-16dd9bdfe51260dc4219c748eadbead89be5c046.tar.gz/sha512 new file mode 100644 index 0000000000000..61f15067c97b2 --- /dev/null +++ b/deps/checksums/SparseArrays-16dd9bdfe51260dc4219c748eadbead89be5c046.tar.gz/sha512 @@ -0,0 +1 @@ +c8db2ee51368ef2230d5c0f97165549f74f327da1cc1e4af88a526f9ec73f3a19f48be844d360043a302bd004b7057a10db8c1d12902deb0232ac09b8fa954ad diff --git a/deps/checksums/SparseArrays-205b7703b91477e6c43d8c125a0f2f486ab30cfd.tar.gz/md5 b/deps/checksums/SparseArrays-205b7703b91477e6c43d8c125a0f2f486ab30cfd.tar.gz/md5 deleted file mode 100644 index 37adb7aba646a..0000000000000 --- a/deps/checksums/SparseArrays-205b7703b91477e6c43d8c125a0f2f486ab30cfd.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -4c08d36f52e9f50571f1220b469a3edb diff --git a/deps/checksums/SparseArrays-205b7703b91477e6c43d8c125a0f2f486ab30cfd.tar.gz/sha512 b/deps/checksums/SparseArrays-205b7703b91477e6c43d8c125a0f2f486ab30cfd.tar.gz/sha512 deleted file mode 100644 index 756165ad0d76d..0000000000000 --- a/deps/checksums/SparseArrays-205b7703b91477e6c43d8c125a0f2f486ab30cfd.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -1f02795247d6fc3c81a37f4b7883d53c6aede7f802b53e000dab18caee03b5d3dc013dfe9fe5716d32250b72406f3ac31456908127db094995cedac2ba8c9f75 diff --git a/stdlib/SparseArrays.version b/stdlib/SparseArrays.version index cfd0424872fb8..be294feed4383 100644 --- a/stdlib/SparseArrays.version +++ b/stdlib/SparseArrays.version @@ -1,4 +1,4 @@ SPARSEARRAYS_BRANCH = main -SPARSEARRAYS_SHA1 = 205b7703b91477e6c43d8c125a0f2f486ab30cfd +SPARSEARRAYS_SHA1 = 16dd9bdfe51260dc4219c748eadbead89be5c046 SPARSEARRAYS_GIT_URL := https://github.com/JuliaLang/SparseArrays.jl.git SPARSEARRAYS_TAR_URL = https://api.github.com/repos/JuliaLang/SparseArrays.jl/tarball/$1