add patches for PPC

JuliaLang · Jan 12, 2021 · 2bbd5fb · 2bbd5fb
1 parent 6c1824d
commit 2bbd5fb
Show file tree

Hide file tree

Showing 4 changed files with 750 additions and 22 deletions.
diff --git a/deps/llvm.mk b/deps/llvm.mk
@@ -531,8 +531,10 @@ $(eval $(call LLVM_PATCH,llvm-11-D90722-rtdyld-absolute-relocs)) # remove for LL
 $(eval $(call LLVM_PATCH,llvm-invalid-addrspacecast-sink)) # upstreamed as D92210
 $(eval $(call LLVM_PATCH,llvm-11-D92906-ppc-setjmp)) # remove for LLVM 12
 $(eval $(call LLVM_PATCH,llvm-11-PR48458-X86ISelDAGToDAG)) # remove for LLVM 12
-$(eval $(call LLVM_PATCH,llvm-11-D93092-ppc-knownbits))
+$(eval $(call LLVM_PATCH,llvm-11-D93092-ppc-knownbits)) # remove for LLVM 12
 $(eval $(call LLVM_PATCH,llvm-11-D93154-globalisel-as))
+$(eval $(call LLVM_PATCH,llvm-11-ppc-half-ctr)) # remove for LLVM 12
+$(eval $(call LLVM_PATCH,llvm-11-ppc-sp-from-bp)) # remove for LLVM 12
 endif # LLVM_VER 11.0
 
 

diff --git a/deps/patches/llvm-11-D93092-ppc-knownbits.patch b/deps/patches/llvm-11-D93092-ppc-knownbits.patch
@@ -1,8 +1,8 @@
-From 8bec64e2c0386934d4e38344907f0f4b0de4d8a3 Mon Sep 17 00:00:00 2001
-From: Valentin Churavy <v.churavy@gmail.com>
-Date: Tue, 15 Dec 2020 09:59:18 -0500
-Subject: [PATCH] [PowerPC] KnownBits should be constant when performing
- non-sign comparison
+From b5a0e6ca2b0c6367b082dd9a77b02c26607c8d7d Mon Sep 17 00:00:00 2001
+From: Kai Luo <lkail@cn.ibm.com>
+Date: Tue, 29 Dec 2020 12:11:55 +0000
+Subject: [PATCH 2/4] [PowerPC] Remaining KnownBits should be constant when
+ performing non-sign comparison
 
 In `PPCTargetLowering::DAGCombineTruncBoolExt`, when checking if it's correct to perform the transformation for non-sign comparison, as the comment says
 ```
@@ -23,20 +23,30 @@ Bit 4, besides bit 0, is still unknown and affects the final result.
 
 This patch fixes https://bugs.llvm.org/show_bug.cgi?id=48388.
 
+Reviewed By: nemanjai, #powerpc
+
 Differential Revision: https://reviews.llvm.org/D93092
 ---
- llvm/lib/Target/PowerPC/PPCISelLowering.cpp |  3 +-
- llvm/test/CodeGen/PowerPC/pr48388.ll        | 42 +++++++++++++++++++++
- 2 files changed, 44 insertions(+), 1 deletion(-)
+ llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 10 +++--
+ llvm/test/CodeGen/PowerPC/pr48388.ll        | 41 +++++++++++++++++++++
+ 2 files changed, 47 insertions(+), 4 deletions(-)
  create mode 100644 llvm/test/CodeGen/PowerPC/pr48388.ll
 
 diff --git llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/lib/Target/PowerPC/PPCISelLowering.cpp
-index f54f1673526d..76b32db44656 100644
+index f54f1673526d..38dbff4197b9 100644
 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
 +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
-@@ -13291,7 +13291,8 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
-       Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
-       Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
+@@ -13287,11 +13287,13 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
+       KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
+
+       // We don't really care about what is known about the first bit (if
+-      // anything), so clear it in all masks prior to comparing them.
+-      Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
+-      Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
++      // anything), so pretend that it is known zero for both to ensure they can
++      // be compared as constants.
++      Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
++      Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
 
 -      if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
 +      if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
@@ -46,10 +56,10 @@ index f54f1673526d..76b32db44656 100644
    }
 diff --git llvm/test/CodeGen/PowerPC/pr48388.ll llvm/test/CodeGen/PowerPC/pr48388.ll
 new file mode 100644
-index 000000000000..138fb6147832
+index 000000000000..822e5d852317
 --- /dev/null
 +++ llvm/test/CodeGen/PowerPC/pr48388.ll
-@@ -0,0 +1,42 @@
+@@ -0,0 +1,41 @@
 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le -ppc-asm-full-reg-names \
 +; RUN:   < %s | FileCheck %s
@@ -58,13 +68,12 @@ index 000000000000..138fb6147832
 +; CHECK-LABEL: julia_div_i64:
 +; CHECK:       # %bb.0: # %entry
 +; CHECK-NEXT:    divd r6, r3, r4
-+; CHECK-NEXT:    li r5, 32767
-+; CHECK-NEXT:    sldi r5, r5, 32
-+; CHECK-NEXT:    oris r7, r5, 40069
-+; CHECK-NEXT:    oris r5, r5, 40079
++; CHECK-NEXT:    lis r5, -1592
++; CHECK-NEXT:    ori r7, r5, 21321
++; CHECK-NEXT:    ori r5, r5, 65519
 +; CHECK-NEXT:    cmpdi r3, 0
-+; CHECK-NEXT:    ori r7, r7, 13456
-+; CHECK-NEXT:    ori r5, r5, 65264
++; CHECK-NEXT:    rldic r7, r7, 4, 17
++; CHECK-NEXT:    rldic r5, r5, 4, 17
 +; CHECK-NEXT:    iselgt r9, r5, r7
 +; CHECK-NEXT:    cmpdi r4, 0
 +; CHECK-NEXT:    mulld r8, r6, r4
@@ -93,5 +102,5 @@ index 000000000000..138fb6147832
 +  ret i64 %12
 +}
 -- 
-2.29.2
+2.30.0
 
diff --git a/deps/patches/llvm-11-ppc-half-ctr.patch b/deps/patches/llvm-11-ppc-half-ctr.patch
@@ -0,0 +1,96 @@
+From 79a73d6388790cfec9bd76b1790f0f5551a9df8c Mon Sep 17 00:00:00 2001
+From: Nemanja Ivanovic <nemanjai@ca.ibm.com>
+Date: Mon, 28 Dec 2020 22:51:51 -0600
+Subject: [PATCH 1/4] [PowerPC] Disable CTR loops containing operations on
+ half-precision
+
+On subtargets prior to Power9, conversions to/from half precision
+are lowered to libcalls. This makes loops containing such operations
+invalid candidates for HW loops.
+
+Fixes: https://bugs.llvm.org/show_bug.cgi?id=48519
+---
+ .../Target/PowerPC/PPCTargetTransformInfo.cpp |  4 ++
+ llvm/test/CodeGen/PowerPC/pr48519.ll          | 55 +++++++++++++++++++
+ 2 files changed, 59 insertions(+)
+ create mode 100644 llvm/test/CodeGen/PowerPC/pr48519.ll
+
+diff --git llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+index 53556ffc267d..49c10fdf8898 100644
+--- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
++++ llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+@@ -441,6 +441,10 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
+           isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) ||
+           isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType()))
+         return true;
++      if (!ST->isISA3_0() &&
++          (CI->getSrcTy()->getScalarType()->isHalfTy() ||
++           CI->getDestTy()->getScalarType()->isHalfTy()))
++        return true;
+     } else if (isLargeIntegerTy(!TM.isPPC64(),
+                                 J->getType()->getScalarType()) &&
+                (J->getOpcode() == Instruction::UDiv ||
+diff --git llvm/test/CodeGen/PowerPC/pr48519.ll llvm/test/CodeGen/PowerPC/pr48519.ll
+new file mode 100644
+index 000000000000..777874e91c26
+--- /dev/null
++++ llvm/test/CodeGen/PowerPC/pr48519.ll
+@@ -0,0 +1,55 @@
++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
++; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
++; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
++define void @julia__typed_vcat_20() #0 {
++; CHECK-LABEL: julia__typed_vcat_20:
++; CHECK:       # %bb.0: # %top
++; CHECK-NEXT:    mflr r0
++; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
++; CHECK-NEXT:    std r0, 16(r1)
++; CHECK-NEXT:    stdu r1, -48(r1)
++; CHECK-NEXT:    li r3, 1
++; CHECK-NEXT:    li r30, 0
++; CHECK-NEXT:    .p2align 4
++; CHECK-NEXT:  .LBB0_1: # %L139
++; CHECK-NEXT:    #
++; CHECK-NEXT:    addi r3, r3, -1
++; CHECK-NEXT:    mtfprd f0, r3
++; CHECK-NEXT:    xscvsxdsp f1, f0
++; CHECK-NEXT:    bl __gnu_f2h_ieee
++; CHECK-NEXT:    nop
++; CHECK-NEXT:    bl __gnu_h2f_ieee
++; CHECK-NEXT:    nop
++; CHECK-NEXT:    addi r30, r30, -1
++; CHECK-NEXT:    li r3, 0
++; CHECK-NEXT:    cmpldi r30, 0
++; CHECK-NEXT:    bne+ cr0, .LBB0_1
++; CHECK-NEXT:  # %bb.2: # %pass.1
++; CHECK-NEXT:    bl __gnu_f2h_ieee
++; CHECK-NEXT:    nop
++; CHECK-NEXT:    sth r3, 0(r3)
++top:
++  %.sroa.6.0.copyload = load i64, i64 addrspace(11)* null, align 8
++  %0 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %.sroa.6.0.copyload, i64 0)
++  %1 = extractvalue { i64, i1 } %0, 0
++  br label %L139
++
++L139:                                             ; preds = %L139, %top
++  %value_phi21 = phi i64 [ %5, %L139 ], [ 1, %top ]
++  %value_phi23 = phi i64 [ 0, %L139 ], [ 1, %top ]
++  %2 = add nsw i64 %value_phi23, -1
++  %3 = add i64 %2, 0
++  %4 = sitofp i64 %3 to half
++  store half %4, half addrspace(13)* undef, align 2
++  %.not101.not = icmp eq i64 %value_phi21, 0
++  %5 = add i64 %value_phi21, 1
++  br i1 %.not101.not, label %pass.1, label %L139
++
++pass.1:                                           ; preds = %L139
++  unreachable
++}
++
++; Function Attrs: nounwind readnone speculatable willreturn
++declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) #0
++
++attributes #0 = { nounwind }
+-- 
+2.30.0
+