Skip to content

Commit

Permalink
add patches for PPC
Browse files — browse the repository at this point in the history
  • Loading branch information
vchuravy committed Jan 12, 2021
1 parent 6c1824d commit 2bbd5fb
Show file tree
Hide file tree
Showing 4 changed files with 750 additions and 22 deletions.
4 changes: 3 additions & 1 deletion deps/llvm.mk
Original file line number Diff line number Diff line change
Expand Up @@ -531,8 +531,10 @@ $(eval $(call LLVM_PATCH,llvm-11-D90722-rtdyld-absolute-relocs)) # remove for LL
$(eval $(call LLVM_PATCH,llvm-invalid-addrspacecast-sink)) # upstreamed as D92210
$(eval $(call LLVM_PATCH,llvm-11-D92906-ppc-setjmp)) # remove for LLVM 12
$(eval $(call LLVM_PATCH,llvm-11-PR48458-X86ISelDAGToDAG)) # remove for LLVM 12
$(eval $(call LLVM_PATCH,llvm-11-D93092-ppc-knownbits))
$(eval $(call LLVM_PATCH,llvm-11-D93092-ppc-knownbits)) # remove for LLVM 12
$(eval $(call LLVM_PATCH,llvm-11-D93154-globalisel-as))
$(eval $(call LLVM_PATCH,llvm-11-ppc-half-ctr)) # remove for LLVM 12
$(eval $(call LLVM_PATCH,llvm-11-ppc-sp-from-bp)) # remove for LLVM 12
endif # LLVM_VER 11.0


Expand Down
51 changes: 30 additions & 21 deletions deps/patches/llvm-11-D93092-ppc-knownbits.patch
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
From 8bec64e2c0386934d4e38344907f0f4b0de4d8a3 Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Tue, 15 Dec 2020 09:59:18 -0500
Subject: [PATCH] [PowerPC] KnownBits should be constant when performing
non-sign comparison
From b5a0e6ca2b0c6367b082dd9a77b02c26607c8d7d Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail@cn.ibm.com>
Date: Tue, 29 Dec 2020 12:11:55 +0000
Subject: [PATCH 2/4] [PowerPC] Remaining KnownBits should be constant when
performing non-sign comparison

In `PPCTargetLowering::DAGCombineTruncBoolExt`, when checking if it's correct to perform the transformation for non-sign comparison, as the comment says
```
Expand All @@ -23,20 +23,30 @@ Bit 4, besides bit 0, is still unknown and affects the final result.

This patch fixes https://bugs.llvm.org/show_bug.cgi?id=48388.

Reviewed By: nemanjai, #powerpc

Differential Revision: https://reviews.llvm.org/D93092
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 +-
llvm/test/CodeGen/PowerPC/pr48388.ll | 42 +++++++++++++++++++++
2 files changed, 44 insertions(+), 1 deletion(-)
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 10 +++--
llvm/test/CodeGen/PowerPC/pr48388.ll | 41 +++++++++++++++++++++
2 files changed, 47 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/CodeGen/PowerPC/pr48388.ll

diff --git llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f54f1673526d..76b32db44656 100644
index f54f1673526d..38dbff4197b9 100644
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -13291,7 +13291,8 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
@@ -13287,11 +13287,13 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));

// We don't really care about what is known about the first bit (if
- // anything), so clear it in all masks prior to comparing them.
- Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
- Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
+ // anything), so pretend that it is known zero for both to ensure they can
+ // be compared as constants.
+ Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
+ Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);

- if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
+ if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
Expand All @@ -46,10 +56,10 @@ index f54f1673526d..76b32db44656 100644
}
diff --git llvm/test/CodeGen/PowerPC/pr48388.ll llvm/test/CodeGen/PowerPC/pr48388.ll
new file mode 100644
index 000000000000..138fb6147832
index 000000000000..822e5d852317
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/pr48388.ll
@@ -0,0 +1,42 @@
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le -ppc-asm-full-reg-names \
+; RUN: < %s | FileCheck %s
Expand All @@ -58,13 +68,12 @@ index 000000000000..138fb6147832
+; CHECK-LABEL: julia_div_i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: divd r6, r3, r4
+; CHECK-NEXT: li r5, 32767
+; CHECK-NEXT: sldi r5, r5, 32
+; CHECK-NEXT: oris r7, r5, 40069
+; CHECK-NEXT: oris r5, r5, 40079
+; CHECK-NEXT: lis r5, -1592
+; CHECK-NEXT: ori r7, r5, 21321
+; CHECK-NEXT: ori r5, r5, 65519
+; CHECK-NEXT: cmpdi r3, 0
+; CHECK-NEXT: ori r7, r7, 13456
+; CHECK-NEXT: ori r5, r5, 65264
+; CHECK-NEXT: rldic r7, r7, 4, 17
+; CHECK-NEXT: rldic r5, r5, 4, 17
+; CHECK-NEXT: iselgt r9, r5, r7
+; CHECK-NEXT: cmpdi r4, 0
+; CHECK-NEXT: mulld r8, r6, r4
Expand Down Expand Up @@ -93,5 +102,5 @@ index 000000000000..138fb6147832
+ ret i64 %12
+}
--
2.29.2
2.30.0

96 changes: 96 additions & 0 deletions deps/patches/llvm-11-ppc-half-ctr.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
From 79a73d6388790cfec9bd76b1790f0f5551a9df8c Mon Sep 17 00:00:00 2001
From: Nemanja Ivanovic <nemanjai@ca.ibm.com>
Date: Mon, 28 Dec 2020 22:51:51 -0600
Subject: [PATCH 1/4] [PowerPC] Disable CTR loops containing operations on
half-precision

On subtargets prior to Power9, conversions to/from half precision
are lowered to libcalls. This makes loops containing such operations
invalid candidates for HW loops.

Fixes: https://bugs.llvm.org/show_bug.cgi?id=48519
---
.../Target/PowerPC/PPCTargetTransformInfo.cpp | 4 ++
llvm/test/CodeGen/PowerPC/pr48519.ll | 55 +++++++++++++++++++
2 files changed, 59 insertions(+)
create mode 100644 llvm/test/CodeGen/PowerPC/pr48519.ll

diff --git llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 53556ffc267d..49c10fdf8898 100644
--- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -441,6 +441,10 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) ||
isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType()))
return true;
+ if (!ST->isISA3_0() &&
+ (CI->getSrcTy()->getScalarType()->isHalfTy() ||
+ CI->getDestTy()->getScalarType()->isHalfTy()))
+ return true;
} else if (isLargeIntegerTy(!TM.isPPC64(),
J->getType()->getScalarType()) &&
(J->getOpcode() == Instruction::UDiv ||
diff --git llvm/test/CodeGen/PowerPC/pr48519.ll llvm/test/CodeGen/PowerPC/pr48519.ll
new file mode 100644
index 000000000000..777874e91c26
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/pr48519.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+define void @julia__typed_vcat_20() #0 {
+; CHECK-LABEL: julia__typed_vcat_20:
+; CHECK: # %bb.0: # %top
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r0, 16(r1)
+; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: li r3, 1
+; CHECK-NEXT: li r30, 0
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_1: # %L139
+; CHECK-NEXT: #
+; CHECK-NEXT: addi r3, r3, -1
+; CHECK-NEXT: mtfprd f0, r3
+; CHECK-NEXT: xscvsxdsp f1, f0
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: nop
+; CHECK-NEXT: bl __gnu_h2f_ieee
+; CHECK-NEXT: nop
+; CHECK-NEXT: addi r30, r30, -1
+; CHECK-NEXT: li r3, 0
+; CHECK-NEXT: cmpldi r30, 0
+; CHECK-NEXT: bne+ cr0, .LBB0_1
+; CHECK-NEXT: # %bb.2: # %pass.1
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: nop
+; CHECK-NEXT: sth r3, 0(r3)
+top:
+ %.sroa.6.0.copyload = load i64, i64 addrspace(11)* null, align 8
+ %0 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %.sroa.6.0.copyload, i64 0)
+ %1 = extractvalue { i64, i1 } %0, 0
+ br label %L139
+
+L139: ; preds = %L139, %top
+ %value_phi21 = phi i64 [ %5, %L139 ], [ 1, %top ]
+ %value_phi23 = phi i64 [ 0, %L139 ], [ 1, %top ]
+ %2 = add nsw i64 %value_phi23, -1
+ %3 = add i64 %2, 0
+ %4 = sitofp i64 %3 to half
+ store half %4, half addrspace(13)* undef, align 2
+ %.not101.not = icmp eq i64 %value_phi21, 0
+ %5 = add i64 %value_phi21, 1
+ br i1 %.not101.not, label %pass.1, label %L139
+
+pass.1: ; preds = %L139
+ unreachable
+}
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) #0
+
+attributes #0 = { nounwind }
--
2.30.0

Loading

0 comments on commit 2bbd5fb

Please sign in to comment.