-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
750 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
From 79a73d6388790cfec9bd76b1790f0f5551a9df8c Mon Sep 17 00:00:00 2001 | ||
From: Nemanja Ivanovic <nemanjai@ca.ibm.com> | ||
Date: Mon, 28 Dec 2020 22:51:51 -0600 | ||
Subject: [PATCH 1/4] [PowerPC] Disable CTR loops containing operations on | ||
half-precision | ||
|
||
On subtargets prior to Power9, conversions to/from half precision | ||
are lowered to libcalls. This makes loops containing such operations | ||
invalid candidates for HW loops. | ||
|
||
Fixes: https://bugs.llvm.org/show_bug.cgi?id=48519 | ||
--- | ||
.../Target/PowerPC/PPCTargetTransformInfo.cpp | 4 ++ | ||
llvm/test/CodeGen/PowerPC/pr48519.ll | 55 +++++++++++++++++++ | ||
2 files changed, 59 insertions(+) | ||
create mode 100644 llvm/test/CodeGen/PowerPC/pr48519.ll | ||
|
||
diff --git llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | ||
index 53556ffc267d..49c10fdf8898 100644 | ||
--- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | ||
+++ llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | ||
@@ -441,6 +441,10 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, | ||
isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) || | ||
isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType())) | ||
return true; | ||
+ if (!ST->isISA3_0() && | ||
+ (CI->getSrcTy()->getScalarType()->isHalfTy() || | ||
+ CI->getDestTy()->getScalarType()->isHalfTy())) | ||
+ return true; | ||
} else if (isLargeIntegerTy(!TM.isPPC64(), | ||
J->getType()->getScalarType()) && | ||
(J->getOpcode() == Instruction::UDiv || | ||
diff --git llvm/test/CodeGen/PowerPC/pr48519.ll llvm/test/CodeGen/PowerPC/pr48519.ll | ||
new file mode 100644 | ||
index 000000000000..777874e91c26 | ||
--- /dev/null | ||
+++ llvm/test/CodeGen/PowerPC/pr48519.ll | ||
@@ -0,0 +1,55 @@ | ||
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
+; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ | ||
+; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s | ||
+define void @julia__typed_vcat_20() #0 { | ||
+; CHECK-LABEL: julia__typed_vcat_20: | ||
+; CHECK: # %bb.0: # %top | ||
+; CHECK-NEXT: mflr r0 | ||
+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill | ||
+; CHECK-NEXT: std r0, 16(r1) | ||
+; CHECK-NEXT: stdu r1, -48(r1) | ||
+; CHECK-NEXT: li r3, 1 | ||
+; CHECK-NEXT: li r30, 0 | ||
+; CHECK-NEXT: .p2align 4 | ||
+; CHECK-NEXT: .LBB0_1: # %L139 | ||
+; CHECK-NEXT: # | ||
+; CHECK-NEXT: addi r3, r3, -1 | ||
+; CHECK-NEXT: mtfprd f0, r3 | ||
+; CHECK-NEXT: xscvsxdsp f1, f0 | ||
+; CHECK-NEXT: bl __gnu_f2h_ieee | ||
+; CHECK-NEXT: nop | ||
+; CHECK-NEXT: bl __gnu_h2f_ieee | ||
+; CHECK-NEXT: nop | ||
+; CHECK-NEXT: addi r30, r30, -1 | ||
+; CHECK-NEXT: li r3, 0 | ||
+; CHECK-NEXT: cmpldi r30, 0 | ||
+; CHECK-NEXT: bne+ cr0, .LBB0_1 | ||
+; CHECK-NEXT: # %bb.2: # %pass.1 | ||
+; CHECK-NEXT: bl __gnu_f2h_ieee | ||
+; CHECK-NEXT: nop | ||
+; CHECK-NEXT: sth r3, 0(r3) | ||
+top: | ||
+ %.sroa.6.0.copyload = load i64, i64 addrspace(11)* null, align 8 | ||
+ %0 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %.sroa.6.0.copyload, i64 0) | ||
+ %1 = extractvalue { i64, i1 } %0, 0 | ||
+ br label %L139 | ||
+ | ||
+L139: ; preds = %L139, %top | ||
+ %value_phi21 = phi i64 [ %5, %L139 ], [ 1, %top ] | ||
+ %value_phi23 = phi i64 [ 0, %L139 ], [ 1, %top ] | ||
+ %2 = add nsw i64 %value_phi23, -1 | ||
+ %3 = add i64 %2, 0 | ||
+ %4 = sitofp i64 %3 to half | ||
+ store half %4, half addrspace(13)* undef, align 2 | ||
+ %.not101.not = icmp eq i64 %value_phi21, 0 | ||
+ %5 = add i64 %value_phi21, 1 | ||
+ br i1 %.not101.not, label %pass.1, label %L139 | ||
+ | ||
+pass.1: ; preds = %L139 | ||
+ unreachable | ||
+} | ||
+ | ||
+; Function Attrs: nounwind readnone speculatable willreturn | ||
+declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) #0 | ||
+ | ||
+attributes #0 = { nounwind } | ||
-- | ||
2.30.0 | ||
|
Oops, something went wrong.