Skip to content

Commit

Permalink
AMDGPU: Don't fold rootn(x, 1) to input for strictfp functions (#92595)
Browse files Browse the repository at this point in the history
In a strictfp function, replacing rootn(x, 1) with x would require inserting a constrained canonicalize, so skip the fold in that case for now.

Depends on #92594
  • Loading branch information
arsenm authored May 20, 2024
1 parent 0da1a6c commit 3cb1fe6
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 5 deletions.
11 changes: 8 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1163,14 +1163,19 @@ bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B,
if (!match(opr1, m_APIntAllowPoison(CINT)))
return false;

Function *Parent = B.GetInsertBlock()->getParent();

int ci_opr1 = (int)CINT->getSExtValue();
if (ci_opr1 == 1) { // rootn(x, 1) = x
LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n");
if (ci_opr1 == 1 && !Parent->hasFnAttribute(Attribute::StrictFP)) {
// rootn(x, 1) = x
//
// TODO: Insert constrained canonicalize for strictfp case.
LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << '\n');
replaceCall(FPOp, opr0);
return true;
}

Module *M = B.GetInsertBlock()->getModule();
Module *M = Parent->getParent();
if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
if (FunctionCallee FPExpr =
getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
Expand Down
6 changes: 4 additions & 2 deletions llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,8 @@ define float @test_rootn_f32__y_1__strictfp(float %x) #1 {
; CHECK-LABEL: define float @test_rootn_f32__y_1__strictfp(
; CHECK-SAME: float [[X:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: ret float [[X]]
; CHECK-NEXT: [[CALL:%.*]] = tail call float @_Z5rootnfi(float [[X]], i32 1) #[[ATTR0]]
; CHECK-NEXT: ret float [[CALL]]
;
entry:
%call = tail call float @_Z5rootnfi(float %x, i32 1) #1
Expand All @@ -533,7 +534,8 @@ define <2 x float> @test_rootn_v2f32__y_1__strictfp(<2 x float> %x) #1 {
; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_1__strictfp(
; CHECK-SAME: <2 x float> [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <2 x float> [[X]]
; CHECK-NEXT: [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 1, i32 1>) #[[ATTR0]]
; CHECK-NEXT: ret <2 x float> [[CALL]]
;
entry:
%call = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 1, i32 1>) #1
Expand Down

0 comments on commit 3cb1fe6

Please sign in to comment.