From 3cb1fe60fb00ba3761e34866ffc93c7d7a0b509d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 20 May 2024 22:23:02 +0200 Subject: [PATCH] AMDGPU: Don't fold rootn(x, 1) to input for strictfp functions (#92595) We need to insert a constrained canonicalize. Depends #92594 --- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | 11 ++++++++--- .../CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll | 6 ++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp index 0a5fbf5034c017..47de1791dae313 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -1163,14 +1163,19 @@ bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B, if (!match(opr1, m_APIntAllowPoison(CINT))) return false; + Function *Parent = B.GetInsertBlock()->getParent(); + int ci_opr1 = (int)CINT->getSExtValue(); - if (ci_opr1 == 1) { // rootn(x, 1) = x - LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n"); + if (ci_opr1 == 1 && !Parent->hasFnAttribute(Attribute::StrictFP)) { + // rootn(x, 1) = x + // + // TODO: Insert constrained canonicalize for strictfp case. 
+ LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << '\n'); replaceCall(FPOp, opr0); return true; } - Module *M = B.GetInsertBlock()->getModule(); + Module *M = Parent->getParent(); if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x) if (FunctionCallee FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) { diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll index f79983e2491a4f..d75517cb26875a 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll @@ -511,7 +511,8 @@ define float @test_rootn_f32__y_1__strictfp(float %x) #1 { ; CHECK-LABEL: define float @test_rootn_f32__y_1__strictfp( ; CHECK-SAME: float [[X:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret float [[X]] +; CHECK-NEXT: [[CALL:%.*]] = tail call float @_Z5rootnfi(float [[X]], i32 1) #[[ATTR0]] +; CHECK-NEXT: ret float [[CALL]] ; entry: %call = tail call float @_Z5rootnfi(float %x, i32 1) #1 @@ -533,7 +534,8 @@ define <2 x float> @test_rootn_v2f32__y_1__strictfp(<2 x float> %x) #1 { ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_1__strictfp( ; CHECK-SAME: <2 x float> [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <2 x float> [[X]] +; CHECK-NEXT: [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 1, i32 1>) #[[ATTR0]] +; CHECK-NEXT: ret <2 x float> [[CALL]] ; entry: %call = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 1, i32 1>) #1