From 5156576e2b66e2bc4f42b722b1e401eafee34283 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Tue, 24 Sep 2024 10:42:26 -0700 Subject: [PATCH] [DirectX] Add atan2 intrinsic and expand for DXIL backend (p1) (#108865) This change is part of this proposal: https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294 This preliminary work adds the intrinsic to llvm and expands using atan intrinsic for DXIL backend, since DXIL has no atan2 op. Part 1 for Implement the atan2 HLSL Function #70096. --- llvm/docs/LangRef.rst | 37 ++++++++ llvm/include/llvm/IR/Intrinsics.td | 1 + .../Target/DirectX/DXILIntrinsicExpansion.cpp | 52 +++++++++++ llvm/test/CodeGen/DirectX/atan2.ll | 87 +++++++++++++++++++ llvm/test/CodeGen/DirectX/atan2_error.ll | 11 +++ 5 files changed, 188 insertions(+) create mode 100644 llvm/test/CodeGen/DirectX/atan2.ll create mode 100644 llvm/test/CodeGen/DirectX/atan2_error.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 91c3e60bb0acb14..41d1efab752fd78 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -15583,6 +15583,43 @@ trapping or setting ``errno``. When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. +'``llvm.atan2.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.atan2`` on any +floating-point or vector of floating-point type. Not all targets support +all types however. + +:: + + declare float @llvm.atan2.f32(float %Y, float %X) + declare double @llvm.atan2.f64(double %Y, double %X) + declare x86_fp80 @llvm.atan2.f80(x86_fp80 %Y, x86_fp80 %X) + declare fp128 @llvm.atan2.f128(fp128 %Y, fp128 %X) + declare ppc_fp128 @llvm.atan2.ppcf128(ppc_fp128 %Y, ppc_fp128 %X) + +Overview: +""""""""" + +The '``llvm.atan2.*``' intrinsics return the arctangent of ``Y/X`` accounting for the quadrant. + +Arguments: +"""""""""" + +The arguments and return value are floating-point numbers of the same type. 
+ +Semantics: +"""""""""" + +Return the same value as a corresponding libm '``atan2``' function but without +trapping or setting ``errno``. + +When specified with the fast-math-flag 'afn', the result may be approximated +using a less accurate calculation. + '``llvm.sinh.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 0a74a217a5f010e..48d57907e6d0bcf 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1016,6 +1016,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { def int_asin : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_acos : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_atan : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_atan2 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>; def int_sin : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_cos : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_tan : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index dd73b895b14d37a..926cbe97f24fda5 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -36,6 +36,7 @@ using namespace llvm; static bool isIntrinsicExpansion(Function &F) { switch (F.getIntrinsicID()) { case Intrinsic::abs: + case Intrinsic::atan2: case Intrinsic::exp: case Intrinsic::log: case Intrinsic::log10: @@ -307,6 +308,54 @@ static Value *expandNormalizeIntrinsic(CallInst *Orig) { return Builder.CreateFMul(X, MultiplicandVec); } +static Value *expandAtan2Intrinsic(CallInst *Orig) { + Value *Y = Orig->getOperand(0); + Value *X = Orig->getOperand(1); + Type *Ty = X->getType(); + IRBuilder<> Builder(Orig); + 
Builder.setFastMathFlags(Orig->getFastMathFlags()); + + Value *Tan = Builder.CreateFDiv(Y, X); + + CallInst *Atan = + Builder.CreateIntrinsic(Ty, Intrinsic::atan, {Tan}, nullptr, "Elt.Atan"); + Atan->setTailCall(Orig->isTailCall()); + Atan->setAttributes(Orig->getAttributes()); + + // Modify atan result based on https://en.wikipedia.org/wiki/Atan2. + Constant *Pi = ConstantFP::get(Ty, llvm::numbers::pi); + Constant *HalfPi = ConstantFP::get(Ty, llvm::numbers::pi / 2); + Constant *NegHalfPi = ConstantFP::get(Ty, -llvm::numbers::pi / 2); + Constant *Zero = ConstantFP::get(Ty, 0); + Value *AtanAddPi = Builder.CreateFAdd(Atan, Pi); + Value *AtanSubPi = Builder.CreateFSub(Atan, Pi); + + // x > 0 -> atan. + Value *Result = Atan; + Value *XLt0 = Builder.CreateFCmpOLT(X, Zero); + Value *XEq0 = Builder.CreateFCmpOEQ(X, Zero); + Value *YGe0 = Builder.CreateFCmpOGE(Y, Zero); + Value *YLt0 = Builder.CreateFCmpOLT(Y, Zero); + + // x < 0, y >= 0 -> atan + pi. + Value *XLt0AndYGe0 = Builder.CreateAnd(XLt0, YGe0); + Result = Builder.CreateSelect(XLt0AndYGe0, AtanAddPi, Result); + + // x < 0, y < 0 -> atan - pi. 
+ Value *XLt0AndYLt0 = Builder.CreateAnd(XLt0, YLt0); + Result = Builder.CreateSelect(XLt0AndYLt0, AtanSubPi, Result); + + // x == 0, y < 0 -> -pi/2 + Value *XEq0AndYLt0 = Builder.CreateAnd(XEq0, YLt0); + Result = Builder.CreateSelect(XEq0AndYLt0, NegHalfPi, Result); + + // x == 0, y >= 0 -> pi/2 + Value *XEq0AndYGe0 = Builder.CreateAnd(XEq0, YGe0); + Result = Builder.CreateSelect(XEq0AndYGe0, HalfPi, Result); + + return Result; +} + static Value *expandPowIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); @@ -418,6 +467,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { case Intrinsic::abs: Result = expandAbs(Orig); break; + case Intrinsic::atan2: + Result = expandAtan2Intrinsic(Orig); + break; case Intrinsic::exp: Result = expandExpIntrinsic(Orig); break; diff --git a/llvm/test/CodeGen/DirectX/atan2.ll b/llvm/test/CodeGen/DirectX/atan2.ll new file mode 100644 index 000000000000000..9d86f87f3ed50eb --- /dev/null +++ b/llvm/test/CodeGen/DirectX/atan2.ll @@ -0,0 +1,87 @@ +; RUN: opt -S -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK +; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK + +; Make sure correct dxil expansions for atan2 are generated for float and half. 
+ +define noundef float @atan2_float(float noundef %y, float noundef %x) { +entry: +; CHECK: [[DIV:%.+]] = fdiv float %y, %x +; EXPCHECK: [[ATAN:%.+]] = call float @llvm.atan.f32(float [[DIV]]) +; DOPCHECK: [[ATAN:%.+]] = call float @dx.op.unary.f32(i32 17, float [[DIV]]) +; CHECK-DAG: [[ADD_PI:%.+]] = fadd float [[ATAN]], 0x400921FB60000000 +; CHECK-DAG: [[SUB_PI:%.+]] = fsub float [[ATAN]], 0x400921FB60000000 +; CHECK-DAG: [[X_LT_0:%.+]] = fcmp olt float %x, 0.000000e+00 +; CHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq float %x, 0.000000e+00 +; CHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge float %y, 0.000000e+00 +; CHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt float %y, 0.000000e+00 +; CHECK: [[XLT0_AND_YGE0:%.+]] = and i1 [[X_LT_0]], [[Y_GE_0]] +; CHECK: [[SELECT_ADD_PI:%.+]] = select i1 [[XLT0_AND_YGE0]], float [[ADD_PI]], float [[ATAN]] +; CHECK: [[XLT0_AND_YLT0:%.+]] = and i1 [[X_LT_0]], [[Y_LT_0]] +; CHECK: [[SELECT_SUB_PI:%.+]] = select i1 [[XLT0_AND_YLT0]], float [[SUB_PI]], float [[SELECT_ADD_PI]] +; CHECK: [[XEQ0_AND_YLT0:%.+]] = and i1 [[X_EQ_0]], [[Y_LT_0]] +; CHECK: [[SELECT_NEGHPI:%.+]] = select i1 [[XEQ0_AND_YLT0]], float 0xBFF921FB60000000, float [[SELECT_SUB_PI]] +; CHECK: [[XEQ0_AND_YGE0:%.+]] = and i1 [[X_EQ_0]], [[Y_GE_0]] +; CHECK: [[SELECT_HPI:%.+]] = select i1 [[XEQ0_AND_YGE0]], float 0x3FF921FB60000000, float [[SELECT_NEGHPI]] +; CHECK: ret float [[SELECT_HPI]] + %elt.atan2 = call float @llvm.atan2.f32(float %y, float %x) + ret float %elt.atan2 +} + +define noundef half @atan2_half(half noundef %y, half noundef %x) { +entry: +; CHECK: [[DIV:%.+]] = fdiv half %y, %x +; EXPCHECK: [[ATAN:%.+]] = call half @llvm.atan.f16(half [[DIV]]) +; DOPCHECK: [[ATAN:%.+]] = call half @dx.op.unary.f16(i32 17, half [[DIV]]) +; CHECK-DAG: [[ADD_PI:%.+]] = fadd half [[ATAN]], 0xH4248 +; CHECK-DAG: [[SUB_PI:%.+]] = fsub half [[ATAN]], 0xH4248 +; CHECK-DAG: [[X_LT_0:%.+]] = fcmp olt half %x, 0xH0000 +; CHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq half %x, 0xH0000 +; CHECK-DAG: [[Y_GE_0:%.+]] = 
fcmp oge half %y, 0xH0000 +; CHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt half %y, 0xH0000 +; CHECK: [[XLT0_AND_YGE0:%.+]] = and i1 [[X_LT_0]], [[Y_GE_0]] +; CHECK: [[SELECT_ADD_PI:%.+]] = select i1 [[XLT0_AND_YGE0]], half [[ADD_PI]], half [[ATAN]] +; CHECK: [[XLT0_AND_YLT0:%.+]] = and i1 [[X_LT_0]], [[Y_LT_0]] +; CHECK: [[SELECT_SUB_PI:%.+]] = select i1 [[XLT0_AND_YLT0]], half [[SUB_PI]], half [[SELECT_ADD_PI]] +; CHECK: [[XEQ0_AND_YLT0:%.+]] = and i1 [[X_EQ_0]], [[Y_LT_0]] +; CHECK: [[SELECT_NEGHPI:%.+]] = select i1 [[XEQ0_AND_YLT0]], half 0xHBE48, half [[SELECT_SUB_PI]] +; CHECK: [[XEQ0_AND_YGE0:%.+]] = and i1 [[X_EQ_0]], [[Y_GE_0]] +; CHECK: [[SELECT_HPI:%.+]] = select i1 [[XEQ0_AND_YGE0]], half 0xH3E48, half [[SELECT_NEGHPI]] +; CHECK: ret half [[SELECT_HPI]] + %elt.atan2 = call half @llvm.atan2.f16(half %y, half %x) + ret half %elt.atan2 +} + +define noundef <4 x float> @atan2_float4(<4 x float> noundef %y, <4 x float> noundef %x) { +entry: +; Just Expansion, no scalarization or lowering: +; EXPCHECK: [[DIV:%.+]] = fdiv <4 x float> %y, %x +; EXPCHECK: [[ATAN:%.+]] = call <4 x float> @llvm.atan.v4f32(<4 x float> [[DIV]]) +; EXPCHECK-DAG: [[ADD_PI:%.+]] = fadd <4 x float> [[ATAN]], <float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000> +; EXPCHECK-DAG: [[SUB_PI:%.+]] = fsub <4 x float> [[ATAN]], <float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000> +; EXPCHECK-DAG: [[X_LT_0:%.+]] = fcmp olt <4 x float> %x, zeroinitializer +; EXPCHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq <4 x float> %x, zeroinitializer +; EXPCHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge <4 x float> %y, zeroinitializer +; EXPCHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt <4 x float> %y, zeroinitializer +; EXPCHECK: [[XLT0_AND_YGE0:%.+]] = and <4 x i1> [[X_LT_0]], [[Y_GE_0]] +; EXPCHECK: [[SELECT_ADD_PI:%.+]] = select <4 x i1> [[XLT0_AND_YGE0]], <4 x float> [[ADD_PI]], <4 x float> [[ATAN]] +; EXPCHECK: [[XLT0_AND_YLT0:%.+]] = and <4 x i1> [[X_LT_0]], [[Y_LT_0]] +; EXPCHECK: [[SELECT_SUB_PI:%.+]] = select <4 x i1> [[XLT0_AND_YLT0]], <4 x float> [[SUB_PI]], <4 x float> [[SELECT_ADD_PI]] +; EXPCHECK: [[XEQ0_AND_YLT0:%.+]] = and <4 x i1> 
[[X_EQ_0]], [[Y_LT_0]] +; EXPCHECK: [[SELECT_NEGHPI:%.+]] = select <4 x i1> [[XEQ0_AND_YLT0]], <4 x float> <float 0xBFF921FB60000000, float 0xBFF921FB60000000, float 0xBFF921FB60000000, float 0xBFF921FB60000000>, <4 x float> [[SELECT_SUB_PI]] +; EXPCHECK: [[XEQ0_AND_YGE0:%.+]] = and <4 x i1> [[X_EQ_0]], [[Y_GE_0]] +; EXPCHECK: [[SELECT_HPI:%.+]] = select <4 x i1> [[XEQ0_AND_YGE0]], <4 x float> <float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0x3FF921FB60000000>, <4 x float> [[SELECT_NEGHPI]] +; EXPCHECK: ret <4 x float> [[SELECT_HPI]] + +; Scalarization occurs after expansion, so atan scalarization is tested separately. +; Expansion, scalarization and lowering: +; Just make sure this expands to exactly 4 scalar DXIL atan (OpCode=17) calls. +; DOPCHECK-COUNT-4: call float @dx.op.unary.f32(i32 17, float %{{.*}}) +; DOPCHECK-NOT: call float @dx.op.unary.f32(i32 17, + + %elt.atan2 = call <4 x float> @llvm.atan2.v4f32(<4 x float> %y, <4 x float> %x) + ret <4 x float> %elt.atan2 +} + +declare half @llvm.atan2.f16(half, half) +declare float @llvm.atan2.f32(float, float) +declare <4 x float> @llvm.atan2.v4f32(<4 x float>, <4 x float>) diff --git a/llvm/test/CodeGen/DirectX/atan2_error.ll b/llvm/test/CodeGen/DirectX/atan2_error.ll new file mode 100644 index 000000000000000..5b3077f85f5d4ed --- /dev/null +++ b/llvm/test/CodeGen/DirectX/atan2_error.ll @@ -0,0 +1,11 @@ +; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s + +; DXIL operation atan does not support double overload type +; CHECK: in function atan2_double +; CHECK-SAME: Cannot create ATan operation: Invalid overload type + +define noundef double @atan2_double(double noundef %a, double noundef %b) #0 { +entry: + %1 = call double @llvm.atan2.f64(double %a, double %b) + ret double %1 +}