diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index 532a3ca334b1ae..c4586894e3e490 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -92,7 +92,9 @@ TLI_DEFINE_VECFUNC("llvm.atan.f64", "_simd_atan_d2", FIXED(2), "_ZGV_LLVM_N2v")
 TLI_DEFINE_VECFUNC("atanf", "_simd_atan_f4", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("llvm.atan.f32", "_simd_atan_f4", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("atan2", "_simd_atan2_d2", FIXED(2), "_ZGV_LLVM_N2vv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_simd_atan2_d2", FIXED(2), "_ZGV_LLVM_N2vv")
 TLI_DEFINE_VECFUNC("atan2f", "_simd_atan2_f4", FIXED(4), "_ZGV_LLVM_N4vv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_simd_atan2_f4", FIXED(4), "_ZGV_LLVM_N4vv")
 
 TLI_DEFINE_VECFUNC("cos", "_simd_cos_d2", FIXED(2), "_ZGV_LLVM_N2v")
 TLI_DEFINE_VECFUNC("llvm.cos.f64", "_simd_cos_d2", FIXED(2), "_ZGV_LLVM_N2v")
@@ -531,6 +533,7 @@ TLI_DEFINE_VECFUNC("atan", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v")
 TLI_DEFINE_VECFUNC("llvm.atan.f64", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v")
 
 TLI_DEFINE_VECFUNC("atan2", "_ZGVnN2vv_atan2", FIXED(2), "_ZGV_LLVM_N2vv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_ZGVnN2vv_atan2", FIXED(2), "_ZGV_LLVM_N2vv")
 
 TLI_DEFINE_VECFUNC("atanh", "_ZGVnN2v_atanh", FIXED(2), "_ZGV_LLVM_N2v")
@@ -635,6 +638,7 @@ TLI_DEFINE_VECFUNC("atanf", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v")
 
 TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN4vv_atan2f", FIXED(4), "_ZGV_LLVM_N4vv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVnN4vv_atan2f", FIXED(4), "_ZGV_LLVM_N4vv")
 
 TLI_DEFINE_VECFUNC("atanhf", "_ZGVnN4v_atanhf", FIXED(4), "_ZGV_LLVM_N4v")
@@ -748,6 +752,8 @@ TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVsMxv_atanf", SCALABLE(4), MASKED, "_ZGVsMxv")
 
 TLI_DEFINE_VECFUNC("atan2", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED, "_ZGVsMxvv")
 TLI_DEFINE_VECFUNC("atan2f", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED, "_ZGVsMxvv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv")
 
 TLI_DEFINE_VECFUNC("atanh", "_ZGVsMxv_atanh", SCALABLE(2), MASKED, "_ZGVsMxv")
 TLI_DEFINE_VECFUNC("atanhf", "_ZGVsMxv_atanhf", SCALABLE(4), MASKED, "_ZGVsMxv")
@@ -933,6 +939,11 @@ TLI_DEFINE_VECFUNC("atan2f", "armpl_vatan2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv")
 TLI_DEFINE_VECFUNC("atan2", "armpl_svatan2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvv")
 TLI_DEFINE_VECFUNC("atan2f", "armpl_svatan2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvv")
 
+TLI_DEFINE_VECFUNC("llvm.atan2.f64", "armpl_vatan2q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f32", "armpl_vatan2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f64", "armpl_svatan2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f32", "armpl_svatan2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvv")
+
 TLI_DEFINE_VECFUNC("atanh", "armpl_vatanhq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
 TLI_DEFINE_VECFUNC("atanhf", "armpl_vatanhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("atanh", "armpl_svatanh_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3b2fd95076c465..a20c2aee0ca2da 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5041,6 +5041,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case G_FACOS:
   case G_FASIN:
   case G_FATAN:
+  case G_FATAN2:
   case G_FCOSH:
   case G_FSINH:
   case G_FTANH:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index edd08fe3b2f3cf..1eca238ca3dc8c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -733,18 +733,19 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Promote);
   }
 
-  for (auto Op : {ISD::FREM,         ISD::FPOW,          ISD::FPOWI,
-                  ISD::FCOS,         ISD::FSIN,          ISD::FSINCOS,
-                  ISD::FACOS,        ISD::FASIN,         ISD::FATAN,
-                  ISD::FCOSH,        ISD::FSINH,         ISD::FTANH,
-                  ISD::FTAN,         ISD::FEXP,          ISD::FEXP2,
-                  ISD::FEXP10,       ISD::FLOG,          ISD::FLOG2,
-                  ISD::FLOG10,       ISD::STRICT_FREM,   ISD::STRICT_FPOW,
-                  ISD::STRICT_FPOWI, ISD::STRICT_FCOS,   ISD::STRICT_FSIN,
-                  ISD::STRICT_FACOS, ISD::STRICT_FASIN,  ISD::STRICT_FATAN,
-                  ISD::STRICT_FCOSH, ISD::STRICT_FSINH,  ISD::STRICT_FTANH,
-                  ISD::STRICT_FEXP,  ISD::STRICT_FEXP2,  ISD::STRICT_FLOG,
-                  ISD::STRICT_FLOG2, ISD::STRICT_FLOG10, ISD::STRICT_FTAN}) {
+  for (auto Op : {ISD::FREM,          ISD::FPOW,          ISD::FPOWI,
+                  ISD::FCOS,          ISD::FSIN,          ISD::FSINCOS,
+                  ISD::FACOS,         ISD::FASIN,         ISD::FATAN,
+                  ISD::FATAN2,        ISD::FCOSH,         ISD::FSINH,
+                  ISD::FTANH,         ISD::FTAN,          ISD::FEXP,
+                  ISD::FEXP2,         ISD::FEXP10,        ISD::FLOG,
+                  ISD::FLOG2,         ISD::FLOG10,        ISD::STRICT_FREM,
+                  ISD::STRICT_FPOW,   ISD::STRICT_FPOWI,  ISD::STRICT_FCOS,
+                  ISD::STRICT_FSIN,   ISD::STRICT_FACOS,  ISD::STRICT_FASIN,
+                  ISD::STRICT_FATAN,  ISD::STRICT_FATAN2, ISD::STRICT_FCOSH,
+                  ISD::STRICT_FSINH,  ISD::STRICT_FTANH,  ISD::STRICT_FEXP,
+                  ISD::STRICT_FEXP2,  ISD::STRICT_FLOG,   ISD::STRICT_FLOG2,
+                  ISD::STRICT_FLOG10, ISD::STRICT_FTAN}) {
     setOperationAction(Op, MVT::f16, Promote);
     setOperationAction(Op, MVT::v4f16, Expand);
     setOperationAction(Op, MVT::v8f16, Expand);
@@ -1187,7 +1188,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   // silliness like this:
   // clang-format off
   for (auto Op :
-       {ISD::SELECT,         ISD::SELECT_CC,
+       {ISD::SELECT,         ISD::SELECT_CC,      ISD::FATAN2,
        ISD::BR_CC,          ISD::FADD,           ISD::FSUB,
        ISD::FMUL,           ISD::FDIV,           ISD::FMA,
        ISD::FNEG,           ISD::FABS,           ISD::FCEIL,
@@ -1646,6 +1647,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::FACOS, VT, Expand);
       setOperationAction(ISD::FASIN, VT, Expand);
       setOperationAction(ISD::FATAN, VT, Expand);
+      setOperationAction(ISD::FATAN2, VT, Expand);
       setOperationAction(ISD::FCOSH, VT, Expand);
       setOperationAction(ISD::FSINH, VT, Expand);
       setOperationAction(ISD::FTANH, VT, Expand);
@@ -1901,6 +1903,7 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
   setOperationAction(ISD::FASIN, VT, Expand);
   setOperationAction(ISD::FACOS, VT, Expand);
   setOperationAction(ISD::FATAN, VT, Expand);
+  setOperationAction(ISD::FATAN2, VT, Expand);
   setOperationAction(ISD::FSINH, VT, Expand);
   setOperationAction(ISD::FCOSH, VT, Expand);
   setOperationAction(ISD::FTANH, VT, Expand);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index a69894839361bc..0327c4a4eb1d92 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -284,9 +284,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .libcallFor({{s64, s128}})
       .minScalarOrElt(1, MinFPScalar);
 
-  getActionDefinitionsBuilder(
-      {G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10, G_FTAN, G_FEXP,
-       G_FEXP2, G_FEXP10, G_FACOS, G_FASIN, G_FATAN, G_FCOSH, G_FSINH, G_FTANH})
+  getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
+                               G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
+                               G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
+                               G_FSINH, G_FTANH})
       // We need a call for these, so we always need to scalarize.
       .scalarize(0)
       // Regardless of FP16 support, widen 16-bit elements to 32-bits.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
index 314c5458e30909..ed7bcff5160f81 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
@@ -2345,6 +2345,14 @@ define float @test_atan_f32(float %x) {
   ret float %y
 }
 
+declare float @llvm.atan2.f32(float, float)
+define float @test_atan2_f32(float %x, float %y) {
+  ; CHECK-LABEL: name: test_atan2_f32
+  ; CHECK: %{{[0-9]+}}:_(s32) = G_FATAN2 %{{[0-9]+}}
+  %z = call float @llvm.atan2.f32(float %x, float %y)
+  ret float %z
+}
+
 declare float @llvm.cosh.f32(float)
 define float @test_cosh_f32(float %x) {
   ; CHECK-LABEL: name: test_cosh_f32
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-atan2.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-atan2.mir
new file mode 100644
index 00000000000000..ac4f4358e98790
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-atan2.mir
@@ -0,0 +1,255 @@
+# RUN: llc -verify-machineinstrs -mtriple aarch64--- \
+# RUN:   -run-pass=legalizer -mattr=+fullfp16 -global-isel %s -o - \
+# RUN:   | FileCheck %s
+...
+---
+name: test_v4f16.atan2
+alignment: 4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: test_v4f16.atan2
+    ; CHECK: [[V1:%[0-9]+]]:_(s16), [[V2:%[0-9]+]]:_(s16), [[V3:%[0-9]+]]:_(s16), [[V4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s16>)
+    ; CHECK: [[V5:%[0-9]+]]:_(s16), [[V6:%[0-9]+]]:_(s16), [[V7:%[0-9]+]]:_(s16), [[V8:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s16>)
+
+    ; CHECK-DAG: [[V1_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V1]](s16)
+    ; CHECK-DAG: [[V5_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V5]](s16)
+    ; CHECK-NEXT: ADJCALLSTACKDOWN
+    ; CHECK-NEXT: $s0 = COPY [[V1_S32]](s32)
+    ; CHECK-NEXT: $s1 = COPY [[V5_S32]](s32)
+    ; CHECK-NEXT: BL &atan2f
+    ; CHECK-NEXT: ADJCALLSTACKUP
+    ; CHECK-NEXT: [[ELT1_S32:%[0-9]+]]:_(s32) = COPY $s0
+    ; CHECK-NEXT: [[ELT1:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT1_S32]](s32)
+
+    ; CHECK-DAG: [[V2_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V2]](s16)
+    ; CHECK-DAG: [[V6_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V6]](s16)
+    ; CHECK-NEXT: ADJCALLSTACKDOWN
+    ; CHECK-NEXT: $s0 = COPY [[V2_S32]](s32)
+    ; CHECK-NEXT: $s1 = COPY [[V6_S32]](s32)
+    ; CHECK-NEXT: BL &atan2f
+    ; CHECK-NEXT: ADJCALLSTACKUP
+    ; CHECK-NEXT: [[ELT2_S32:%[0-9]+]]:_(s32) = COPY $s0
+    ; CHECK-NEXT: [[ELT2:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT2_S32]](s32)
+
+    ; CHECK-DAG: [[V3_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V3]](s16)
+    ; CHECK-DAG: [[V7_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V7]](s16)
+    ; CHECK-NEXT: ADJCALLSTACKDOWN
+    ; CHECK-NEXT: $s0 = COPY [[V3_S32]](s32)
+    ; CHECK-NEXT: $s1 = COPY [[V7_S32]](s32)
+    ; CHECK-NEXT: BL &atan2f
+    ; CHECK-NEXT: ADJCALLSTACKUP
+    ; CHECK-NEXT: [[ELT3_S32:%[0-9]+]]:_(s32) = COPY $s0
+    ; CHECK-NEXT: [[ELT3:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT3_S32]](s32)
+
+    ; CHECK-DAG: [[V4_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V4]](s16)
+    ; CHECK-DAG: [[V8_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V8]](s16)
+    ; CHECK-NEXT: ADJCALLSTACKDOWN
+    ; CHECK-NEXT: $s0 = COPY [[V4_S32]](s32)
+    ; CHECK-NEXT: $s1 = COPY [[V8_S32]](s32)
+    ; CHECK-NEXT: BL &atan2f
+    ; CHECK-NEXT: ADJCALLSTACKUP
+    ; CHECK-NEXT: [[ELT4_S32:%[0-9]+]]:_(s32) = COPY $s0
+    ; CHECK-NEXT: [[ELT4:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT4_S32]](s32)
+
+    ; CHECK-DAG: %{{[0-9]+}}:_(<4 x s16>) = G_BUILD_VECTOR [[ELT1]](s16), [[ELT2]](s16), [[ELT3]](s16), [[ELT4]](s16)
+
+    %0:_(<4 x s16>) = COPY $d0
+    %1:_(<4 x s16>) = COPY $d1
+    %2:_(<4 x s16>) = G_FATAN2 %0, %1
+    $d0 = COPY %2(<4 x s16>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: test_v8f16.atan2
+alignment: 4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_v8f16.atan2
+
+    ; This is big, so let's just check for the 8 calls to atan2f, the
+    ; G_UNMERGE_VALUES, and the G_BUILD_VECTOR. The other instructions ought
+    ; to be covered by the other tests.
+
+    ; CHECK: G_UNMERGE_VALUES
+    ; CHECK: BL &atan2f
+    ; CHECK: BL &atan2f
+    ; CHECK: BL &atan2f
+    ; CHECK: BL &atan2f
+    ; CHECK: BL &atan2f
+    ; CHECK: BL &atan2f
+    ; CHECK: BL &atan2f
+    ; CHECK: BL &atan2f
+    ; CHECK: G_BUILD_VECTOR
+
+    %0:_(<8 x s16>) = COPY $q0
+    %1:_(<8 x s16>) = COPY $q1
+    %2:_(<8 x s16>) = G_FATAN2 %0, %1
+    $q0 = COPY %2(<8 x s16>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: test_v2f32.atan2
+alignment: 4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: test_v2f32.atan2
+    ; CHECK: [[V1:%[0-9]+]]:_(s32), [[V2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %{{[0-9]+}}(<2 x s32>)
+    ; CHECK: [[V3:%[0-9]+]]:_(s32), [[V4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %{{[0-9]+}}(<2 x s32>)
+
+    ; CHECK-NEXT: ADJCALLSTACKDOWN
+    ; CHECK-DAG: $s0 = COPY [[V1]](s32)
+    ; CHECK-DAG: $s1 = COPY [[V3]](s32)
+    ; CHECK: BL &atan2f
+    ; CHECK: ADJCALLSTACKUP
+    ; CHECK: [[ELT1:%[0-9]+]]:_(s32) = COPY $s0
+
+    ; CHECK-NEXT: ADJCALLSTACKDOWN
+    ; CHECK-DAG: $s0 = COPY [[V2]](s32)
+    ; CHECK-DAG: $s1 = COPY [[V4]](s32)
+    ; CHECK: BL &atan2f
+    ; CHECK: ADJCALLSTACKUP
+    ; CHECK: [[ELT2:%[0-9]+]]:_(s32) = COPY $s0
+
+    ; CHECK: %2:_(<2 x s32>) = G_BUILD_VECTOR [[ELT1]](s32), [[ELT2]](s32)
+
+    %0:_(<2 x s32>) = COPY $d0
+    %1:_(<2 x s32>) = COPY $d1
+    %2:_(<2 x s32>) = G_FATAN2 %0, %1
+    $d0 = COPY %2(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: test_v4f32.atan2
+alignment: 4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: test_v4f32.atan2
+    ; CHECK: [[V1:%[0-9]+]]:_(s32), [[V2:%[0-9]+]]:_(s32), [[V3:%[0-9]+]]:_(s32), [[V4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s32>)
+    ; CHECK: [[V5:%[0-9]+]]:_(s32), [[V6:%[0-9]+]]:_(s32), [[V7:%[0-9]+]]:_(s32), [[V8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s32>)
+
+    ; CHECK-NEXT: ADJCALLSTACKDOWN
+    ; CHECK-DAG: $s0 = COPY [[V1]](s32)
+    ; CHECK-DAG: $s1 = COPY [[V5]](s32)
+    ; CHECK: BL &atan2f
+    ; CHECK: ADJCALLSTACKUP
+    ; CHECK-NEXT: [[ELT1:%[0-9]+]]:_(s32) = COPY $s0
+
+    ; CHECK-NEXT: ADJCALLSTACKDOWN
+    ; CHECK-DAG: $s0 = COPY [[V2]](s32)
+    ; CHECK-DAG: $s1 = COPY [[V6]](s32)
+    ; CHECK: BL &atan2f
+    ; CHECK: ADJCALLSTACKUP
+    ; CHECK-NEXT: [[ELT2:%[0-9]+]]:_(s32) = COPY $s0
+
+    ; CHECK-NEXT: ADJCALLSTACKDOWN
+    ; CHECK-DAG: $s0 = COPY [[V3]](s32)
+    ; CHECK-DAG: $s1 = COPY [[V7]](s32)
+    ; CHECK: BL &atan2f
+    ; CHECK: ADJCALLSTACKUP
+    ; CHECK-NEXT: [[ELT3:%[0-9]+]]:_(s32) = COPY $s0
+
+    ; CHECK-NEXT: ADJCALLSTACKDOWN
+    ; CHECK-DAG: $s0 = COPY [[V4]](s32)
+    ; CHECK-DAG: $s1 = COPY [[V8]](s32)
+    ; CHECK: BL &atan2f
+    ; CHECK: ADJCALLSTACKUP
+    ; CHECK-NEXT: [[ELT4:%[0-9]+]]:_(s32) = COPY $s0
+
+    ; CHECK: %2:_(<4 x s32>) = G_BUILD_VECTOR [[ELT1]](s32), [[ELT2]](s32), [[ELT3]](s32), [[ELT4]](s32)
+
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(<4 x s32>) = G_FATAN2 %0, %1
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: test_v2f64.atan2
+alignment: 4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_v2f64.atan2
+    ; CHECK: [[V1:%[0-9]+]]:_(s64), [[V2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %{{[0-9]+}}(<2 x s64>)
+    ; CHECK: [[V3:%[0-9]+]]:_(s64), [[V4:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %{{[0-9]+}}(<2 x s64>)
+
+    ; CHECK-NEXT: ADJCALLSTACKDOWN
+    ; CHECK-DAG: $d0 = COPY [[V1]](s64)
+    ; CHECK-DAG: $d1 = COPY [[V3]](s64)
+    ; CHECK: BL &atan2
+    ; CHECK: ADJCALLSTACKUP
+    ; CHECK-NEXT: [[ELT1:%[0-9]+]]:_(s64) = COPY $d0
+
+    ; CHECK-NEXT: ADJCALLSTACKDOWN
+    ; CHECK-DAG: $d0 = COPY [[V2]](s64)
+    ; CHECK-DAG: $d1 = COPY [[V4]](s64)
+    ; CHECK: BL &atan2
+    ; CHECK: ADJCALLSTACKUP
+    ; CHECK-NEXT: [[ELT2:%[0-9]+]]:_(s64) = COPY $d0
+
+    ; CHECK: %2:_(<2 x s64>) = G_BUILD_VECTOR [[ELT1]](s64), [[ELT2]](s64)
+
+    %0:_(<2 x s64>) = COPY $q0
+    %1:_(<2 x s64>) = COPY $q1
+    %2:_(<2 x s64>) = G_FATAN2 %0, %1
+    $q0 = COPY %2(<2 x s64>)
+    RET_ReallyLR implicit $q0
+
+...
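+# The scalar s16 case below follows the same pattern as the vector tests
+# above: with no native FATAN2 instruction, the legalizer widens the f16
+# operands to f32 and lowers the operation to a single atan2f libcall.
+# A rough sketch of the expected sequence (AAPCS argument registers; the
+# value names here are illustrative, not generated output):
+#
+#   %a:_(s32) = G_FPEXT %x(s16)      ; promote both f16 operands
+#   %b:_(s32) = G_FPEXT %y(s16)
+#   $s0 = COPY %a ; $s1 = COPY %b    ; marshal the libcall arguments
+#   BL &atan2f                       ; result is returned in $s0
+#   %r:_(s16) = G_FPTRUNC {ret}      ; truncate back to f16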
+---
+name: test_atan2_half
+alignment: 4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: $h0, $h1
+    ; CHECK-LABEL: name: test_atan2_half
+    ; CHECK: [[REG1:%[0-9]+]]:_(s32) = G_FPEXT %0(s16)
+    ; CHECK: [[REG2:%[0-9]+]]:_(s32) = G_FPEXT %1(s16)
+    ; CHECK-NEXT: ADJCALLSTACKDOWN
+    ; CHECK-NEXT: $s0 = COPY [[REG1]](s32)
+    ; CHECK-NEXT: $s1 = COPY [[REG2]](s32)
+    ; CHECK-NEXT: BL &atan2f
+    ; CHECK: ADJCALLSTACKUP
+    ; CHECK-NEXT: [[REG2:%[0-9]+]]:_(s32) = COPY $s0
+    ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s16) = G_FPTRUNC [[REG2]](s32)
+
+    %0:_(s16) = COPY $h0
+    %1:_(s16) = COPY $h1
+    %2:_(s16) = G_FATAN2 %0, %1
+    $h0 = COPY %2(s16)
+    RET_ReallyLR implicit $h0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index a21b786a2bae97..93c95aa2f695d6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -718,8 +718,9 @@
 # DEBUG-NEXT: .. the first uncovered type index: 1, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 # DEBUG-NEXT: G_FATAN2 (opcode {{[0-9]+}}): 1 type index, 0 imm indices
-# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
-# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: .. the first uncovered type index: 1, OK
+# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 # DEBUG-NEXT: G_FCOSH (opcode {{[0-9]+}}): 1 type index, 0 imm indices
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. the first uncovered type index: 1, OK
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index e058c83f274f14..5460a376931a55 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -1114,6 +1114,7 @@ declare half @llvm.tan.f16(half %a) #0
 declare half @llvm.asin.f16(half %a) #0
 declare half @llvm.acos.f16(half %a) #0
 declare half @llvm.atan.f16(half %a) #0
+declare half @llvm.atan2.f16(half %a, half %b) #0
 declare half @llvm.sinh.f16(half %a) #0
 declare half @llvm.cosh.f16(half %a) #0
 declare half @llvm.tanh.f16(half %a) #0
@@ -1246,6 +1247,11 @@ define half @test_atan(half %a) #0 {
   ret half %r
 }
 
+define half @test_atan2(half %a, half %b) #0 {
+  %r = call half @llvm.atan2.f16(half %a, half %b)
+  ret half %r
+}
+
 define half @test_cosh(half %a) #0 {
 ; CHECK-LABEL: test_cosh:
 ; CHECK: // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll
index cbdfb4c9327756..3aeefab52c6fa3 100644
--- a/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll
@@ -398,6 +398,22 @@ define half @atan_f16(half %x) #0 {
   ret half %val
 }
 
+define half @atan2_f16(half %x, half %y) #0 {
+; CHECK-LABEL: atan2_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    fcvt s1, h1
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    bl atan2f
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %val = call half @llvm.experimental.constrained.atan2.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %val
+}
+
 define half @sinh_f16(half %x) #0 {
 ; CHECK-LABEL: sinh_f16:
 ; CHECK: // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
index 62b4a79b26d8e7..1664fa3ce56ae6 100644
--- a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
@@ -174,6 +174,13 @@ define float @atan_f32(float %x) #0 {
   ret float %val
 }
 
+; CHECK-LABEL: atan2_f32:
+; CHECK: bl atan2f
+define float @atan2_f32(float %x, float %y) #0 {
+  %val = call float @llvm.experimental.constrained.atan2.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret float %val
+}
+
 ; CHECK-LABEL: sinh_f32:
 ; CHECK: bl sinhf
 define float @sinh_f32(float %x) #0 {
@@ -707,6 +714,13 @@ define double @atan_f64(double %x) #0 {
   ret double %val
 }
 
+; CHECK-LABEL: atan2_f64:
+; CHECK: bl atan2
+define double @atan2_f64(double %x, double %y) #0 {
+  %val = call double @llvm.experimental.constrained.atan2.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret double %val
+}
+
 ; CHECK-LABEL: sinh_f64:
 ; CHECK: bl sinh
 define double @sinh_f64(double %x) #0 {
@@ -1240,6 +1254,13 @@ define fp128 @atan_f128(fp128 %x) #0 {
   ret fp128 %val
 }
 
+; CHECK-LABEL: atan2_f128:
+; CHECK: bl atan2l
+define fp128 @atan2_f128(fp128 %x, fp128 %y) #0 {
+  %val = call fp128 @llvm.experimental.constrained.atan2.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret fp128 %val
+}
+
 ; CHECK-LABEL: sinh_f128:
 ; CHECK: bl sinhl
 define fp128 @sinh_f128(fp128 %x) #0 {
@@ -1666,6 +1687,13 @@ define <1 x double> @atan_v1f64(<1 x double> %x, <1 x double> %y) #0 {
   ret <1 x double> %val
 }
 
+; CHECK-LABEL: atan2_v1f64:
+; CHECK: bl atan2
+define <1 x double> @atan2_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+  %val = call <1 x double> @llvm.experimental.constrained.atan2.v1f64(<1 x double> %x, <1 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret <1 x double> %val
+}
+
 ; CHECK-LABEL: sinh_v1f64:
 ; CHECK: bl sinh
 define <1 x double> @sinh_v1f64(<1 x double> %x, <1 x double> %y) #0 {
@@ -1755,6 +1783,7 @@ declare float @llvm.experimental.constrained.tan.f32(float, metadata, metadata)
 declare float @llvm.experimental.constrained.asin.f32(float, metadata, metadata)
 declare float @llvm.experimental.constrained.acos.f32(float, metadata, metadata)
 declare float @llvm.experimental.constrained.atan.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.atan2.f32(float, float, metadata, metadata)
 declare float @llvm.experimental.constrained.sinh.f32(float, metadata, metadata)
 declare float @llvm.experimental.constrained.cosh.f32(float, metadata, metadata)
 declare float @llvm.experimental.constrained.tanh.f32(float, metadata, metadata)
@@ -1806,6 +1835,7 @@ declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.asin.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.acos.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.atan.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.atan2.f64(double, double, metadata, metadata)
 declare double @llvm.experimental.constrained.sinh.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.cosh.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.tanh.f64(double, metadata, metadata)
@@ -1857,6 +1887,7 @@ declare fp128 @llvm.experimental.constrained.tan.f128(fp128, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.asin.f128(fp128, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.acos.f128(fp128, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.atan.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.atan2.f128(fp128, fp128, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.sinh.f128(fp128, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.cosh.f128(fp128, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.tanh.f128(fp128, metadata, metadata)
diff --git a/llvm/test/CodeGen/AArch64/illegal-float-ops.ll b/llvm/test/CodeGen/AArch64/illegal-float-ops.ll
index 08f6bb6f28532c..5ec66b784c037e 100644
--- a/llvm/test/CodeGen/AArch64/illegal-float-ops.ll
+++ b/llvm/test/CodeGen/AArch64/illegal-float-ops.ll
@@ -243,6 +243,27 @@ define void @test_atan(float %float, double %double, fp128 %fp128) {
   ret void
 }
 
+declare float @llvm.atan2.f32(float, float)
+declare double @llvm.atan2.f64(double, double)
+declare fp128 @llvm.atan2.f128(fp128, fp128)
+
+define void @test_atan2(float %float1, double %double1, fp128 %fp1281, float %float2, double %double2, fp128 %fp1282) {
+; CHECK-LABEL: test_atan2:
+
+  %atan2float = call float @llvm.atan2.f32(float %float1, float %float2)
+  store float %atan2float, ptr @varfloat
+; CHECK: bl atan2f
+
+  %atan2double = call double @llvm.atan2.f64(double %double1, double %double2)
+  store double %atan2double, ptr @vardouble
+; CHECK: bl atan2
+
+  %atan2fp128 = call fp128 @llvm.atan2.f128(fp128 %fp1281, fp128 %fp1282)
+  store fp128 %atan2fp128, ptr @varfp128
+; CHECK: bl atan2l
+  ret void
+}
+
 declare float @llvm.cosh.f32(float)
 declare double @llvm.cosh.f64(double)
 declare fp128 @llvm.cosh.f128(fp128)
diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
index 7b173bda561553..26fb4ca602da17 100644
--- a/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
+++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
@@ -15,7 +15,7 @@ declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>)
 declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)
 
 ;.
-; CHECK: @llvm.compiler.used = appending global [64 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vpowq_f64, ptr @armpl_vpowq_f32, ptr @armpl_svpow_f64_x, ptr @armpl_svpow_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vtanq_f64, ptr @armpl_vtanq_f32, ptr @armpl_svtan_f64_x, ptr @armpl_svtan_f32_x, ptr @armpl_vacosq_f64, ptr @armpl_vacosq_f32, ptr @armpl_svacos_f64_x, ptr @armpl_svacos_f32_x, ptr @armpl_vasinq_f64, ptr @armpl_vasinq_f32, ptr @armpl_svasin_f64_x, ptr @armpl_svasin_f32_x, ptr @armpl_vatanq_f64, ptr @armpl_vatanq_f32, ptr @armpl_svatan_f64_x, ptr @armpl_svatan_f32_x, ptr @armpl_vcoshq_f64, ptr @armpl_vcoshq_f32, ptr @armpl_svcosh_f64_x, ptr @armpl_svcosh_f32_x, ptr @armpl_vsinhq_f64, ptr @armpl_vsinhq_f32, ptr @armpl_svsinh_f64_x, ptr @armpl_svsinh_f32_x, ptr @armpl_vtanhq_f64, ptr @armpl_vtanhq_f32, ptr @armpl_svtanh_f64_x, ptr @armpl_svtanh_f32_x], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [68 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vpowq_f64, ptr @armpl_vpowq_f32, ptr @armpl_svpow_f64_x, ptr @armpl_svpow_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vtanq_f64, ptr @armpl_vtanq_f32, ptr @armpl_svtan_f64_x, ptr @armpl_svtan_f32_x, ptr @armpl_vacosq_f64, ptr @armpl_vacosq_f32, ptr @armpl_svacos_f64_x, ptr @armpl_svacos_f32_x, ptr @armpl_vasinq_f64, ptr @armpl_vasinq_f32, ptr @armpl_svasin_f64_x, ptr @armpl_svasin_f32_x, ptr @armpl_vatanq_f64, ptr @armpl_vatanq_f32, ptr @armpl_svatan_f64_x, ptr @armpl_svatan_f32_x, ptr @armpl_vatan2q_f64, ptr @armpl_vatan2q_f32, ptr @armpl_svatan2_f64_x, ptr @armpl_svatan2_f32_x, ptr @armpl_vcoshq_f64, ptr @armpl_vcoshq_f32, ptr @armpl_svcosh_f64_x, ptr @armpl_svcosh_f32_x, ptr @armpl_vsinhq_f64, ptr @armpl_vsinhq_f32, ptr @armpl_svsinh_f64_x, ptr @armpl_svsinh_f32_x, ptr @armpl_vtanhq_f64, ptr @armpl_vtanhq_f32, ptr @armpl_svtanh_f64_x, ptr @armpl_svtanh_f32_x], section "llvm.metadata"
 ;.
 
 define <2 x double> @llvm_cos_f64(<2 x double> %in) {
@@ -598,6 +598,51 @@ define <vscale x 4 x float> @llvm_atan_vscale_f32(<vscale x 4 x float> %in) #0 {
   ret <vscale x 4 x float> %1
 }
 
+declare <2 x double> @llvm.atan2.v2f64(<2 x double>, <2 x double>)
+declare <4 x float> @llvm.atan2.v4f32(<4 x float>, <4 x float>)
+declare <vscale x 2 x double> @llvm.atan2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.atan2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+
+define <2 x double> @llvm_atan2_f64(<2 x double> %in1, <2 x double> %in2) {
+; CHECK-LABEL: define <2 x double> @llvm_atan2_f64
+; CHECK-SAME: (<2 x double> [[IN1:%.*]], <2 x double> [[IN2:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vatan2q_f64(<2 x double> [[IN1]], <2 x double> [[IN2]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.atan2.v2f64(<2 x double> %in1, <2 x double> %in2)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_atan2_f32(<4 x float> %in1, <4 x float> %in2) {
+; CHECK-LABEL: define <4 x float> @llvm_atan2_f32
+; CHECK-SAME: (<4 x float> [[IN1:%.*]], <4 x float> [[IN2:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vatan2q_f32(<4 x float> [[IN1]], <4 x float> [[IN2]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.atan2.v4f32(<4 x float> %in1, <4 x float> %in2)
+  ret <4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_atan2_vscale_f64(<vscale x 2 x double> %in1, <vscale x 2 x double> %in2) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @llvm_atan2_vscale_f64
+; CHECK-SAME: (<vscale x 2 x double> [[IN1:%.*]], <vscale x 2 x double> [[IN2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svatan2_f64_x(<vscale x 2 x double> [[IN1]], <vscale x 2 x double> [[IN2]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.atan2.nxv2f64(<vscale x 2 x double> %in1, <vscale x 2 x double> %in2)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_atan2_vscale_f32(<vscale x 4 x float> %in1, <vscale x 4 x float> %in2) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @llvm_atan2_vscale_f32
+; CHECK-SAME: (<vscale x 4 x float> [[IN1:%.*]], <vscale x 4 x float> [[IN2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svatan2_f32_x(<vscale x 4 x float> [[IN1]], <vscale x 4 x float> [[IN2]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.atan2.nxv4f32(<vscale x 4 x float> %in1, <vscale x 4 x float> %in2)
+  ret <vscale x 4 x float> %1
+}
+
 declare <2 x double> @llvm.cosh.v2f64(<2 x double>)
 declare <4 x float> @llvm.cosh.v4f32(<4 x float>)
 declare <vscale x 2 x double> @llvm.cosh.nxv2f64(<vscale x 2 x double>)
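
For context, a minimal IR sketch (not part of the patch) of what the new
VecFuncs.def mappings enable: with -vector-library=ArmPL, the
replace-with-veclib pass can now rewrite a vector llvm.atan2 call directly to
the ArmPL routine registered above. The function name @example_atan2 below is
illustrative; only the intrinsic-to-routine pairing comes from the mappings
added in this patch.

  declare <4 x float> @llvm.atan2.v4f32(<4 x float>, <4 x float>)

  ; opt -vector-library=ArmPL -passes=replace-with-veclib example.ll
  define <4 x float> @example_atan2(<4 x float> %a, <4 x float> %b) {
    ; Rewritten by the pass to:
    ;   call fast <4 x float> @armpl_vatan2q_f32(<4 x float> %a, <4 x float> %b)
    %r = call fast <4 x float> @llvm.atan2.v4f32(<4 x float> %a, <4 x float> %b)
    ret <4 x float> %r
  }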