Skip to content

Commit

Permalink
[aarch64] atan2 intrinsic lowering (p5)
Browse files Browse the repository at this point in the history
This change is part of this proposal: https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294

- `VecFuncs.def`: define intrinsic to sleef/armpl mapping
- `LegalizerHelper.cpp`: add missing fewerElementsVector handling for the new atan2 intrinsic
- `AArch64ISelLowering.cpp`: Add AArch64 specializations for lowering, like NEON instructions
- `AArch64LegalizerInfo.cpp`: Legalize atan2.

Part 5 of implementing the atan2 HLSL function (llvm#70096).
  • Loading branch information
tex3d committed Oct 11, 2024
1 parent de9269f commit 1036be3
Show file tree
Hide file tree
Showing 12 changed files with 418 additions and 19 deletions.
11 changes: 11 additions & 0 deletions llvm/include/llvm/Analysis/VecFuncs.def
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,9 @@ TLI_DEFINE_VECFUNC("llvm.atan.f64", "_simd_atan_d2", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("atanf", "_simd_atan_f4", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.atan.f32", "_simd_atan_f4", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("atan2", "_simd_atan2_d2", FIXED(2), "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_simd_atan2_d2", FIXED(2), "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("atan2f", "_simd_atan2_f4", FIXED(4), "_ZGV_LLVM_N4vv")
TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_simd_atan2_f4", FIXED(4), "_ZGV_LLVM_N4vv")

TLI_DEFINE_VECFUNC("cos", "_simd_cos_d2", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("llvm.cos.f64", "_simd_cos_d2", FIXED(2), "_ZGV_LLVM_N2v")
Expand Down Expand Up @@ -531,6 +533,7 @@ TLI_DEFINE_VECFUNC("atan", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("llvm.atan.f64", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v")

TLI_DEFINE_VECFUNC("atan2", "_ZGVnN2vv_atan2", FIXED(2), "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_ZGVnN2vv_atan2", FIXED(2), "_ZGV_LLVM_N2vv")

TLI_DEFINE_VECFUNC("atanh", "_ZGVnN2v_atanh", FIXED(2), "_ZGV_LLVM_N2v")

Expand Down Expand Up @@ -635,6 +638,7 @@ TLI_DEFINE_VECFUNC("atanf", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v")

TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN4vv_atan2f", FIXED(4), "_ZGV_LLVM_N4vv")
TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVnN4vv_atan2f", FIXED(4), "_ZGV_LLVM_N4vv")

TLI_DEFINE_VECFUNC("atanhf", "_ZGVnN4v_atanhf", FIXED(4), "_ZGV_LLVM_N4v")

Expand Down Expand Up @@ -748,6 +752,8 @@ TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVsMxv_atanf", SCALABLE(4), MASKED, "_ZGV

TLI_DEFINE_VECFUNC("atan2", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("atan2f", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv")

TLI_DEFINE_VECFUNC("atanh", "_ZGVsMxv_atanh", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("atanhf", "_ZGVsMxv_atanhf", SCALABLE(4), MASKED, "_ZGVsMxv")
Expand Down Expand Up @@ -933,6 +939,11 @@ TLI_DEFINE_VECFUNC("atan2f", "armpl_vatan2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N
TLI_DEFINE_VECFUNC("atan2", "armpl_svatan2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("atan2f", "armpl_svatan2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvv")

TLI_DEFINE_VECFUNC("llvm.atan2.f64", "armpl_vatan2q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("llvm.atan2.f32", "armpl_vatan2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv")
TLI_DEFINE_VECFUNC("llvm.atan2.f64", "armpl_svatan2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("llvm.atan2.f32", "armpl_svatan2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvv")

TLI_DEFINE_VECFUNC("atanh", "armpl_vatanhq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("atanhf", "armpl_vatanhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("atanh", "armpl_svatanh_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5041,6 +5041,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FACOS:
case G_FASIN:
case G_FATAN:
case G_FATAN2:
case G_FCOSH:
case G_FSINH:
case G_FTANH:
Expand Down
29 changes: 16 additions & 13 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -733,18 +733,19 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Promote);
}

for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
ISD::FACOS, ISD::FASIN, ISD::FATAN,
ISD::FCOSH, ISD::FSINH, ISD::FTANH,
ISD::FTAN, ISD::FEXP, ISD::FEXP2,
ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
ISD::FLOG10, ISD::STRICT_FREM, ISD::STRICT_FPOW,
ISD::STRICT_FPOWI, ISD::STRICT_FCOS, ISD::STRICT_FSIN,
ISD::STRICT_FACOS, ISD::STRICT_FASIN, ISD::STRICT_FATAN,
ISD::STRICT_FCOSH, ISD::STRICT_FSINH, ISD::STRICT_FTANH,
ISD::STRICT_FEXP, ISD::STRICT_FEXP2, ISD::STRICT_FLOG,
ISD::STRICT_FLOG2, ISD::STRICT_FLOG10, ISD::STRICT_FTAN}) {
for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
ISD::FACOS, ISD::FASIN, ISD::FATAN,
ISD::FATAN2, ISD::FCOSH, ISD::FSINH,
ISD::FTANH, ISD::FTAN, ISD::FEXP,
ISD::FEXP2, ISD::FEXP10, ISD::FLOG,
ISD::FLOG2, ISD::FLOG10, ISD::STRICT_FREM,
ISD::STRICT_FPOW, ISD::STRICT_FPOWI, ISD::STRICT_FCOS,
ISD::STRICT_FSIN, ISD::STRICT_FACOS, ISD::STRICT_FASIN,
ISD::STRICT_FATAN, ISD::STRICT_FATAN2, ISD::STRICT_FCOSH,
ISD::STRICT_FSINH, ISD::STRICT_FTANH, ISD::STRICT_FEXP,
ISD::STRICT_FEXP2, ISD::STRICT_FLOG, ISD::STRICT_FLOG2,
ISD::STRICT_FLOG10, ISD::STRICT_FTAN}) {
setOperationAction(Op, MVT::f16, Promote);
setOperationAction(Op, MVT::v4f16, Expand);
setOperationAction(Op, MVT::v8f16, Expand);
Expand Down Expand Up @@ -1187,7 +1188,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// silliness like this:
// clang-format off
for (auto Op :
{ISD::SELECT, ISD::SELECT_CC,
{ISD::SELECT, ISD::SELECT_CC, ISD::FATAN2,
ISD::BR_CC, ISD::FADD, ISD::FSUB,
ISD::FMUL, ISD::FDIV, ISD::FMA,
ISD::FNEG, ISD::FABS, ISD::FCEIL,
Expand Down Expand Up @@ -1646,6 +1647,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FACOS, VT, Expand);
setOperationAction(ISD::FASIN, VT, Expand);
setOperationAction(ISD::FATAN, VT, Expand);
setOperationAction(ISD::FATAN2, VT, Expand);
setOperationAction(ISD::FCOSH, VT, Expand);
setOperationAction(ISD::FSINH, VT, Expand);
setOperationAction(ISD::FTANH, VT, Expand);
Expand Down Expand Up @@ -1901,6 +1903,7 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
setOperationAction(ISD::FASIN, VT, Expand);
setOperationAction(ISD::FACOS, VT, Expand);
setOperationAction(ISD::FATAN, VT, Expand);
setOperationAction(ISD::FATAN2, VT, Expand);
setOperationAction(ISD::FSINH, VT, Expand);
setOperationAction(ISD::FCOSH, VT, Expand);
setOperationAction(ISD::FTANH, VT, Expand);
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,9 +284,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.libcallFor({{s64, s128}})
.minScalarOrElt(1, MinFPScalar);

getActionDefinitionsBuilder(
{G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10, G_FTAN, G_FEXP,
G_FEXP2, G_FEXP10, G_FACOS, G_FASIN, G_FATAN, G_FCOSH, G_FSINH, G_FTANH})
getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
G_FSINH, G_FTANH})
// We need a call for these, so we always need to scalarize.
.scalarize(0)
// Regardless of FP16 support, widen 16-bit elements to 32-bits.
Expand Down
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2345,6 +2345,14 @@ define float @test_atan_f32(float %x) {
ret float %y
}

; atan2 is not commutative, so bind both incoming argument copies and verify
; that they reach G_FATAN2 in source order (x first, then y).
declare float @llvm.atan2.f32(float, float)
define float @test_atan2_f32(float %x, float %y) {
; CHECK-LABEL: name: test_atan2_f32
; CHECK: [[X:%[0-9]+]]:_(s32) = COPY $s0
; CHECK: [[Y:%[0-9]+]]:_(s32) = COPY $s1
; CHECK: %{{[0-9]+}}:_(s32) = G_FATAN2 [[X]], [[Y]]
%z = call float @llvm.atan2.f32(float %x, float %y)
ret float %z
}

declare float @llvm.cosh.f32(float)
define float @test_cosh_f32(float %x) {
; CHECK-LABEL: name: test_cosh_f32
Expand Down
255 changes: 255 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-atan2.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,255 @@
# RUN: llc -verify-machineinstrs -mtriple aarch64--- \
# RUN: -run-pass=legalizer -mattr=+fullfp16 -global-isel %s -o - \
# RUN: | FileCheck %s
...
---
# Legalization of G_FATAN2 on <4 x s16>.
# Expected shape: both vector operands are unmerged into s16 lanes; for each
# lane the two inputs are extended to s32 (G_FPEXT), passed in $s0/$s1 to an
# atan2f call, and the s32 result read from $s0 is truncated back to s16
# (G_FPTRUNC). The four truncated lanes are reassembled with G_BUILD_VECTOR.
name: test_v4f16.atan2
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
body: |
bb.0:
liveins: $d0, $d1
; CHECK-LABEL: name: test_v4f16.atan2
; CHECK: [[V1:%[0-9]+]]:_(s16), [[V2:%[0-9]+]]:_(s16), [[V3:%[0-9]+]]:_(s16), [[V4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s16>)
; CHECK: [[V5:%[0-9]+]]:_(s16), [[V6:%[0-9]+]]:_(s16), [[V7:%[0-9]+]]:_(s16), [[V8:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s16>)
; CHECK-DAG: [[V1_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V1]](s16)
; CHECK-DAG: [[V5_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V5]](s16)
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-NEXT: $s0 = COPY [[V1_S32]](s32)
; CHECK-NEXT: $s1 = COPY [[V5_S32]](s32)
; CHECK-NEXT: BL &atan2f
; CHECK-NEXT: ADJCALLSTACKUP
; CHECK-NEXT: [[ELT1_S32:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: [[ELT1:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT1_S32]](s32)
; CHECK-DAG: [[V2_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V2]](s16)
; CHECK-DAG: [[V6_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V6]](s16)
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-NEXT: $s0 = COPY [[V2_S32]](s32)
; CHECK-NEXT: $s1 = COPY [[V6_S32]](s32)
; CHECK-NEXT: BL &atan2f
; CHECK-NEXT: ADJCALLSTACKUP
; CHECK-NEXT: [[ELT2_S32:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: [[ELT2:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT2_S32]](s32)
; CHECK-DAG: [[V3_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V3]](s16)
; CHECK-DAG: [[V7_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V7]](s16)
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-NEXT: $s0 = COPY [[V3_S32]](s32)
; CHECK-NEXT: $s1 = COPY [[V7_S32]](s32)
; CHECK-NEXT: BL &atan2f
; CHECK-NEXT: ADJCALLSTACKUP
; CHECK-NEXT: [[ELT3_S32:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: [[ELT3:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT3_S32]](s32)
; CHECK-DAG: [[V4_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V4]](s16)
; CHECK-DAG: [[V8_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V8]](s16)
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-NEXT: $s0 = COPY [[V4_S32]](s32)
; CHECK-NEXT: $s1 = COPY [[V8_S32]](s32)
; CHECK-NEXT: BL &atan2f
; CHECK-NEXT: ADJCALLSTACKUP
; CHECK-NEXT: [[ELT4_S32:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: [[ELT4:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT4_S32]](s32)
; CHECK-DAG: %{{[0-9]+}}:_(<4 x s16>) = G_BUILD_VECTOR [[ELT1]](s16), [[ELT2]](s16), [[ELT3]](s16), [[ELT4]](s16)
%0:_(<4 x s16>) = COPY $d0
%1:_(<4 x s16>) = COPY $d1
%2:_(<4 x s16>) = G_FATAN2 %0, %1
$d0 = COPY %2(<4 x s16>)
RET_ReallyLR implicit $d0
...
---
# Legalization of G_FATAN2 on <8 x s16>.
# Only the coarse structure is verified here: an unmerge, eight atan2f calls
# (one per lane), and a final G_BUILD_VECTOR.
name: test_v8f16.atan2
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
body: |
bb.0:
liveins: $q0, $q1
; CHECK-LABEL: name: test_v8f16.atan2
; This is big, so let's just check for the 8 calls to atan2f, the
; G_UNMERGE_VALUES, and the G_BUILD_VECTOR. The other instructions ought
; to be covered by the other tests.
; CHECK: G_UNMERGE_VALUES
; CHECK: BL &atan2f
; CHECK: BL &atan2f
; CHECK: BL &atan2f
; CHECK: BL &atan2f
; CHECK: BL &atan2f
; CHECK: BL &atan2f
; CHECK: BL &atan2f
; CHECK: BL &atan2f
; CHECK: G_BUILD_VECTOR
%0:_(<8 x s16>) = COPY $q0
%1:_(<8 x s16>) = COPY $q1
%2:_(<8 x s16>) = G_FATAN2 %0, %1
$q0 = COPY %2(<8 x s16>)
RET_ReallyLR implicit $q0
...
---
# Legalization of G_FATAN2 on <2 x s32>.
# Expected shape: both operands are unmerged into s32 lanes; each lane pair is
# passed in $s0/$s1 to an atan2f call (no extension needed), and the two s32
# results read from $s0 are reassembled with G_BUILD_VECTOR.
name: test_v2f32.atan2
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
body: |
bb.0:
liveins: $d0, $d1
; CHECK-LABEL: name: test_v2f32.atan2
; CHECK: [[V1:%[0-9]+]]:_(s32), [[V2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %{{[0-9]+}}(<2 x s32>)
; CHECK: [[V3:%[0-9]+]]:_(s32), [[V4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %{{[0-9]+}}(<2 x s32>)
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-DAG: $s0 = COPY [[V1]](s32)
; CHECK-DAG: $s1 = COPY [[V3]](s32)
; CHECK: BL &atan2f
; CHECK: ADJCALLSTACKUP
; CHECK: [[ELT1:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-DAG: $s0 = COPY [[V2]](s32)
; CHECK-DAG: $s1 = COPY [[V4]](s32)
; CHECK: BL &atan2f
; CHECK: ADJCALLSTACKUP
; CHECK: [[ELT2:%[0-9]+]]:_(s32) = COPY $s0
; CHECK: %2:_(<2 x s32>) = G_BUILD_VECTOR [[ELT1]](s32), [[ELT2]](s32)
%0:_(<2 x s32>) = COPY $d0
%1:_(<2 x s32>) = COPY $d1
%2:_(<2 x s32>) = G_FATAN2 %0, %1
$d0 = COPY %2(<2 x s32>)
RET_ReallyLR implicit $d0
...
---
# Legalization of G_FATAN2 on <4 x s32>.
# Expected shape: both operands are unmerged into s32 lanes; each of the four
# lane pairs is passed in $s0/$s1 to its own atan2f call, and the four s32
# results read from $s0 are reassembled with G_BUILD_VECTOR.
name: test_v4f32.atan2
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
body: |
bb.0:
liveins: $q0, $q1
; CHECK-LABEL: name: test_v4f32.atan2
; CHECK: [[V1:%[0-9]+]]:_(s32), [[V2:%[0-9]+]]:_(s32), [[V3:%[0-9]+]]:_(s32), [[V4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s32>)
; CHECK: [[V5:%[0-9]+]]:_(s32), [[V6:%[0-9]+]]:_(s32), [[V7:%[0-9]+]]:_(s32), [[V8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s32>)
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-DAG: $s0 = COPY [[V1]](s32)
; CHECK-DAG: $s1 = COPY [[V5]](s32)
; CHECK: BL &atan2f
; CHECK: ADJCALLSTACKUP
; CHECK-NEXT: [[ELT1:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-DAG: $s0 = COPY [[V2]](s32)
; CHECK-DAG: $s1 = COPY [[V6]](s32)
; CHECK: BL &atan2f
; CHECK: ADJCALLSTACKUP
; CHECK-NEXT: [[ELT2:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-DAG: $s0 = COPY [[V3]](s32)
; CHECK-DAG: $s1 = COPY [[V7]](s32)
; CHECK: BL &atan2f
; CHECK: ADJCALLSTACKUP
; CHECK-NEXT: [[ELT3:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-DAG: $s0 = COPY [[V4]](s32)
; CHECK-DAG: $s1 = COPY [[V8]](s32)
; CHECK: BL &atan2f
; CHECK: ADJCALLSTACKUP
; CHECK-NEXT: [[ELT4:%[0-9]+]]:_(s32) = COPY $s0
; CHECK: %2:_(<4 x s32>) = G_BUILD_VECTOR [[ELT1]](s32), [[ELT2]](s32), [[ELT3]](s32), [[ELT4]](s32)
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<4 x s32>) = G_FATAN2 %0, %1
$q0 = COPY %2(<4 x s32>)
RET_ReallyLR implicit $q0
...
---
# Legalization of G_FATAN2 on <2 x s64>.
# Expected shape: both operands are unmerged into s64 lanes; each lane pair is
# passed in $d0/$d1 to a double-precision atan2 call, and the two s64 results
# read from $d0 are reassembled with G_BUILD_VECTOR.
name: test_v2f64.atan2
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
body: |
bb.0:
liveins: $q0, $q1
; CHECK-LABEL: name: test_v2f64.atan2
; CHECK: [[V1:%[0-9]+]]:_(s64), [[V2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %{{[0-9]+}}(<2 x s64>)
; CHECK: [[V3:%[0-9]+]]:_(s64), [[V4:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %{{[0-9]+}}(<2 x s64>)
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-DAG: $d0 = COPY [[V1]](s64)
; CHECK-DAG: $d1 = COPY [[V3]](s64)
; CHECK: BL &atan2
; CHECK: ADJCALLSTACKUP
; CHECK-NEXT: [[ELT1:%[0-9]+]]:_(s64) = COPY $d0
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-DAG: $d0 = COPY [[V2]](s64)
; CHECK-DAG: $d1 = COPY [[V4]](s64)
; CHECK: BL &atan2
; CHECK: ADJCALLSTACKUP
; CHECK-NEXT: [[ELT2:%[0-9]+]]:_(s64) = COPY $d0
; CHECK: %2:_(<2 x s64>) = G_BUILD_VECTOR [[ELT1]](s64), [[ELT2]](s64)
%0:_(<2 x s64>) = COPY $q0
%1:_(<2 x s64>) = COPY $q1
%2:_(<2 x s64>) = G_FATAN2 %0, %1
$q0 = COPY %2(<2 x s64>)
RET_ReallyLR implicit $q0
...
---
# Legalization of scalar s16 G_FATAN2.
# Expected shape: both s16 inputs are extended to s32 (G_FPEXT), passed in
# $s0/$s1 to an atan2f call, and the s32 result read from $s0 is truncated
# back to s16 (G_FPTRUNC).
# The call result is bound to its own FileCheck variable (RES_S32) rather than
# rebinding REG2, so each variable names exactly one value.
name: test_atan2_half
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
body: |
bb.0:
liveins: $h0, $h1
; CHECK-LABEL: name: test_atan2_half
; CHECK: [[REG1:%[0-9]+]]:_(s32) = G_FPEXT %0(s16)
; CHECK: [[REG2:%[0-9]+]]:_(s32) = G_FPEXT %1(s16)
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-NEXT: $s0 = COPY [[REG1]](s32)
; CHECK-NEXT: $s1 = COPY [[REG2]](s32)
; CHECK-NEXT: BL &atan2f
; CHECK: ADJCALLSTACKUP
; CHECK-NEXT: [[RES_S32:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: [[RES:%[0-9]+]]:_(s16) = G_FPTRUNC [[RES_S32]](s32)
%0:_(s16) = COPY $h0
%1:_(s16) = COPY $h1
%2:_(s16) = G_FATAN2 %0, %1
$h0 = COPY %2(s16)
RET_ReallyLR implicit $h0
Loading

0 comments on commit 1036be3

Please sign in to comment.