From db0ae9c87321a98e22175d3d77d9671d1d53c31c Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Sat, 14 Sep 2024 18:14:20 -0700 Subject: [PATCH] [X86][CodeGen] Add base atan2 intrinsic lowering (p4) This change is part of this proposal: https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294 Based on example PR #96222 and fix PR #101268, with some differences due to 2-arg intrinsic and intermediate refactor (RuntimeLibCalls.cpp). - Add llvm.experimental.constrained.atan2 - Intrinsics.td, ConstrainedOps.def, LangRef.rst - Add to ISDOpcodes.h and TargetSelectionDAG.td, connect to intrinsic in BasicTTIImpl.h, and LibFunc_ in SelectionDAGBuilder.cpp, and map generic op in SelectionDAGCompat.td - Update LegalizeDAG.cpp, LegalizeFloatTypes.cpp, LegalizeVectorOps.cpp, and LegalizeVectorTypes.cpp - Update isKnownNeverNaN in SelectionDAG.cpp - Update SelectionDAGDumper.cpp - Update libcalls - RuntimeLibcalls.def, RuntimeLibcalls.cpp, LegalizerHelper.cpp - Update isKnownNeverNaN for generic opcode in GlobalISel/Utils.cpp - TargetLoweringBase.cpp - Expand for vectors, promote f16 - X86ISelLowering.cpp - Expand f80, promote f32 to f64 for MSVC --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 3 + llvm/include/llvm/CodeGen/ISDOpcodes.h | 3 + llvm/include/llvm/IR/ConstrainedOps.def | 1 + llvm/include/llvm/IR/Intrinsics.td | 5 + llvm/include/llvm/IR/RuntimeLibcalls.def | 5 + .../Target/GlobalISel/SelectionDAGCompat.td | 1 + .../include/llvm/Target/TargetSelectionDAG.td | 6 + .../CodeGen/GlobalISel/LegalizerHelper.cpp | 4 + llvm/lib/CodeGen/GlobalISel/Utils.cpp | 2 + llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 7 + .../SelectionDAG/LegalizeFloatTypes.cpp | 22 ++ llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 + .../SelectionDAG/LegalizeVectorOps.cpp | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 3 + .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1 + .../SelectionDAG/SelectionDAGBuilder.cpp | 12 + .../SelectionDAG/SelectionDAGDumper.cpp | 2 + llvm/lib/CodeGen/TargetLoweringBase.cpp | 7 +- llvm/lib/IR/RuntimeLibcalls.cpp | 1 + llvm/lib/Target/X86/X86ISelLowering.cpp | 2 + llvm/test/Assembler/fp-intrinsics-attr.ll | 8 + llvm/test/CodeGen/X86/fp-intrinsics.ll | 61 ++++- .../test/CodeGen/X86/fp128-libcalls-strict.ll | 51 ++++ llvm/test/CodeGen/X86/fp80-strict-libcalls.ll | 36 +++ llvm/test/CodeGen/X86/llvm.atan2.ll | 80 ++++++ .../X86/vector-constrained-fp-intrinsics.ll | 258 ++++++++++++++++++ llvm/test/Feature/fp-intrinsics.ll | 15 +- 27 files changed, 594 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/X86/llvm.atan2.ll diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index c36a346c1b2e05..9d875e093d6424 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1993,6 +1993,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::atan: ISD = ISD::FATAN; break; + case Intrinsic::atan2: + ISD = ISD::FATAN2; + break; case Intrinsic::sinh: ISD = ISD::FSINH; break; diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index da43f5be10ff3b..0b6d155b6d161e 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -425,6 +425,7 @@ enum NodeType { STRICT_FASIN, STRICT_FACOS, STRICT_FATAN, + STRICT_FATAN2, STRICT_FSINH, STRICT_FCOSH, STRICT_FTANH, @@ -994,6 +995,8 @@ enum NodeType { FPOWI, /// FLDEXP - ldexp, inspired by libm (op0 * 2**op1). FLDEXP, + /// FATAN2 - atan2, inspired by libm. + FATAN2, /// FFREXP - frexp, extract fractional and exponent component of a /// floating-point value. Returns the two components as separate return diff --git a/llvm/include/llvm/IR/ConstrainedOps.def b/llvm/include/llvm/IR/ConstrainedOps.def index 56304c377b8393..30a82bf633d575 100644 --- a/llvm/include/llvm/IR/ConstrainedOps.def +++ b/llvm/include/llvm/IR/ConstrainedOps.def @@ -72,6 +72,7 @@ CMP_INSTRUCTION(FCmp, 2, 0, experimental_constrained_fcmps, FSETCCS DAG_FUNCTION(acos, 1, 1, experimental_constrained_acos, FACOS) DAG_FUNCTION(asin, 1, 1, experimental_constrained_asin, FASIN) DAG_FUNCTION(atan, 1, 1, experimental_constrained_atan, FATAN) +DAG_FUNCTION(atan2, 2, 1, experimental_constrained_atan2, FATAN2) DAG_FUNCTION(ceil, 1, 0, experimental_constrained_ceil, FCEIL) DAG_FUNCTION(cos, 1, 1, experimental_constrained_cos, FCOS) DAG_FUNCTION(cosh, 1, 1, experimental_constrained_cosh, FCOSH) diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 079ac61adef6e0..d946da4e4622dd 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1226,6 +1226,11 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn, IntrStrictFP] in [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_atan2 : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; def int_experimental_constrained_sin : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty, diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def index 69cf43140ad4bd..4aab658a86690c 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.def +++ b/llvm/include/llvm/IR/RuntimeLibcalls.def @@ -232,6 +232,11 @@ HANDLE_LIBCALL(ATAN_F64, "atan") HANDLE_LIBCALL(ATAN_F80, "atanl") HANDLE_LIBCALL(ATAN_F128,"atanl") HANDLE_LIBCALL(ATAN_PPCF128, "atanl") +HANDLE_LIBCALL(ATAN2_F32, "atan2f") +HANDLE_LIBCALL(ATAN2_F64, "atan2") +HANDLE_LIBCALL(ATAN2_F80, "atan2l") +HANDLE_LIBCALL(ATAN2_F128,"atan2l") +HANDLE_LIBCALL(ATAN2_PPCF128, "atan2l") HANDLE_LIBCALL(SINCOS_F32, nullptr) HANDLE_LIBCALL(SINCOS_F64, nullptr) HANDLE_LIBCALL(SINCOS_F80, nullptr) diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index d9121cf166e5aa..83bf3c335cac89 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -154,6 +154,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index adf8a75f620225..fa516fc9b10175 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -534,6 +534,7 @@ def ftan : SDNode<"ISD::FTAN" , SDTFPUnaryOp>; def fasin : SDNode<"ISD::FASIN" , SDTFPUnaryOp>; def facos : SDNode<"ISD::FACOS" , SDTFPUnaryOp>; def fatan : SDNode<"ISD::FATAN" , SDTFPUnaryOp>; +def fatan2 : SDNode<"ISD::FATAN2" , SDTFPBinOp>; def fsinh : SDNode<"ISD::FSINH" , SDTFPUnaryOp>; def fcosh : SDNode<"ISD::FCOSH" , SDTFPUnaryOp>; def ftanh : SDNode<"ISD::FTANH" , SDTFPUnaryOp>; @@ -602,6 +603,8 @@ def strict_facos : SDNode<"ISD::STRICT_FACOS", SDTFPUnaryOp, [SDNPHasChain]>; def strict_fatan : SDNode<"ISD::STRICT_FATAN", SDTFPUnaryOp, [SDNPHasChain]>; +def strict_fatan2 : SDNode<"ISD::STRICT_FATAN2", + SDTFPBinOp, [SDNPHasChain]>; def strict_fsinh : SDNode<"ISD::STRICT_FSINH", SDTFPUnaryOp, [SDNPHasChain]>; def strict_fcosh : SDNode<"ISD::STRICT_FCOSH", @@ -1588,6 +1591,9 @@ def any_facos : PatFrags<(ops node:$src), def any_fatan : PatFrags<(ops node:$src), [(strict_fatan node:$src), (fatan node:$src)]>; +def any_fatan2 : PatFrags<(ops node:$src1, node:$src2), + [(strict_fatan2 node:$src1, node:$src2), + (fatan2 node:$src1, node:$src2)]>; def any_fsinh : PatFrags<(ops node:$src), [(strict_fsinh node:$src), (fsinh node:$src)]>; diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 3b2fd95076c465..d5222641342ac1 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -457,6 +457,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { RTLIBCASE(ACOS_F); case TargetOpcode::G_FATAN: RTLIBCASE(ATAN_F); + case TargetOpcode::G_FATAN2: + RTLIBCASE(ATAN2_F); case TargetOpcode::G_FSINH: RTLIBCASE(SINH_F); case TargetOpcode::G_FCOSH: @@ -1202,6 +1204,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { case TargetOpcode::G_FACOS: case TargetOpcode::G_FASIN: case TargetOpcode::G_FATAN: + case TargetOpcode::G_FATAN2: case TargetOpcode::G_FCOSH: case TargetOpcode::G_FSINH: case TargetOpcode::G_FTANH: @@ -3122,6 +3125,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FACOS: case TargetOpcode::G_FASIN: case TargetOpcode::G_FATAN: + case TargetOpcode::G_FATAN2: case TargetOpcode::G_FCOSH: case TargetOpcode::G_FSINH: case TargetOpcode::G_FTANH: diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 9574464207d99f..722ceea29c951c 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -828,6 +828,7 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, case TargetOpcode::G_FACOS: case TargetOpcode::G_FASIN: case TargetOpcode::G_FATAN: + case TargetOpcode::G_FATAN2: case TargetOpcode::G_FCOSH: case TargetOpcode::G_FSINH: case TargetOpcode::G_FTANH: @@ -1715,6 +1716,7 @@ bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) { case TargetOpcode::G_FACOS: case TargetOpcode::G_FASIN: case TargetOpcode::G_FATAN: + case TargetOpcode::G_FATAN2: case TargetOpcode::G_FCOSH: case TargetOpcode::G_FSINH: case TargetOpcode::G_FTANH: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 04eb891f719d28..88106112d433ca 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4599,6 +4599,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { ExpandFPLibCall(Node, RTLIB::ATAN_F32, RTLIB::ATAN_F64, RTLIB::ATAN_F80, RTLIB::ATAN_F128, RTLIB::ATAN_PPCF128, Results); break; + case ISD::FATAN2: + case ISD::STRICT_FATAN2: + ExpandFPLibCall(Node, RTLIB::ATAN2_F32, RTLIB::ATAN2_F64, RTLIB::ATAN2_F80, + RTLIB::ATAN2_F128, RTLIB::ATAN2_PPCF128, Results); + break; case ISD::FSINH: case ISD::STRICT_FSINH: ExpandFPLibCall(Node, RTLIB::SINH_F32, RTLIB::SINH_F64, RTLIB::SINH_F80, @@ -5485,6 +5490,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FMINIMUMNUM: case ISD::FMAXIMUMNUM: case ISD::FPOW: + case ISD::FATAN2: Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, @@ -5501,6 +5507,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::STRICT_FMAXNUM: case ISD::STRICT_FREM: case ISD::STRICT_FPOW: + case ISD::STRICT_FATAN2: Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, {Node->getOperand(0), Node->getOperand(1)}); Tmp2 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 2c81c829e75cbb..73c258f0f6f18c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -84,6 +84,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FASIN: R = SoftenFloatRes_FASIN(N); break; case ISD::STRICT_FATAN: case ISD::FATAN: R = SoftenFloatRes_FATAN(N); break; + case ISD::STRICT_FATAN2: + case ISD::FATAN2: R = SoftenFloatRes_FATAN2(N); break; case ISD::FCBRT: R = SoftenFloatRes_FCBRT(N); break; case ISD::STRICT_FCEIL: case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; @@ -366,6 +368,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FATAN(SDNode *N) { RTLIB::ATAN_F80, RTLIB::ATAN_F128, RTLIB::ATAN_PPCF128)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FATAN2(SDNode *N) { + return SoftenFloatRes_Binary( + N, + GetFPLibCall(N->getValueType(0), RTLIB::ATAN2_F32, RTLIB::ATAN2_F64, + RTLIB::ATAN2_F80, RTLIB::ATAN2_F128, RTLIB::ATAN2_PPCF128)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FCBRT(SDNode *N) { return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32, @@ -1430,6 +1439,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FASIN: ExpandFloatRes_FASIN(N, Lo, Hi); break; case ISD::STRICT_FATAN: case ISD::FATAN: ExpandFloatRes_FATAN(N, Lo, Hi); break; + case ISD::STRICT_FATAN2: + case ISD::FATAN2: ExpandFloatRes_FATAN2(N, Lo, Hi); break; case ISD::FCBRT: ExpandFloatRes_FCBRT(N, Lo, Hi); break; case ISD::STRICT_FCEIL: case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break; @@ -1631,6 +1642,15 @@ void DAGTypeLegalizer::ExpandFloatRes_FATAN(SDNode *N, SDValue &Lo, Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FATAN2(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Binary(N, + GetFPLibCall(N->getValueType(0), RTLIB::ATAN2_F32, + RTLIB::ATAN2_F64, RTLIB::ATAN2_F80, + RTLIB::ATAN2_F128, RTLIB::ATAN2_PPCF128), + Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FCBRT(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32, @@ -2673,6 +2693,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMINNUM_IEEE: case ISD::FMUL: case ISD::FPOW: + case ISD::FATAN2: case ISD::FREM: case ISD::FSUB: R = PromoteFloatRes_BinOp(N); break; @@ -3115,6 +3136,7 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FMINNUM: case ISD::FMUL: case ISD::FPOW: + case ISD::FATAN2: case ISD::FREM: case ISD::FSUB: R = SoftPromoteHalfRes_BinOp(N); break; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index d14516ef3e2fbb..868da25ca8cb47 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -567,6 +567,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatRes_FACOS(SDNode *N); SDValue SoftenFloatRes_FASIN(SDNode *N); SDValue SoftenFloatRes_FATAN(SDNode *N); + SDValue SoftenFloatRes_FATAN2(SDNode *N); SDValue SoftenFloatRes_FMINNUM(SDNode *N); SDValue SoftenFloatRes_FMAXNUM(SDNode *N); SDValue SoftenFloatRes_FMINIMUMNUM(SDNode *N); @@ -661,6 +662,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void ExpandFloatRes_FACOS (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FASIN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FATAN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FATAN2 (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMINIMUMNUM(SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 0adf3cfb34c949..00b0c63378241f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -410,6 +410,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FASIN: case ISD::FACOS: case ISD::FATAN: + case ISD::FATAN2: case ISD::FSINH: case ISD::FCOSH: case ISD::FTANH: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 0a22f06271984e..2e70e22af3a62e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -164,6 +164,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::USHLSAT: case ISD::FPOW: + case ISD::FATAN2: case ISD::FREM: case ISD::FSUB: case ISD::MUL: @@ -1291,6 +1292,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::UDIV: case ISD::VP_UDIV: case ISD::FDIV: case ISD::VP_FDIV: case ISD::FPOW: + case ISD::FATAN2: case ISD::AND: case ISD::VP_AND: case ISD::OR: case ISD::VP_OR: case ISD::XOR: case ISD::VP_XOR: @@ -4579,6 +4581,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::FPOW: + case ISD::FATAN2: case ISD::FREM: if (unrollExpandedOp()) break; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 64414e2d44e65a..ddce4d0caa0297 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5460,6 +5460,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const case ISD::FASIN: case ISD::FACOS: case ISD::FATAN: + case ISD::FATAN2: case ISD::FSINH: case ISD::FCOSH: case ISD::FTANH: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 25213f587116d5..91ce9ced5e1a18 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6850,6 +6850,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(0)), Flags)); return; } + case Intrinsic::atan2: + setValue(&I, DAG.getNode(ISD::FATAN2, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), Flags)); + return; case Intrinsic::lround: case Intrinsic::llround: case Intrinsic::lrint: @@ -9342,6 +9348,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitUnaryFloatCall(I, ISD::FATAN)) return; break; + case LibFunc_atan2: + case LibFunc_atan2f: + case LibFunc_atan2l: + if (visitBinaryFloatCall(I, ISD::FATAN2)) + return; + break; case LibFunc_sinh: case LibFunc_sinhf: case LibFunc_sinhl: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 56fc538172f9fc..703efb70089742 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -227,6 +227,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STRICT_FACOS: return "strict_facos"; case ISD::FATAN: return "fatan"; case ISD::STRICT_FATAN: return "strict_fatan"; + case ISD::FATAN2: return "fatan2"; + case ISD::STRICT_FATAN2: return "strict_fatan2"; case ISD::FSINH: return "fsinh"; case ISD::STRICT_FSINH: return "strict_fsinh"; case ISD::FCOSH: return "fcosh"; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 1f49d60c970593..7a28f7892cbf31 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -783,7 +783,7 @@ void TargetLoweringBase::initActions() { ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG, ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::LROUND, ISD::LLROUND, ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN, - ISD::FCOSH, ISD::FSINH, ISD::FTANH}, + ISD::FCOSH, ISD::FSINH, ISD::FTANH, ISD::FATAN2}, VT, Expand); // Constrained floating-point operations default to expand. @@ -842,7 +842,8 @@ void TargetLoweringBase::initActions() { ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, ISD::FROUNDEVEN, ISD::FTAN, ISD::FACOS, ISD::FASIN, - ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH}, + ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH, + ISD::FATAN2}, {MVT::f32, MVT::f64, MVT::f128}, Expand); // FIXME: Query RuntimeLibCalls to make the decision. @@ -850,7 +851,7 @@ void TargetLoweringBase::initActions() { {MVT::f32, MVT::f64, MVT::f128}, LibCall); setOperationAction({ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN, ISD::FCOSH, - ISD::FSINH, ISD::FTANH}, + ISD::FSINH, ISD::FTANH, ISD::FATAN2}, MVT::f16, Promote); // Default ISD::TRAP to expand (which turns it into abort). setOperationAction(ISD::TRAP, MVT::Other, Expand); diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index d806f8093459ee..06167559a77697 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -49,6 +49,7 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) { setLibcallName(RTLIB::ASIN_F128, "asinf128"); setLibcallName(RTLIB::ACOS_F128, "acosf128"); setLibcallName(RTLIB::ATAN_F128, "atanf128"); + setLibcallName(RTLIB::ATAN2_F128, "atan2f128"); setLibcallName(RTLIB::SINH_F128, "sinhf128"); setLibcallName(RTLIB::COSH_F128, "coshf128"); setLibcallName(RTLIB::TANH_F128, "tanhf128"); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 73f7f52846f625..1b453d49105519 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -858,6 +858,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FASIN , MVT::f80, Expand); setOperationAction(ISD::FACOS , MVT::f80, Expand); setOperationAction(ISD::FATAN , MVT::f80, Expand); + setOperationAction(ISD::FATAN2 , MVT::f80, Expand); setOperationAction(ISD::FSINH , MVT::f80, Expand); setOperationAction(ISD::FCOSH , MVT::f80, Expand); setOperationAction(ISD::FTANH , MVT::f80, Expand); @@ -2562,6 +2563,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, {ISD::FACOS, ISD::STRICT_FACOS, ISD::FASIN, ISD::STRICT_FASIN, ISD::FATAN, ISD::STRICT_FATAN, + ISD::FATAN2, ISD::STRICT_FATAN2, ISD::FCEIL, ISD::STRICT_FCEIL, ISD::FCOS, ISD::STRICT_FCOS, ISD::FCOSH, ISD::STRICT_FCOSH, diff --git a/llvm/test/Assembler/fp-intrinsics-attr.ll b/llvm/test/Assembler/fp-intrinsics-attr.ll index da6507f051766c..5b9a44710763e4 100644 --- a/llvm/test/Assembler/fp-intrinsics-attr.ll +++ b/llvm/test/Assembler/fp-intrinsics-attr.ll @@ -105,6 +105,11 @@ define void @func(double %a, double %b, double %c, i32 %i) strictfp { metadata !"round.dynamic", metadata !"fpexcept.strict") + %atan2 = call double @llvm.experimental.constrained.atan2.f64( + double %a, double %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %cosh = call double @llvm.experimental.constrained.cosh.f64( double %a, metadata !"round.dynamic", @@ -291,6 +296,9 @@ declare double @llvm.experimental.constrained.acos.f64(double, metadata, metadat declare double @llvm.experimental.constrained.atan.f64(double, metadata, metadata) ; CHECK: @llvm.experimental.constrained.atan.f64({{.*}}) #[[ATTR1]] +declare double @llvm.experimental.constrained.atan2.f64(double, double, metadata, metadata) +; CHECK: @llvm.experimental.constrained.atan2.f64({{.*}}) #[[ATTR1]] + declare double @llvm.experimental.constrained.sinh.f64(double, metadata, metadata) ; CHECK: @llvm.experimental.constrained.sinh.f64({{.*}}) #[[ATTR1]] diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll index bb87252e0b9b08..3a8facff83fd55 100644 --- a/llvm/test/CodeGen/X86/fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --function fatan2 --version 2 ; RUN: llc -O3 -mtriple=i686-pc-linux -mattr=+cmov < %s | FileCheck %s --check-prefix=X87 ; RUN: llc -O3 -mtriple=i686-pc-linux -mattr=sse2 < %s | FileCheck %s --check-prefix=X86-SSE ; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s --check-prefix=SSE @@ -2962,6 +2962,64 @@ entry: ret double %result } +; Verify that atan2(42.1, 3.0) isn't simplified when the rounding mode is unknown. +define double @fatan2() #0 { +; X87-LABEL: fatan2: +; X87: # %bb.0: # %entry +; X87-NEXT: subl $28, %esp +; X87-NEXT: .cfi_def_cfa_offset 32 +; X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X87-NEXT: fstpl {{[0-9]+}}(%esp) +; X87-NEXT: fldl {{\.?LCPI[0-9]+_[0-9]+}} +; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait +; X87-NEXT: calll atan2 +; X87-NEXT: addl $28, %esp +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl +; +; X86-SSE-LABEL: fatan2: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: subl $28, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 32 +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] +; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; X86-SSE-NEXT: movsd %xmm0, (%esp) +; X86-SSE-NEXT: calll atan2 +; X86-SSE-NEXT: addl $28, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE-NEXT: retl +; +; SSE-LABEL: fatan2: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; SSE-NEXT: movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0] +; SSE-NEXT: callq atan2@PLT +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX-LABEL: fatan2: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0] +; AVX-NEXT: callq atan2@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %result = call double @llvm.experimental.constrained.atan2.f64(double 42.1, + double 3.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + ; Verify that cosh(42.0) isn't simplified when the rounding mode is unknown. define double @fcosh() #0 { ; X87-LABEL: fcosh: @@ -3132,6 +3190,7 @@ declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata declare double @llvm.experimental.constrained.asin.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.acos.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.atan.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.atan2.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.sinh.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.cosh.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.tanh.f64(double, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll index 9e84dfa5c41ae6..ffaa9f6297ed8c 100644 --- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll +++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll @@ -1247,6 +1247,50 @@ entry: ret fp128 %atan } +define fp128 @atan2(fp128 %x, fp128 %y) nounwind strictfp { +; ANDROID-LABEL: atan2: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq atan2l@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: atan2: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq atan2f128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; +; X86-LABEL: atan2: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll atan2l +; X86-NEXT: addl $44, %esp +; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: movaps %xmm0, (%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $24, %esp +; X86-NEXT: popl %esi +; X86-NEXT: retl $4 +entry: + %atan2 = call fp128 @llvm.experimental.constrained.atan2.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret fp128 %atan2 +} + define fp128 @tan(fp128 %x) nounwind strictfp { ; ANDROID-LABEL: tan: ; ANDROID: # %bb.0: # %entry @@ -1926,7 +1970,9 @@ declare fp128 @llvm.experimental.constrained.fdiv.f128(fp128, fp128, metadata, m declare fp128 @llvm.experimental.constrained.fma.f128(fp128, fp128, fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.frem.f128(fp128, fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.ceil.f128(fp128, metadata) +declare fp128 @llvm.experimental.constrained.acos.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.cos.f128(fp128, metadata, metadata) +declare fp128 @llvm.experimental.constrained.cosh.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.exp.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.exp2.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.floor.f128(fp128, metadata) @@ -1941,9 +1987,14 @@ declare fp128 @llvm.experimental.constrained.powi.f128(fp128, i32, metadata, met declare fp128 @llvm.experimental.constrained.rint.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata) declare fp128 @llvm.experimental.constrained.roundeven.f128(fp128, metadata) +declare fp128 @llvm.experimental.constrained.asin.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.sin.f128(fp128, metadata, metadata) +declare fp128 @llvm.experimental.constrained.sinh.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata) +declare fp128 @llvm.experimental.constrained.atan.f128(fp128, metadata, metadata) +declare fp128 @llvm.experimental.constrained.atan2.f128(fp128, fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.tan.f128(fp128, metadata, metadata) +declare fp128 @llvm.experimental.constrained.tanh.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata) declare i32 @llvm.experimental.constrained.lrint.i32.f128(fp128, metadata, metadata) declare i64 @llvm.experimental.constrained.llrint.i64.f128(fp128, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll b/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll index c14e99f3acb34e..8bbc6247dbafd6 100644 --- a/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll +++ b/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll @@ -629,6 +629,35 @@ entry: ret x86_fp80 %atan } +define x86_fp80 @atan2(x86_fp80 %x, x86_fp80 %y) nounwind strictfp { +; X86-LABEL: atan2: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $24, %esp +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fstpt {{[0-9]+}}(%esp) +; X86-NEXT: fstpt (%esp) +; X86-NEXT: wait +; X86-NEXT: calll atan2l +; X86-NEXT: addl $24, %esp +; X86-NEXT: retl +; +; X64-LABEL: atan2: +; X64: # %bb.0: # %entry +; X64-NEXT: subq $40, %rsp +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fstpt {{[0-9]+}}(%rsp) +; X64-NEXT: fstpt (%rsp) +; X64-NEXT: wait +; X64-NEXT: callq atan2l@PLT +; X64-NEXT: addq $40, %rsp +; X64-NEXT: retq +entry: + %atan2 = call x86_fp80 @llvm.experimental.constrained.atan2.f80(x86_fp80 %x, x86_fp80 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret x86_fp80 %atan2 +} + define x86_fp80 @tan(x86_fp80 %x) nounwind strictfp { ; X86-LABEL: tan: ; X86: # %bb.0: # %entry @@ -809,7 +838,9 @@ attributes #0 = { strictfp } declare x86_fp80 @llvm.experimental.constrained.fma.f80(x86_fp80, x86_fp80, x86_fp80, metadata, metadata) declare x86_fp80 @llvm.experimental.constrained.frem.f80(x86_fp80, x86_fp80, metadata, metadata) declare x86_fp80 @llvm.experimental.constrained.ceil.f80(x86_fp80, metadata) +declare x86_fp80 @llvm.experimental.constrained.acos.f80(x86_fp80, metadata, metadata) declare x86_fp80 @llvm.experimental.constrained.cos.f80(x86_fp80, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.cosh.f80(x86_fp80, metadata, metadata) declare x86_fp80 @llvm.experimental.constrained.exp.f80(x86_fp80, metadata, metadata) declare x86_fp80 @llvm.experimental.constrained.exp2.f80(x86_fp80, metadata, metadata) declare x86_fp80 @llvm.experimental.constrained.floor.f80(x86_fp80, metadata) @@ -824,8 +855,13 @@ declare x86_fp80 @llvm.experimental.constrained.powi.f80(x86_fp80, i32, metadata declare x86_fp80 @llvm.experimental.constrained.rint.f80(x86_fp80, metadata, metadata) declare x86_fp80 @llvm.experimental.constrained.round.f80(x86_fp80, metadata) declare x86_fp80 @llvm.experimental.constrained.roundeven.f80(x86_fp80, metadata) +declare x86_fp80 @llvm.experimental.constrained.asin.f80(x86_fp80, metadata, metadata) declare x86_fp80 @llvm.experimental.constrained.sin.f80(x86_fp80, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.sinh.f80(x86_fp80, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.atan.f80(x86_fp80, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.atan2.f80(x86_fp80, x86_fp80, metadata, metadata) declare x86_fp80 @llvm.experimental.constrained.tan.f80(x86_fp80, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.tanh.f80(x86_fp80, metadata, metadata) declare x86_fp80 @llvm.experimental.constrained.trunc.f80(x86_fp80, metadata) declare i32 @llvm.experimental.constrained.lrint.i32.f80(x86_fp80, metadata, metadata) declare i64 @llvm.experimental.constrained.llrint.i64.f80(x86_fp80, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/llvm.atan2.ll b/llvm/test/CodeGen/X86/llvm.atan2.ll new file mode 100644 index 00000000000000..ef2e4be36203be --- /dev/null +++ b/llvm/test/CodeGen/X86/llvm.atan2.ll @@ -0,0 +1,80 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +define half @use_atan2f16(half %a, half %b) nounwind { +; CHECK-LABEL: use_atan2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: callq __extendhfsf2@PLT +; CHECK-NEXT: movss %xmm0, (%rsp) # 4-byte Spill +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: callq __extendhfsf2@PLT +; CHECK-NEXT: movss (%rsp), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq atan2f@PLT +; CHECK-NEXT: callq __truncsfhf2@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %x = call half @llvm.atan2.f16(half %a, half %b) + ret half %x +} + +define float @use_atan2f32(float %a, float %b) nounwind { +; CHECK-LABEL: use_atan2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp atan2f@PLT # TAILCALL + %x = call float @llvm.atan2.f32(float %a, float %b) + ret float %x +} + +define double @use_atan2f64(double %a, double %b) nounwind { +; CHECK-LABEL: use_atan2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp atan2@PLT # TAILCALL + %x = call double @llvm.atan2.f64(double %a, double %b) + ret double %x +} + +define x86_fp80 @use_atan2f80(x86_fp80 %a, x86_fp80 %b) nounwind { +; CHECK-LABEL: use_atan2f80: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fstpt (%rsp) +; CHECK-NEXT: callq atan2l@PLT +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: retq + %x = call x86_fp80 @llvm.atan2.f80(x86_fp80 %a, x86_fp80 %b) + ret x86_fp80 %x +} + +define fp128 @use_atan2fp128(fp128 %a, fp128 %b) nounwind { +; CHECK-LABEL: use_atan2fp128: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp atan2f128@PLT # TAILCALL + %x = call fp128 @llvm.atan2.f128(fp128 %a, fp128 %b) + ret fp128 %x +} + +define ppc_fp128 @use_atan2ppc_fp128(ppc_fp128 %a, ppc_fp128 %b) nounwind { +; CHECK-LABEL: use_atan2ppc_fp128: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq atan2l@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %x = call ppc_fp128 @llvm.atan2.ppcf128(ppc_fp128 %a, ppc_fp128 %b) + ret ppc_fp128 %x +} + +declare half @llvm.atan2.f16(half, half) +declare float @llvm.atan2.f32(float, float) +declare double @llvm.atan2.f64(double, double) +declare x86_fp80 @llvm.atan2.f80(x86_fp80, x86_fp80) +declare fp128 @llvm.atan2.f128(fp128, fp128) +declare ppc_fp128 @llvm.atan2.ppcf128(ppc_fp128, ppc_fp128) diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll index b486014678466e..21dfdc3c2abe49 100644 --- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -8672,6 +8672,263 @@ entry: ret <4 x double> %atan } +define <1 x float> @constrained_vector_atan2_v1f32() #0 { +; CHECK-LABEL: constrained_vector_atan2_v1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: callq atan2f@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_atan2_v1f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: callq atan2f@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %atan2 = call <1 x float> @llvm.experimental.constrained.atan2.v1f32( + <1 x float> , + <1 x float> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <1 x float> %atan2 +} + +define <2 x double> @constrained_vector_atan2_v2f64() #0 { +; CHECK-LABEL: constrained_vector_atan2_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0] +; CHECK-NEXT: callq atan2@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0] +; CHECK-NEXT: callq atan2@PLT +; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_atan2_v2f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: subq $24, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 32 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0] +; AVX-NEXT: callq atan2@PLT +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0] +; AVX-NEXT: callq atan2@PLT +; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX-NEXT: addq $24, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %atan2 = call <2 x double> @llvm.experimental.constrained.atan2.v2f64( + <2 x double> , + <2 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %atan2 +} + +define <3 x float> @constrained_vector_atan2_v3f32() #0 { +; CHECK-LABEL: constrained_vector_atan2_v3f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movss {{.*#+}} xmm1 = [2.5E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: callq atan2f@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: callq atan2f@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movss {{.*#+}} xmm1 = [2.4E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: callq atan2f@PLT +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_atan2_v3f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: subq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm1 = [2.5E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: callq atan2f@PLT +; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: callq atan2f@PLT +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm1 = [2.4E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: callq atan2f@PLT +; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] +; AVX-NEXT: addq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %atan2 = call <3 x float> @llvm.experimental.constrained.atan2.v3f32( + <3 x float> , + <3 x float> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <3 x float> %atan2 +} + +define <3 x double> @constrained_vector_atan2_v3f64() #0 { +; CHECK-LABEL: constrained_vector_atan2_v3f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0] +; CHECK-NEXT: callq atan2@PLT +; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0] +; CHECK-NEXT: callq atan2@PLT +; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] +; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3199999999999999E+1,0.0E+0] +; CHECK-NEXT: callq atan2@PLT +; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_atan2_v3f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: subq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0] +; AVX-NEXT: callq atan2@PLT +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0] +; AVX-NEXT: callq atan2@PLT +; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3199999999999999E+1,0.0E+0] +; AVX-NEXT: vzeroupper +; AVX-NEXT: callq atan2@PLT +; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload +; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX-NEXT: addq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %atan2 = call <3 x double> @llvm.experimental.constrained.atan2.v3f64( + <3 x double> , + <3 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <3 x double> %atan2 +} + +define <4 x double> @constrained_vector_atan2_v4f64() #0 { +; CHECK-LABEL: constrained_vector_atan2_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0] +; CHECK-NEXT: callq atan2@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0] +; CHECK-NEXT: callq atan2@PLT +; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] +; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3300000000000001E+1,0.0E+0] +; CHECK-NEXT: callq atan2@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] +; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3199999999999999E+1,0.0E+0] +; CHECK-NEXT: callq atan2@PLT +; CHECK-NEXT: movaps %xmm0, %xmm1 +; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_atan2_v4f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: subq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3300000000000001E+1,0.0E+0] +; AVX-NEXT: callq atan2@PLT +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3199999999999999E+1,0.0E+0] +; AVX-NEXT: callq atan2@PLT +; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0] +; AVX-NEXT: callq atan2@PLT +; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0] +; AVX-NEXT: callq atan2@PLT +; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX-NEXT: addq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %atan2 = call <4 x double> @llvm.experimental.constrained.atan2.v4f64( + <4 x double> , + <4 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %atan2 +} + define <1 x float> @constrained_vector_cosh_v1f32() #0 { ; CHECK-LABEL: constrained_vector_cosh_v1f32: ; CHECK: # %bb.0: # %entry @@ -9546,6 +9803,7 @@ declare <4 x double> @llvm.experimental.constrained.tan.v4f64(<4 x double>, meta declare <4 x double> @llvm.experimental.constrained.asin.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.acos.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.atan.v4f64(<4 x double>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.atan2.v4f64(<4 x double>, <4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.sinh.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.cosh.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.tanh.v4f64(<4 x double>, metadata, metadata) diff --git a/llvm/test/Feature/fp-intrinsics.ll b/llvm/test/Feature/fp-intrinsics.ll index 80f8b15abfaabe..ada22c39abc9e7 100644 --- a/llvm/test/Feature/fp-intrinsics.ll +++ b/llvm/test/Feature/fp-intrinsics.ll @@ -184,7 +184,7 @@ entry: ret double %result } -; Verify that atan(42.0) isn't simplified when the rounding mode is unknown. +; Verify that atan(42.0, 23.0) isn't simplified when the rounding mode is unknown. ; CHECK-LABEL: fatan ; CHECK: call double @llvm.experimental.constrained.atan define double @fatan() #0 { @@ -195,6 +195,19 @@ entry: ret double %result } +; Verify that atan2(42.0) isn't simplified when the rounding mode is unknown. +; CHECK-LABEL: fatan2 +; CHECK: call double @llvm.experimental.constrained.atan2 +define double @fatan2() #0 { +entry: + %result = call double @llvm.experimental.constrained.atan2.f64( + double 42.0, + double 23.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + ; Verify that cosh(42.0) isn't simplified when the rounding mode is unknown. ; CHECK-LABEL: fcosh ; CHECK: call double @llvm.experimental.constrained.cosh