diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 57d1fa33c8482c..db3b5cddd7c1c3 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1998,6 +1998,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::atan: ISD = ISD::FATAN; break; + case Intrinsic::atan2: + ISD = ISD::FATAN2; + break; case Intrinsic::sinh: ISD = ISD::FSINH; break; diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index da43f5be10ff3b..0b6d155b6d161e 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -425,6 +425,7 @@ enum NodeType { STRICT_FASIN, STRICT_FACOS, STRICT_FATAN, + STRICT_FATAN2, STRICT_FSINH, STRICT_FCOSH, STRICT_FTANH, @@ -994,6 +995,8 @@ enum NodeType { FPOWI, /// FLDEXP - ldexp, inspired by libm (op0 * 2**op1). FLDEXP, + /// FATAN2 - atan2, inspired by libm. + FATAN2, /// FFREXP - frexp, extract fractional and exponent component of a /// floating-point value. Returns the two components as separate return diff --git a/llvm/include/llvm/IR/ConstrainedOps.def b/llvm/include/llvm/IR/ConstrainedOps.def index 56304c377b8393..30a82bf633d575 100644 --- a/llvm/include/llvm/IR/ConstrainedOps.def +++ b/llvm/include/llvm/IR/ConstrainedOps.def @@ -72,6 +72,7 @@ CMP_INSTRUCTION(FCmp, 2, 0, experimental_constrained_fcmps, FSETCCS DAG_FUNCTION(acos, 1, 1, experimental_constrained_acos, FACOS) DAG_FUNCTION(asin, 1, 1, experimental_constrained_asin, FASIN) DAG_FUNCTION(atan, 1, 1, experimental_constrained_atan, FATAN) +DAG_FUNCTION(atan2, 2, 1, experimental_constrained_atan2, FATAN2) DAG_FUNCTION(ceil, 1, 0, experimental_constrained_ceil, FCEIL) DAG_FUNCTION(cos, 1, 1, experimental_constrained_cos, FCOS) DAG_FUNCTION(cosh, 1, 1, experimental_constrained_cosh, FCOSH) diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 8a0721cf23f538..94e53f372127da 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1235,6 +1235,11 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn, IntrStrictFP] in [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_atan2 : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; def int_experimental_constrained_sin : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty, diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def index 69cf43140ad4bd..4aab658a86690c 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.def +++ b/llvm/include/llvm/IR/RuntimeLibcalls.def @@ -232,6 +232,11 @@ HANDLE_LIBCALL(ATAN_F64, "atan") HANDLE_LIBCALL(ATAN_F80, "atanl") HANDLE_LIBCALL(ATAN_F128,"atanl") HANDLE_LIBCALL(ATAN_PPCF128, "atanl") +HANDLE_LIBCALL(ATAN2_F32, "atan2f") +HANDLE_LIBCALL(ATAN2_F64, "atan2") +HANDLE_LIBCALL(ATAN2_F80, "atan2l") +HANDLE_LIBCALL(ATAN2_F128,"atan2l") +HANDLE_LIBCALL(ATAN2_PPCF128, "atan2l") HANDLE_LIBCALL(SINCOS_F32, nullptr) HANDLE_LIBCALL(SINCOS_F64, nullptr) HANDLE_LIBCALL(SINCOS_F80, nullptr) diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index adf8a75f620225..fa516fc9b10175 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -534,6 +534,7 @@ def ftan : SDNode<"ISD::FTAN" 
, SDTFPUnaryOp>; def fasin : SDNode<"ISD::FASIN" , SDTFPUnaryOp>; def facos : SDNode<"ISD::FACOS" , SDTFPUnaryOp>; def fatan : SDNode<"ISD::FATAN" , SDTFPUnaryOp>; +def fatan2 : SDNode<"ISD::FATAN2" , SDTFPBinOp>; def fsinh : SDNode<"ISD::FSINH" , SDTFPUnaryOp>; def fcosh : SDNode<"ISD::FCOSH" , SDTFPUnaryOp>; def ftanh : SDNode<"ISD::FTANH" , SDTFPUnaryOp>; @@ -602,6 +603,8 @@ def strict_facos : SDNode<"ISD::STRICT_FACOS", SDTFPUnaryOp, [SDNPHasChain]>; def strict_fatan : SDNode<"ISD::STRICT_FATAN", SDTFPUnaryOp, [SDNPHasChain]>; +def strict_fatan2 : SDNode<"ISD::STRICT_FATAN2", + SDTFPBinOp, [SDNPHasChain]>; def strict_fsinh : SDNode<"ISD::STRICT_FSINH", SDTFPUnaryOp, [SDNPHasChain]>; def strict_fcosh : SDNode<"ISD::STRICT_FCOSH", @@ -1588,6 +1591,9 @@ def any_facos : PatFrags<(ops node:$src), def any_fatan : PatFrags<(ops node:$src), [(strict_fatan node:$src), (fatan node:$src)]>; +def any_fatan2 : PatFrags<(ops node:$src1, node:$src2), + [(strict_fatan2 node:$src1, node:$src2), + (fatan2 node:$src1, node:$src2)]>; def any_fsinh : PatFrags<(ops node:$src), [(strict_fsinh node:$src), (fsinh node:$src)]>; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index ea22b4670d6f1f..e0a03383358b76 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4600,6 +4600,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { ExpandFPLibCall(Node, RTLIB::ATAN_F32, RTLIB::ATAN_F64, RTLIB::ATAN_F80, RTLIB::ATAN_F128, RTLIB::ATAN_PPCF128, Results); break; + case ISD::FATAN2: + case ISD::STRICT_FATAN2: + ExpandFPLibCall(Node, RTLIB::ATAN2_F32, RTLIB::ATAN2_F64, RTLIB::ATAN2_F80, + RTLIB::ATAN2_F128, RTLIB::ATAN2_PPCF128, Results); + break; case ISD::FSINH: case ISD::STRICT_FSINH: ExpandFPLibCall(Node, RTLIB::SINH_F32, RTLIB::SINH_F64, RTLIB::SINH_F80, @@ -5486,6 +5491,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FMINIMUMNUM: case ISD::FMAXIMUMNUM: case ISD::FPOW: + case ISD::FATAN2: Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, @@ -5502,6 +5508,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::STRICT_FMAXNUM: case ISD::STRICT_FREM: case ISD::STRICT_FPOW: + case ISD::STRICT_FATAN2: Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, {Node->getOperand(0), Node->getOperand(1)}); Tmp2 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 2c81c829e75cbb..73c258f0f6f18c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -84,6 +84,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FASIN: R = SoftenFloatRes_FASIN(N); break; case ISD::STRICT_FATAN: case ISD::FATAN: R = SoftenFloatRes_FATAN(N); break; + case ISD::STRICT_FATAN2: + case ISD::FATAN2: R = SoftenFloatRes_FATAN2(N); break; case ISD::FCBRT: R = SoftenFloatRes_FCBRT(N); break; case ISD::STRICT_FCEIL: case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; @@ -366,6 +368,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FATAN(SDNode *N) { RTLIB::ATAN_F80, RTLIB::ATAN_F128, RTLIB::ATAN_PPCF128)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FATAN2(SDNode *N) { + return SoftenFloatRes_Binary( + N, + 
GetFPLibCall(N->getValueType(0), RTLIB::ATAN2_F32, RTLIB::ATAN2_F64, + RTLIB::ATAN2_F80, RTLIB::ATAN2_F128, RTLIB::ATAN2_PPCF128)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FCBRT(SDNode *N) { return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32, @@ -1430,6 +1439,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FASIN: ExpandFloatRes_FASIN(N, Lo, Hi); break; case ISD::STRICT_FATAN: case ISD::FATAN: ExpandFloatRes_FATAN(N, Lo, Hi); break; + case ISD::STRICT_FATAN2: + case ISD::FATAN2: ExpandFloatRes_FATAN2(N, Lo, Hi); break; case ISD::FCBRT: ExpandFloatRes_FCBRT(N, Lo, Hi); break; case ISD::STRICT_FCEIL: case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break; @@ -1631,6 +1642,15 @@ void DAGTypeLegalizer::ExpandFloatRes_FATAN(SDNode *N, SDValue &Lo, Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FATAN2(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Binary(N, + GetFPLibCall(N->getValueType(0), RTLIB::ATAN2_F32, + RTLIB::ATAN2_F64, RTLIB::ATAN2_F80, + RTLIB::ATAN2_F128, RTLIB::ATAN2_PPCF128), + Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FCBRT(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32, @@ -2673,6 +2693,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMINNUM_IEEE: case ISD::FMUL: case ISD::FPOW: + case ISD::FATAN2: case ISD::FREM: case ISD::FSUB: R = PromoteFloatRes_BinOp(N); break; @@ -3115,6 +3136,7 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FMINNUM: case ISD::FMUL: case ISD::FPOW: + case ISD::FATAN2: case ISD::FREM: case ISD::FSUB: R = SoftPromoteHalfRes_BinOp(N); break; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index d14516ef3e2fbb..868da25ca8cb47 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -567,6 +567,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatRes_FACOS(SDNode *N); SDValue SoftenFloatRes_FASIN(SDNode *N); SDValue SoftenFloatRes_FATAN(SDNode *N); + SDValue SoftenFloatRes_FATAN2(SDNode *N); SDValue SoftenFloatRes_FMINNUM(SDNode *N); SDValue SoftenFloatRes_FMAXNUM(SDNode *N); SDValue SoftenFloatRes_FMINIMUMNUM(SDNode *N); @@ -661,6 +662,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void ExpandFloatRes_FACOS (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FASIN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FATAN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FATAN2 (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMINIMUMNUM(SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index ffecca78a2252c..a8042fc3e7a69a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -410,6 +410,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FASIN: case ISD::FACOS: case ISD::FATAN: + case ISD::FATAN2: case ISD::FSINH: case ISD::FCOSH: case ISD::FTANH: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index e0b47e1045b965..50e2a923699c8a 100644 --- 
a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -164,6 +164,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::USHLSAT: case ISD::FPOW: + case ISD::FATAN2: case ISD::FREM: case ISD::FSUB: case ISD::MUL: @@ -1293,6 +1294,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::UDIV: case ISD::VP_UDIV: case ISD::FDIV: case ISD::VP_FDIV: case ISD::FPOW: + case ISD::FATAN2: case ISD::AND: case ISD::VP_AND: case ISD::OR: case ISD::VP_OR: case ISD::XOR: case ISD::VP_XOR: @@ -4581,6 +4583,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::FPOW: + case ISD::FATAN2: case ISD::FREM: if (unrollExpandedOp()) break; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index ff4b2f409d7c33..d63ed7ecf0236b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5471,6 +5471,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const case ISD::FASIN: case ISD::FACOS: case ISD::FATAN: + case ISD::FATAN2: case ISD::FSINH: case ISD::FCOSH: case ISD::FTANH: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 805b8ecf009598..9d822474996759 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6861,6 +6861,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(0)), Flags)); return; } + case Intrinsic::atan2: + setValue(&I, DAG.getNode(ISD::FATAN2, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), Flags)); + return; case Intrinsic::lround: case Intrinsic::llround: case Intrinsic::lrint: @@ -9353,6 +9359,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitUnaryFloatCall(I, ISD::FATAN)) return; break; + case LibFunc_atan2: + case LibFunc_atan2f: + case LibFunc_atan2l: + if (visitBinaryFloatCall(I, ISD::FATAN2)) + return; + break; case LibFunc_sinh: case LibFunc_sinhf: case LibFunc_sinhl: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 56fc538172f9fc..703efb70089742 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -227,6 +227,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STRICT_FACOS: return "strict_facos"; case ISD::FATAN: return "fatan"; case ISD::STRICT_FATAN: return "strict_fatan"; + case ISD::FATAN2: return "fatan2"; + case ISD::STRICT_FATAN2: return "strict_fatan2"; case ISD::FSINH: return "fsinh"; case ISD::STRICT_FSINH: return "strict_fsinh"; case ISD::FCOSH: return "fcosh"; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 1f49d60c970593..7a28f7892cbf31 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -783,7 +783,7 @@ void TargetLoweringBase::initActions() { ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG, ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::LROUND, ISD::LLROUND, ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN, - ISD::FCOSH, ISD::FSINH, ISD::FTANH}, + ISD::FCOSH, ISD::FSINH, ISD::FTANH, ISD::FATAN2}, 
VT, Expand); // Constrained floating-point operations default to expand. @@ -842,7 +842,8 @@ void TargetLoweringBase::initActions() { ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, ISD::FROUNDEVEN, ISD::FTAN, ISD::FACOS, ISD::FASIN, - ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH}, + ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH, + ISD::FATAN2}, {MVT::f32, MVT::f64, MVT::f128}, Expand); // FIXME: Query RuntimeLibCalls to make the decision. @@ -850,7 +851,7 @@ void TargetLoweringBase::initActions() { {MVT::f32, MVT::f64, MVT::f128}, LibCall); setOperationAction({ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN, ISD::FCOSH, - ISD::FSINH, ISD::FTANH}, + ISD::FSINH, ISD::FTANH, ISD::FATAN2}, MVT::f16, Promote); // Default ISD::TRAP to expand (which turns it into abort). setOperationAction(ISD::TRAP, MVT::Other, Expand); diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index d806f8093459ee..06167559a77697 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -49,6 +49,7 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) { setLibcallName(RTLIB::ASIN_F128, "asinf128"); setLibcallName(RTLIB::ACOS_F128, "acosf128"); setLibcallName(RTLIB::ATAN_F128, "atanf128"); + setLibcallName(RTLIB::ATAN2_F128, "atan2f128"); setLibcallName(RTLIB::SINH_F128, "sinhf128"); setLibcallName(RTLIB::COSH_F128, "coshf128"); setLibcallName(RTLIB::TANH_F128, "tanhf128"); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5b4b27c888952c..0155409dfda0fc 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -858,6 +858,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FASIN , MVT::f80, Expand); setOperationAction(ISD::FACOS , MVT::f80, Expand); setOperationAction(ISD::FATAN , MVT::f80, Expand); + setOperationAction(ISD::FATAN2 , MVT::f80, Expand); setOperationAction(ISD::FSINH , MVT::f80, Expand); setOperationAction(ISD::FCOSH , MVT::f80, Expand); setOperationAction(ISD::FTANH , MVT::f80, Expand); @@ -2562,6 +2563,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, {ISD::FACOS, ISD::STRICT_FACOS, ISD::FASIN, ISD::STRICT_FASIN, ISD::FATAN, ISD::STRICT_FATAN, + ISD::FATAN2, ISD::STRICT_FATAN2, ISD::FCEIL, ISD::STRICT_FCEIL, ISD::FCOS, ISD::STRICT_FCOS, ISD::FCOSH, ISD::STRICT_FCOSH, diff --git a/llvm/test/Assembler/fp-intrinsics-attr.ll b/llvm/test/Assembler/fp-intrinsics-attr.ll index da6507f051766c..5b9a44710763e4 100644 --- a/llvm/test/Assembler/fp-intrinsics-attr.ll +++ b/llvm/test/Assembler/fp-intrinsics-attr.ll @@ -105,6 +105,11 @@ define void @func(double %a, double %b, double %c, i32 %i) strictfp { metadata !"round.dynamic", metadata !"fpexcept.strict") + %atan2 = call double @llvm.experimental.constrained.atan2.f64( + double %a, double %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %cosh = call double @llvm.experimental.constrained.cosh.f64( double %a, metadata !"round.dynamic", @@ -291,6 +296,9 @@ declare double @llvm.experimental.constrained.acos.f64(double, metadata, metadat declare double @llvm.experimental.constrained.atan.f64(double, metadata, metadata) ; CHECK: @llvm.experimental.constrained.atan.f64({{.*}}) #[[ATTR1]] +declare double @llvm.experimental.constrained.atan2.f64(double, double, metadata, metadata) +; CHECK: @llvm.experimental.constrained.atan2.f64({{.*}}) #[[ATTR1]] + declare double 
@llvm.experimental.constrained.sinh.f64(double, metadata, metadata) ; CHECK: @llvm.experimental.constrained.sinh.f64({{.*}}) #[[ATTR1]] diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll index bb87252e0b9b08..3577f252f50dac 100644 --- a/llvm/test/CodeGen/X86/fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll @@ -2962,6 +2962,64 @@ entry: ret double %result } +; Verify that atan2(42.1, 3.0) isn't simplified when the rounding mode is unknown. +define double @fatan2() #0 { +; X87-LABEL: fatan2: +; X87: # %bb.0: # %entry +; X87-NEXT: subl $28, %esp +; X87-NEXT: .cfi_def_cfa_offset 32 +; X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X87-NEXT: fstpl {{[0-9]+}}(%esp) +; X87-NEXT: fldl {{\.?LCPI[0-9]+_[0-9]+}} +; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait +; X87-NEXT: calll atan2 +; X87-NEXT: addl $28, %esp +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl +; +; X86-SSE-LABEL: fatan2: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: subl $28, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 32 +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] +; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; X86-SSE-NEXT: movsd %xmm0, (%esp) +; X86-SSE-NEXT: calll atan2 +; X86-SSE-NEXT: addl $28, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE-NEXT: retl +; +; SSE-LABEL: fatan2: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; SSE-NEXT: movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0] +; SSE-NEXT: callq atan2@PLT +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX-LABEL: fatan2: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0] +; AVX-NEXT: callq atan2@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %result = call double @llvm.experimental.constrained.atan2.f64(double 42.1, + double 3.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + ; Verify that cosh(42.0) isn't simplified when the rounding mode is unknown. 
define double @fcosh() #0 { ; X87-LABEL: fcosh: @@ -3132,6 +3190,7 @@ declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata declare double @llvm.experimental.constrained.asin.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.acos.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.atan.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.atan2.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.sinh.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.cosh.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.tanh.f64(double, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll index 84574e3691848b..ffaa9f6297ed8c 100644 --- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll +++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll @@ -1247,6 +1247,50 @@ entry: ret fp128 %atan } +define fp128 @atan2(fp128 %x, fp128 %y) nounwind strictfp { +; ANDROID-LABEL: atan2: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq atan2l@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: atan2: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq atan2f128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; +; X86-LABEL: atan2: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll atan2l +; X86-NEXT: addl $44, %esp +; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: movaps %xmm0, (%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $24, %esp +; X86-NEXT: popl %esi +; X86-NEXT: retl $4 +entry: + %atan2 = call fp128 @llvm.experimental.constrained.atan2.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret fp128 %atan2 +} + define fp128 @tan(fp128 %x) nounwind strictfp { ; ANDROID-LABEL: tan: ; ANDROID: # %bb.0: # %entry @@ -1948,6 +1992,7 @@ declare fp128 @llvm.experimental.constrained.sin.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.sinh.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.atan.f128(fp128, metadata, metadata) +declare fp128 @llvm.experimental.constrained.atan2.f128(fp128, fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.tan.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.tanh.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata) diff --git a/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll b/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll index 293133b08e761a..8bbc6247dbafd6 100644 --- a/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll +++ b/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll @@ -629,6 +629,35 @@ entry: ret x86_fp80 %atan } +define x86_fp80 @atan2(x86_fp80 %x, x86_fp80 %y) nounwind strictfp { +; X86-LABEL: atan2: +; X86: # %bb.0: # %entry +; X86-NEXT: 
subl $24, %esp +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fstpt {{[0-9]+}}(%esp) +; X86-NEXT: fstpt (%esp) +; X86-NEXT: wait +; X86-NEXT: calll atan2l +; X86-NEXT: addl $24, %esp +; X86-NEXT: retl +; +; X64-LABEL: atan2: +; X64: # %bb.0: # %entry +; X64-NEXT: subq $40, %rsp +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fstpt {{[0-9]+}}(%rsp) +; X64-NEXT: fstpt (%rsp) +; X64-NEXT: wait +; X64-NEXT: callq atan2l@PLT +; X64-NEXT: addq $40, %rsp +; X64-NEXT: retq +entry: + %atan2 = call x86_fp80 @llvm.experimental.constrained.atan2.f80(x86_fp80 %x, x86_fp80 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret x86_fp80 %atan2 +} + define x86_fp80 @tan(x86_fp80 %x) nounwind strictfp { ; X86-LABEL: tan: ; X86: # %bb.0: # %entry @@ -830,6 +859,7 @@ declare x86_fp80 @llvm.experimental.constrained.asin.f80(x86_fp80, metadata, met declare x86_fp80 @llvm.experimental.constrained.sin.f80(x86_fp80, metadata, metadata) declare x86_fp80 @llvm.experimental.constrained.sinh.f80(x86_fp80, metadata, metadata) declare x86_fp80 @llvm.experimental.constrained.atan.f80(x86_fp80, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.atan2.f80(x86_fp80, x86_fp80, metadata, metadata) declare x86_fp80 @llvm.experimental.constrained.tan.f80(x86_fp80, metadata, metadata) declare x86_fp80 @llvm.experimental.constrained.tanh.f80(x86_fp80, metadata, metadata) declare x86_fp80 @llvm.experimental.constrained.trunc.f80(x86_fp80, metadata) diff --git a/llvm/test/CodeGen/X86/llvm.atan2.ll b/llvm/test/CodeGen/X86/llvm.atan2.ll new file mode 100644 index 00000000000000..ef2e4be36203be --- /dev/null +++ b/llvm/test/CodeGen/X86/llvm.atan2.ll @@ -0,0 +1,80 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +define half @use_atan2f16(half %a, half %b) nounwind { +; CHECK-LABEL: use_atan2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: callq __extendhfsf2@PLT +; CHECK-NEXT: movss %xmm0, (%rsp) # 4-byte Spill +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: callq __extendhfsf2@PLT +; CHECK-NEXT: movss (%rsp), %xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: callq atan2f@PLT +; CHECK-NEXT: callq __truncsfhf2@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %x = call half @llvm.atan2.f16(half %a, half %b) + ret half %x +} + +define float @use_atan2f32(float %a, float %b) nounwind { +; CHECK-LABEL: use_atan2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp atan2f@PLT # TAILCALL + %x = call float @llvm.atan2.f32(float %a, float %b) + ret float %x +} + +define double @use_atan2f64(double %a, double %b) nounwind { +; CHECK-LABEL: use_atan2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp atan2@PLT # TAILCALL + %x = call double @llvm.atan2.f64(double %a, double %b) + ret double %x +} + +define x86_fp80 @use_atan2f80(x86_fp80 %a, x86_fp80 %b) nounwind { +; CHECK-LABEL: use_atan2f80: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fstpt (%rsp) +; CHECK-NEXT: callq atan2l@PLT +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: retq + %x = call x86_fp80 @llvm.atan2.f80(x86_fp80 %a, 
x86_fp80 %b) + ret x86_fp80 %x +} + +define fp128 @use_atan2fp128(fp128 %a, fp128 %b) nounwind { +; CHECK-LABEL: use_atan2fp128: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp atan2f128@PLT # TAILCALL + %x = call fp128 @llvm.atan2.f128(fp128 %a, fp128 %b) + ret fp128 %x +} + +define ppc_fp128 @use_atan2ppc_fp128(ppc_fp128 %a, ppc_fp128 %b) nounwind { +; CHECK-LABEL: use_atan2ppc_fp128: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq atan2l@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %x = call ppc_fp128 @llvm.atan2.ppcf128(ppc_fp128 %a, ppc_fp128 %b) + ret ppc_fp128 %x +} + +declare half @llvm.atan2.f16(half, half) +declare float @llvm.atan2.f32(float, float) +declare double @llvm.atan2.f64(double, double) +declare x86_fp80 @llvm.atan2.f80(x86_fp80, x86_fp80) +declare fp128 @llvm.atan2.f128(fp128, fp128) +declare ppc_fp128 @llvm.atan2.ppcf128(ppc_fp128, ppc_fp128) diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll index b486014678466e..21dfdc3c2abe49 100644 --- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -8672,6 +8672,263 @@ entry: ret <4 x double> %atan } +define <1 x float> @constrained_vector_atan2_v1f32() #0 { +; CHECK-LABEL: constrained_vector_atan2_v1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: callq atan2f@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_atan2_v1f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: callq atan2f@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %atan2 = call <1 x float> @llvm.experimental.constrained.atan2.v1f32( + <1 x float> <float 42.0>, + <1 x float> <float 23.0>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <1 x float> %atan2 +} + +define <2 x double> @constrained_vector_atan2_v2f64() #0 { +; CHECK-LABEL: constrained_vector_atan2_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0] +; CHECK-NEXT: callq atan2@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0] +; CHECK-NEXT: callq atan2@PLT +; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_atan2_v2f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: subq $24, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 32 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0] +; AVX-NEXT: callq atan2@PLT +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0] +; AVX-NEXT: callq atan2@PLT +; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0
# 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX-NEXT: addq $24, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %atan2 = call <2 x double> @llvm.experimental.constrained.atan2.v2f64( + <2 x double> <double 42.0, double 42.1>, + <2 x double> <double 23.0, double 23.1>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %atan2 +} + +define <3 x float> @constrained_vector_atan2_v3f32() #0 { +; CHECK-LABEL: constrained_vector_atan2_v3f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movss {{.*#+}} xmm1 = [2.5E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: callq atan2f@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: callq atan2f@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movss {{.*#+}} xmm1 = [2.4E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: callq atan2f@PLT +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_atan2_v3f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: subq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm1 = [2.5E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: callq atan2f@PLT +; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: callq atan2f@PLT +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmovss {{.*#+}} xmm1 = [2.4E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: callq atan2f@PLT +; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] +; AVX-NEXT: addq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %atan2 = call <3 x float> @llvm.experimental.constrained.atan2.v3f32( + <3 x float> <float 42.0, float 43.0, float 44.0>, + <3 x float> <float 23.0, float 24.0, float 25.0>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <3 x float> %atan2 +} + +define <3 x double> @constrained_vector_atan2_v3f64() #0 { +; CHECK-LABEL: constrained_vector_atan2_v3f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0] +; CHECK-NEXT: callq atan2@PLT +; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0] +; CHECK-NEXT: callq atan2@PLT +; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] +; CHECK-NEXT: movsd {{.*#+}} xmm1
= [2.3199999999999999E+1,0.0E+0] +; CHECK-NEXT: callq atan2@PLT +; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_atan2_v3f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: subq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0] +; AVX-NEXT: callq atan2@PLT +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0] +; AVX-NEXT: callq atan2@PLT +; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3199999999999999E+1,0.0E+0] +; AVX-NEXT: vzeroupper +; AVX-NEXT: callq atan2@PLT +; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload +; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX-NEXT: addq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %atan2 = call <3 x double> @llvm.experimental.constrained.atan2.v3f64( + <3 x double> <double 42.0, double 42.1, double 42.2>, + <3 x double> <double 23.0, double 23.1, double 23.2>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <3 x double> %atan2 +} + +define <4 x double> @constrained_vector_atan2_v4f64() #0 { +; CHECK-LABEL: constrained_vector_atan2_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0] +; CHECK-NEXT: callq atan2@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0] +; CHECK-NEXT: callq atan2@PLT +; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] +; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3300000000000001E+1,0.0E+0] +; CHECK-NEXT: callq atan2@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] +; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3199999999999999E+1,0.0E+0] +; CHECK-NEXT: callq atan2@PLT +; CHECK-NEXT: movaps %xmm0, %xmm1 +; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_atan2_v4f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: subq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3300000000000001E+1,0.0E+0] +; AVX-NEXT: callq atan2@PLT +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 =
[2.3199999999999999E+1,0.0E+0] +; AVX-NEXT: callq atan2@PLT +; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0] +; AVX-NEXT: callq atan2@PLT +; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0] +; AVX-NEXT: callq atan2@PLT +; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX-NEXT: addq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %atan2 = call <4 x double> @llvm.experimental.constrained.atan2.v4f64( + <4 x double> <double 42.0, double 42.1, double 42.2, double 42.3>, + <4 x double> <double 23.0, double 23.1, double 23.2, double 23.3>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %atan2 +} + define <1 x float> @constrained_vector_cosh_v1f32() #0 { ; CHECK-LABEL: constrained_vector_cosh_v1f32: ; CHECK: # %bb.0: # %entry @@ -9546,6 +9803,7 @@ declare <4 x double> @llvm.experimental.constrained.tan.v4f64(<4 x double>, meta declare <4 x double> @llvm.experimental.constrained.asin.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.acos.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.atan.v4f64(<4 x double>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.atan2.v4f64(<4 x double>, <4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.sinh.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.cosh.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.tanh.v4f64(<4 x double>, metadata, metadata) diff --git a/llvm/test/Feature/fp-intrinsics.ll b/llvm/test/Feature/fp-intrinsics.ll index 80f8b15abfaabe..ada22c39abc9e7 100644 --- a/llvm/test/Feature/fp-intrinsics.ll +++ b/llvm/test/Feature/fp-intrinsics.ll @@ -195,6 +195,19 @@ entry: ret double %result } +; Verify that atan2(42.0, 23.0) isn't simplified when the rounding mode is unknown. +; CHECK-LABEL: fatan2 +; CHECK: call double @llvm.experimental.constrained.atan2 +define double @fatan2() #0 { +entry: + %result = call double @llvm.experimental.constrained.atan2.f64( + double 42.0, + double 23.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + ; Verify that cosh(42.0) isn't simplified when the rounding mode is unknown. ; CHECK-LABEL: fcosh ; CHECK: call double @llvm.experimental.constrained.cosh