From 189dd9d66e3a814524de443b5f140fd4299555db Mon Sep 17 00:00:00 2001 From: varkor Date: Thu, 30 May 2019 23:10:54 +0100 Subject: [PATCH] [X86] Combine fminnum/fmaxnum with non-nan operand to fmin/fmax If we have a known non-nan operand, place it in the second operand of fmin/fmax that is returned if either operand is nan. Differential Revision: reviews.llvm.org/D62448 llvm-svn: 361704 Co-Authored-By: nikic --- llvm/lib/Target/X86/X86ISelLowering.cpp | 12 ++++++--- llvm/test/CodeGen/X86/extract-fp.ll | 36 +++++++++++++++++++++++++ llvm/test/CodeGen/X86/fmaxnum.ll | 28 +++++++++++++++++++ llvm/test/CodeGen/X86/fminnum.ll | 28 +++++++++++++++++++ 4 files changed, 100 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2dfee3a4701e63..8e0f065458d911 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -38936,9 +38936,6 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG, if (Subtarget.useSoftFloat()) return SDValue(); - // TODO: If an operand is already known to be a NaN or not a NaN, this - // should be an optional swap and FMAX/FMIN. - EVT VT = N->getValueType(0); if (!((Subtarget.hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) || (Subtarget.hasSSE2() && (VT == MVT::f64 || VT == MVT::v2f64)) || @@ -38955,6 +38952,13 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG, if (DAG.getTarget().Options.NoNaNsFPMath || N->getFlags().hasNoNaNs()) return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags()); + // If one of the operands is known non-NaN use the native min/max instructions + // with the non-NaN input as second operand. + if (DAG.isKnownNeverNaN(Op1)) + return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags()); + if (DAG.isKnownNeverNaN(Op0)) + return DAG.getNode(MinMaxOp, DL, VT, Op1, Op0, N->getFlags()); + // If we have to respect NaN inputs, this takes at least 3 instructions. 
// Favor a library call when operating on a scalar and minimizing code size. if (!VT.isVector() && DAG.getMachineFunction().getFunction().optForMinSize()) @@ -41549,7 +41553,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::FMAX: return combineFMinFMax(N, DAG); case ISD::FMINNUM: case ISD::FMAXNUM: return combineFMinNumFMaxNum(N, DAG, Subtarget); - case X86ISD::CVTSI2P: + case X86ISD::CVTSI2P: case X86ISD::CVTUI2P: return combineX86INT_TO_FP(N, DAG, DCI); case X86ISD::BT: return combineBT(N, DAG, DCI); case ISD::ANY_EXTEND: diff --git a/llvm/test/CodeGen/X86/extract-fp.ll b/llvm/test/CodeGen/X86/extract-fp.ll index 06ba30b603e50b..50762fa01b3046 100644 --- a/llvm/test/CodeGen/X86/extract-fp.ll +++ b/llvm/test/CodeGen/X86/extract-fp.ll @@ -84,3 +84,39 @@ define float @ext_frem_v4f32_constant_op0(<4 x float> %x) { ret float %ext } +define float @ext_maxnum_v4f32(<4 x float> %x) nounwind { +; CHECK-LABEL: ext_maxnum_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: maxss {{.*}}(%rip), %xmm0 +; CHECK-NEXT: retq + %v = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> <float 0.0, float 1.0, float 2.0, float 3.0>) + %r = extractelement <4 x float> %v, i32 2 + ret float %r +} + +define double @ext_minnum_v2f64(<2 x double> %x) nounwind { +; CHECK-LABEL: ext_minnum_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: minsd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: retq + %v = call <2 x double> @llvm.minnum.v2f64(<2 x double> <double 0.0, double 1.0>, <2 x double> %x) + %r = extractelement <2 x double> %v, i32 1 + ret double %r +} + +;define double @ext_maximum_v4f64(<2 x double> %x) nounwind { +; %v = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double> <double 0.0, double 1.0>) +; %r = extractelement <2 x double> %v, i32 1 +; ret double %r +;} + +;define float @ext_minimum_v4f32(<4 x float> %x) nounwind { +; %v = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> <float 0.0, float 1.0, float 2.0, float 3.0>) +; %r = extractelement <4 x float> %v, i32 1 +; ret float %r 
+;} + +declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) +declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) diff --git a/llvm/test/CodeGen/X86/fmaxnum.ll b/llvm/test/CodeGen/X86/fmaxnum.ll index 013d12c665700d..123e993a72af95 100644 --- a/llvm/test/CodeGen/X86/fmaxnum.ll +++ b/llvm/test/CodeGen/X86/fmaxnum.ll @@ -349,5 +349,33 @@ define <2 x double> @maxnum_intrinsic_nnan_attr_f64(<2 x double> %a, <2 x double ret <2 x double> %r } +define float @test_maxnum_const_op1(float %x) { +; SSE-LABEL: test_maxnum_const_op1: +; SSE: # %bb.0: +; SSE-NEXT: maxss {{.*}}(%rip), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_maxnum_const_op1: +; AVX: # %bb.0: +; AVX-NEXT: vmaxss {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq + %r = call float @llvm.maxnum.f32(float 1.0, float %x) + ret float %r +} + +define float @test_maxnum_const_op2(float %x) { +; SSE-LABEL: test_maxnum_const_op2: +; SSE: # %bb.0: +; SSE-NEXT: maxss {{.*}}(%rip), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_maxnum_const_op2: +; AVX: # %bb.0: +; AVX-NEXT: vmaxss {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq + %r = call float @llvm.maxnum.f32(float %x, float 1.0) + ret float %r +} + attributes #0 = { "no-nans-fp-math"="true" } diff --git a/llvm/test/CodeGen/X86/fminnum.ll b/llvm/test/CodeGen/X86/fminnum.ll index 052423e5709af6..59767442e303de 100644 --- a/llvm/test/CodeGen/X86/fminnum.ll +++ b/llvm/test/CodeGen/X86/fminnum.ll @@ -341,5 +341,33 @@ define <4 x float> @minnum_intrinsic_nnan_attr_v4f32(<4 x float> %a, <4 x float> ret <4 x float> %r } +define float @test_minnum_const_op1(float %x) { +; SSE-LABEL: test_minnum_const_op1: +; SSE: # %bb.0: +; SSE-NEXT: minss {{.*}}(%rip), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_minnum_const_op1: +; AVX: # %bb.0: +; AVX-NEXT: vminss {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq + %r = call float @llvm.minnum.f32(float 1.0, float %x) + ret float %r +} + +define float @test_minnum_const_op2(float %x) { +; SSE-LABEL: test_minnum_const_op2: 
+; SSE: # %bb.0: +; SSE-NEXT: minss {{.*}}(%rip), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_minnum_const_op2: +; AVX: # %bb.0: +; AVX-NEXT: vminss {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq + %r = call float @llvm.minnum.f32(float %x, float 1.0) + ret float %r +} + attributes #0 = { "no-nans-fp-math"="true" }