From 2c7870dccaf31167b7d7b422ed51d1f0b3e343d3 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Thu, 19 Nov 2020 09:38:14 -0800 Subject: [PATCH 1/3] [NewPM] Add pipeline EP callback after initial frontend cleanup This matches the legacy PM's EP_ModuleOptimizerEarly. Some backends use this extension point and adding the pass somewhere else like PipelineStartEPCallback doesn't work. Reviewed By: ychen Differential Revision: https://reviews.llvm.org/D91804 --- llvm/include/llvm/Passes/PassBuilder.h | 12 ++++++++++++ llvm/lib/Passes/PassBuilder.cpp | 5 +++++ llvm/test/Other/new-pm-O0-ep-callbacks.ll | 1 + llvm/test/Other/new-pm-defaults.ll | 5 +++++ llvm/test/Other/pass-pipeline-parsing.ll | 3 +++ llvm/tools/opt/NewPMDriver.cpp | 12 ++++++++++++ 6 files changed, 38 insertions(+) diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index 97e0b19ed07f00..fb1a83306f556a 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -597,6 +597,15 @@ class PassBuilder { PipelineStartEPCallbacks.push_back(C); } + /// Register a callback for a default optimizer pipeline extension point. + /// + /// This extension point allows adding optimization right after passes that do + /// basic simplification of the input IR. 
+ void registerPipelineEarlySimplificationEPCallback( + const std::function &C) { + PipelineEarlySimplificationEPCallbacks.push_back(C); + } + /// Register a callback for a default optimizer pipeline extension point /// /// This extension point allows adding optimizations at the very end of the @@ -729,6 +738,9 @@ class PassBuilder { // Module callbacks SmallVector, 2> PipelineStartEPCallbacks; + SmallVector, 2> + PipelineEarlySimplificationEPCallbacks; + SmallVector, 2> ModuleAnalysisRegistrationCallbacks; SmallVector&1 < %s | FileCheck %s ; RUN: opt -disable-output -debug-pass-manager -passes-ep-vectorizer-start=no-op-function -passes='default' 2>&1 < %s | FileCheck %s ; RUN: opt -disable-output -debug-pass-manager -passes-ep-pipeline-start=no-op-module -passes='default' 2>&1 < %s | FileCheck %s +; RUN: opt -disable-output -debug-pass-manager -passes-ep-pipeline-early-simplification=no-op-module -passes='default' 2>&1 < %s | FileCheck %s ; RUN: opt -disable-output -debug-pass-manager -passes-ep-optimizer-last=no-op-function -passes='default' 2>&1 < %s | FileCheck %s ; CHECK: Running pass: NoOp diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index 1f2142c5bcd1c4..9e27486e981bc0 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -55,6 +55,10 @@ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-PIPELINE-START,CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ +; RUN: -passes-ep-pipeline-early-simplification='no-op-module' \ +; RUN: -passes='default' -S %s 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-PIPELINE-EARLY-SIMPLIFICATION,CHECK-O23SZ +; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-pipeline-start='no-op-module' \ ; RUN: -passes='lto-pre-link' -S %s 2>&1 \ ; RUN: | FileCheck %s 
--check-prefixes=CHECK-O,CHECK-LTO,CHECK-O3,%llvmcheckext,CHECK-EP-PIPELINE-START,CHECK-O23SZ @@ -84,6 +88,7 @@ ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. +; CHECK-EP-PIPELINE-EARLY-SIMPLIFICATION-NEXT: Running pass: NoOpModulePass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass diff --git a/llvm/test/Other/pass-pipeline-parsing.ll b/llvm/test/Other/pass-pipeline-parsing.ll index adf7554ac503b5..4cff050f52df0b 100644 --- a/llvm/test/Other/pass-pipeline-parsing.ll +++ b/llvm/test/Other/pass-pipeline-parsing.ll @@ -276,6 +276,9 @@ ; RUN: opt -passes-ep-pipeline-start=bad -passes=no-op-function \ ; RUN: /dev/null -disable-output 2>&1 | FileCheck %s -check-prefix=PASSES-EP-PIPELINESTART-ERR ; PASSES-EP-PIPELINESTART-ERR: Could not parse -passes-ep-pipeline-start pipeline: unknown pass name 'bad' +; RUN: opt -passes-ep-pipeline-early-simplification=bad -passes=no-op-function \ +; RUN: /dev/null -disable-output 2>&1 | FileCheck %s -check-prefix=PASSES-EP-PIPELINEEARLYSIMPLIFICATION-ERR +; PASSES-EP-PIPELINEEARLYSIMPLIFICATION-ERR: Could not parse -passes-ep-pipeline-early-simplification pipeline: unknown pass name 'bad' define void @f() { entry: diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp index 224a3242eb17ff..9badbdf64932d3 100644 --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -107,6 +107,11 @@ static cl::opt PipelineStartEPPipeline( cl::desc("A textual description of the module pass pipeline inserted at " "the PipelineStart extension point into default pipelines"), cl::Hidden); +static cl::opt PipelineEarlySimplificationEPPipeline( + "passes-ep-pipeline-early-simplification", + cl::desc("A textual description of the module pass pipeline inserted at " + "the EarlySimplification extension point 
into default pipelines"), + cl::Hidden); static cl::opt OptimizerLastEPPipeline( "passes-ep-optimizer-last", cl::desc("A textual description of the module pass pipeline inserted at " @@ -195,6 +200,13 @@ static void registerEPCallbacks(PassBuilder &PB) { ExitOnError Err("Unable to parse PipelineStartEP pipeline: "); Err(PB.parsePassPipeline(PM, PipelineStartEPPipeline)); }); + if (tryParsePipelineText( + PB, PipelineEarlySimplificationEPPipeline)) + PB.registerPipelineEarlySimplificationEPCallback( + [&PB](ModulePassManager &PM, PassBuilder::OptimizationLevel) { + ExitOnError Err("Unable to parse EarlySimplification pipeline: "); + Err(PB.parsePassPipeline(PM, PipelineEarlySimplificationEPPipeline)); + }); if (tryParsePipelineText(PB, OptimizerLastEPPipeline)) PB.registerOptimizerLastEPCallback( [&PB](ModulePassManager &PM, PassBuilder::OptimizationLevel) { From 9c588f53fc423dd0ed69250fbc93b37b40c0ef44 Mon Sep 17 00:00:00 2001 From: QingShan Zhang Date: Wed, 25 Nov 2020 05:37:15 +0000 Subject: [PATCH 2/3] [DAGCombine] Add hook to allow target specific test for sqrt input PowerPC has instructions such as ftsqrt/xstsqrtdp that test the input for software square root. LLVM currently tests it against the smallest normalized value using abs + setcc. We should add a hook for targets that have such test instructions. 
Reviewed By: Spatel, Chen Zheng, Qiu Chao Fang Differential Revision: https://reviews.llvm.org/D80706 --- llvm/include/llvm/CodeGen/TargetLowering.h | 9 ++++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 41 +++++++++-------- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 29 ++++++++++++ llvm/lib/Target/PowerPC/PPCISelLowering.h | 5 +++ llvm/lib/Target/PowerPC/PPCInstrFormats.td | 3 +- llvm/lib/Target/PowerPC/PPCInstrInfo.td | 7 ++- llvm/lib/Target/PowerPC/PPCInstrVSX.td | 3 +- llvm/test/CodeGen/PowerPC/fma-mutate.ll | 7 +-- llvm/test/CodeGen/PowerPC/recipest.ll | 45 ++++++++----------- 9 files changed, 96 insertions(+), 53 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 164cbd71071320..16580a9160b9a1 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -4277,6 +4277,15 @@ class TargetLowering : public TargetLoweringBase { return SDValue(); } + /// Return a target-dependent comparison result if the input operand is + /// suitable for use with a square root estimate calculation. For example, the + /// comparison may check if the operand is NAN, INF, zero, normal, etc. The + /// result should be used as the condition operand for a select or branch. + virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, + const DenormalMode &Mode) const { + return SDValue(); + } + //===--------------------------------------------------------------------===// // Legalization utility functions // diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index cae602d166d15e..4ac1743d2d3428 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -22056,26 +22056,31 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, // possibly a denormal. Force the answer to 0.0 for those cases. 
SDLoc DL(Op); EVT CCVT = getSetCCResultType(VT); - ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT; + SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); DenormalMode DenormMode = DAG.getDenormalMode(VT); - if (DenormMode.Input == DenormalMode::IEEE) { - // This is specifically a check for the handling of denormal inputs, - // not the result. - - // fabs(X) < SmallestNormal ? 0.0 : Est - const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); - APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem); - SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT); - SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); - SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op); - SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT); - Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est); - } else { - // X == 0.0 ? 0.0 : Est - SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); - SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); - Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est); + // Try the target specific test first. + SDValue Test = TLI.getSqrtInputTest(Op, DAG, DenormMode); + if (!Test) { + // If no test provided by target, testing it with denormal inputs to + // avoid wrong estimate. + if (DenormMode.Input == DenormalMode::IEEE) { + // This is specifically a check for the handling of denormal inputs, + // not the result. + + // Test = fabs(X) < SmallestNormal + const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); + APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem); + SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT); + SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op); + Test = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT); + } else + // Test = X == 0.0 + Test = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); } + // Test ? 0.0 : Est + Est = DAG.getNode(Test.getValueType().isVector() ? 
ISD::VSELECT + : ISD::SELECT, + DL, VT, Test, FPZero, Est); } } return Est; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 10aecf97fcdf1a..d19fbd477d77e0 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1447,6 +1447,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { return "PPCISD::FP_TO_SINT_IN_VSR"; case PPCISD::FRE: return "PPCISD::FRE"; case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE"; + case PPCISD::FTSQRT: + return "PPCISD::FTSQRT"; case PPCISD::STFIWX: return "PPCISD::STFIWX"; case PPCISD::VPERM: return "PPCISD::VPERM"; case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; @@ -12758,6 +12760,33 @@ static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) { return RefinementSteps; } +SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG, + const DenormalMode &Mode) const { + // TODO - add support for v2f64/v4f32 + EVT VT = Op.getValueType(); + if (VT != MVT::f64) + return SDValue(); + + SDLoc DL(Op); + // The output register of FTSQRT is CR field. + SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op); + // ftsqrt BF,FRB + // Let e_b be the unbiased exponent of the double-precision + // floating-point operand in register FRB. + // fe_flag is set to 1 if either of the following conditions occurs. + // - The double-precision floating-point operand in register FRB is a zero, + // a NaN, or an infinity, or a negative value. + // - e_b is less than or equal to -970. + // Otherwise fe_flag is set to 0. + // Both VSX and non-VSX versions would set EQ bit in the CR if the number is + // not eligible for iteration. 
(zero/negative/infinity/nan or unbiased + // exponent is less than -970) + SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32); + return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1, + FTSQRT, SRIdxVal), + 0); +} + SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 414a355264f834..6c4899fae22cbf 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -89,6 +89,9 @@ namespace llvm { FRE, FRSQRTE, + /// Test instruction for software square root. + FTSQRT, + /// VPERM - The PPC VPERM Instruction. /// VPERM, @@ -1283,6 +1286,8 @@ namespace llvm { bool Reciprocal) const override; SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override; + SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, + const DenormalMode &Mode) const override; unsigned combineRepeatedFPDivisors() const override; SDValue diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td index 5ff5fc78326ba8..646efe64a22c7c 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -637,9 +637,10 @@ class XForm_17 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, } class XForm_17a opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, - InstrItinClass itin> + InstrItinClass itin, list pattern> : XForm_17 { let FRA = 0; + let Pattern = pattern; } class XForm_18 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 2e77d04d4a79e2..de9ae99adac731 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -74,6 +74,9 @@ def SDT_PPCcondbr : 
SDTypeProfile<0, 3, [ SDTCisVT<0, i32>, SDTCisVT<2, OtherVT> ]>; +def SDT_PPCFtsqrt : SDTypeProfile<1, 1, [ + SDTCisVT<0, i32>]>; + def SDT_PPClbrx : SDTypeProfile<1, 2, [ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> ]>; @@ -124,6 +127,7 @@ def SDT_PPCFPMinMax : SDTypeProfile<1, 2, [ def PPCfre : SDNode<"PPCISD::FRE", SDTFPUnaryOp, []>; def PPCfrsqrte: SDNode<"PPCISD::FRSQRTE", SDTFPUnaryOp, []>; +def PPCftsqrt : SDNode<"PPCISD::FTSQRT", SDT_PPCFtsqrt,[]>; def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>; def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>; @@ -2643,7 +2647,8 @@ let isCompare = 1, mayRaiseFPException = 1, hasSideEffects = 0 in { def FTDIV: XForm_17<63, 128, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB), "ftdiv $crD, $fA, $fB", IIC_FPCompare>; def FTSQRT: XForm_17a<63, 160, (outs crrc:$crD), (ins f8rc:$fB), - "ftsqrt $crD, $fB", IIC_FPCompare>; + "ftsqrt $crD, $fB", IIC_FPCompare, + [(set i32:$crD, (PPCftsqrt f64:$fB))]>; let mayRaiseFPException = 1, hasSideEffects = 0 in { let Interpretation64Bit = 1, isCodeGenOnly = 1 in diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 1ffbd405d87aa2..b023c059606397 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -629,7 +629,8 @@ let hasSideEffects = 0 in { "xstdivdp $crD, $XA, $XB", IIC_FPCompare, []>; def XSTSQRTDP : XX2Form_1<60, 106, (outs crrc:$crD), (ins vsfrc:$XB), - "xstsqrtdp $crD, $XB", IIC_FPCompare, []>; + "xstsqrtdp $crD, $XB", IIC_FPCompare, + [(set i32:$crD, (PPCftsqrt f64:$XB))]>; def XVTDIVDP : XX3Form_1<60, 125, (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB), "xvtdivdp $crD, $XA, $XB", IIC_FPCompare, []>; diff --git a/llvm/test/CodeGen/PowerPC/fma-mutate.ll b/llvm/test/CodeGen/PowerPC/fma-mutate.ll index a1e3473edf222a..62cce7362c682e 100644 --- a/llvm/test/CodeGen/PowerPC/fma-mutate.ll +++ b/llvm/test/CodeGen/PowerPC/fma-mutate.ll @@ -9,12 +9,9 @@ declare double 
@llvm.sqrt.f64(double) define double @foo3_fmf(double %a) nounwind { ; CHECK-LABEL: foo3_fmf: ; CHECK: # %bb.0: -; CHECK-NEXT: xsabsdp 0, 1 -; CHECK-NEXT: addis 3, 2, .LCPI0_2@toc@ha -; CHECK-NEXT: lfd 2, .LCPI0_2@toc@l(3) -; CHECK-NEXT: xscmpudp 0, 0, 2 +; CHECK-NEXT: xstsqrtdp 0, 1 ; CHECK-NEXT: xxlxor 0, 0, 0 -; CHECK-NEXT: blt 0, .LBB0_2 +; CHECK-NEXT: bc 12, 2, .LBB0_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: xsrsqrtedp 0, 1 ; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll index e3894bcd23f5a3..cd8520b35ffad5 100644 --- a/llvm/test/CodeGen/PowerPC/recipest.ll +++ b/llvm/test/CodeGen/PowerPC/recipest.ll @@ -749,11 +749,8 @@ define <4 x float> @hoo2_safe(<4 x float> %a, <4 x float> %b) nounwind { define double @foo3_fmf(double %a) nounwind { ; CHECK-P7-LABEL: foo3_fmf: ; CHECK-P7: # %bb.0: -; CHECK-P7-NEXT: fabs 0, 1 -; CHECK-P7-NEXT: addis 3, 2, .LCPI20_2@toc@ha -; CHECK-P7-NEXT: lfd 2, .LCPI20_2@toc@l(3) -; CHECK-P7-NEXT: fcmpu 0, 0, 2 -; CHECK-P7-NEXT: blt 0, .LBB20_2 +; CHECK-P7-NEXT: ftsqrt 0, 1 +; CHECK-P7-NEXT: bc 12, 2, .LBB20_2 ; CHECK-P7-NEXT: # %bb.1: ; CHECK-P7-NEXT: frsqrte 0, 1 ; CHECK-P7-NEXT: addis 3, 2, .LCPI20_0@toc@ha @@ -770,18 +767,15 @@ define double @foo3_fmf(double %a) nounwind { ; CHECK-P7-NEXT: fmul 1, 1, 0 ; CHECK-P7-NEXT: blr ; CHECK-P7-NEXT: .LBB20_2: -; CHECK-P7-NEXT: addis 3, 2, .LCPI20_3@toc@ha -; CHECK-P7-NEXT: lfs 1, .LCPI20_3@toc@l(3) +; CHECK-P7-NEXT: addis 3, 2, .LCPI20_2@toc@ha +; CHECK-P7-NEXT: lfs 1, .LCPI20_2@toc@l(3) ; CHECK-P7-NEXT: blr ; ; CHECK-P8-LABEL: foo3_fmf: ; CHECK-P8: # %bb.0: -; CHECK-P8-NEXT: xsabsdp 0, 1 -; CHECK-P8-NEXT: addis 3, 2, .LCPI20_2@toc@ha -; CHECK-P8-NEXT: lfd 2, .LCPI20_2@toc@l(3) -; CHECK-P8-NEXT: xscmpudp 0, 0, 2 +; CHECK-P8-NEXT: xstsqrtdp 0, 1 ; CHECK-P8-NEXT: xxlxor 0, 0, 0 -; CHECK-P8-NEXT: blt 0, .LBB20_2 +; CHECK-P8-NEXT: bc 12, 2, .LBB20_2 ; CHECK-P8-NEXT: # %bb.1: ; CHECK-P8-NEXT: xsrsqrtedp 0, 1 ; 
CHECK-P8-NEXT: addis 3, 2, .LCPI20_0@toc@ha @@ -803,12 +797,9 @@ define double @foo3_fmf(double %a) nounwind { ; ; CHECK-P9-LABEL: foo3_fmf: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: addis 3, 2, .LCPI20_2@toc@ha -; CHECK-P9-NEXT: xsabsdp 0, 1 -; CHECK-P9-NEXT: lfd 2, .LCPI20_2@toc@l(3) -; CHECK-P9-NEXT: xscmpudp 0, 0, 2 +; CHECK-P9-NEXT: xstsqrtdp 0, 1 ; CHECK-P9-NEXT: xxlxor 0, 0, 0 -; CHECK-P9-NEXT: blt 0, .LBB20_2 +; CHECK-P9-NEXT: bc 12, 2, .LBB20_2 ; CHECK-P9-NEXT: # %bb.1: ; CHECK-P9-NEXT: xsrsqrtedp 0, 1 ; CHECK-P9-NEXT: addis 3, 2, .LCPI20_0@toc@ha @@ -1038,18 +1029,18 @@ define <2 x double> @hoo4_fmf(<2 x double> %a) #1 { ; CHECK-P7-LABEL: hoo4_fmf: ; CHECK-P7: # %bb.0: ; CHECK-P7-NEXT: addis 3, 2, .LCPI26_2@toc@ha +; CHECK-P7-NEXT: ftsqrt 0, 1 ; CHECK-P7-NEXT: fmr 3, 1 -; CHECK-P7-NEXT: addis 4, 2, .LCPI26_1@toc@ha +; CHECK-P7-NEXT: addis 4, 2, .LCPI26_0@toc@ha ; CHECK-P7-NEXT: lfs 0, .LCPI26_2@toc@l(3) -; CHECK-P7-NEXT: addis 3, 2, .LCPI26_0@toc@ha -; CHECK-P7-NEXT: lfs 4, .LCPI26_1@toc@l(4) -; CHECK-P7-NEXT: lfs 5, .LCPI26_0@toc@l(3) -; CHECK-P7-NEXT: fcmpu 0, 1, 0 +; CHECK-P7-NEXT: addis 3, 2, .LCPI26_1@toc@ha +; CHECK-P7-NEXT: lfs 5, .LCPI26_0@toc@l(4) +; CHECK-P7-NEXT: lfs 4, .LCPI26_1@toc@l(3) ; CHECK-P7-NEXT: fmr 1, 0 -; CHECK-P7-NEXT: bne 0, .LBB26_3 +; CHECK-P7-NEXT: bc 4, 2, .LBB26_3 ; CHECK-P7-NEXT: # %bb.1: -; CHECK-P7-NEXT: fcmpu 0, 2, 0 -; CHECK-P7-NEXT: bne 0, .LBB26_4 +; CHECK-P7-NEXT: ftsqrt 0, 2 +; CHECK-P7-NEXT: bc 4, 2, .LBB26_4 ; CHECK-P7-NEXT: .LBB26_2: ; CHECK-P7-NEXT: fmr 2, 0 ; CHECK-P7-NEXT: blr @@ -1063,8 +1054,8 @@ define <2 x double> @hoo4_fmf(<2 x double> %a) #1 { ; CHECK-P7-NEXT: fmadd 1, 3, 1, 5 ; CHECK-P7-NEXT: fmul 3, 3, 4 ; CHECK-P7-NEXT: fmul 1, 3, 1 -; CHECK-P7-NEXT: fcmpu 0, 2, 0 -; CHECK-P7-NEXT: beq 0, .LBB26_2 +; CHECK-P7-NEXT: ftsqrt 0, 2 +; CHECK-P7-NEXT: bc 12, 2, .LBB26_2 ; CHECK-P7-NEXT: .LBB26_4: ; CHECK-P7-NEXT: frsqrte 0, 2 ; CHECK-P7-NEXT: fmul 3, 2, 0 From 9130651126b745b18138b816487cdeb8a689a27f Mon Sep 17 
00:00:00 2001 From: Max Kazantsev Date: Wed, 25 Nov 2020 13:20:15 +0700 Subject: [PATCH 3/3] Revert "[SCEV] Generalize no-self-wrap check in isLoopInvariantExitCondDuringFirstIterations" This reverts commit 7dcc8899174f44b7447bc48a9f2ff27f5458f8b7. This patch introduced a logical error that breaks the whole logic of this analysis. All checks we are making are supposed to be loop-independent, so that we could safely remove the range check. The 'nw' fact is loop-dependent, so we cannot remove the check based on facts from this very check. Motivating examples will follow. --- llvm/lib/Analysis/ScalarEvolution.cpp | 31 +++++++++++++-------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 08ed363918a952..5f77f4aa05c2c8 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -9643,19 +9643,17 @@ ScalarEvolution::getLoopInvariantExitCondDuringFirstIterations( if (!ICmpInst::isRelational(Pred)) return None; + // TODO: Support steps other than +/- 1. const SCEV *Step = AR->getStepRecurrence(*this); - bool IsStepNonPositive = isKnownNonPositive(Step); - if (!IsStepNonPositive && !isKnownNonNegative(Step)) + auto *One = getOne(Step->getType()); + auto *MinusOne = getNegativeSCEV(One); + if (Step != One && Step != MinusOne) return None; - bool HasNoSelfWrap = AR->hasNoSelfWrap(); - if (!HasNoSelfWrap) - // If num iter has same type as the AddRec, and step is +/- 1, even max - // possible number of iterations is not enough to self-wrap. - if (MaxIter->getType() == AR->getType()) - if (Step == getOne(AR->getType()) || Step == getMinusOne(AR->getType())) - HasNoSelfWrap = true; - // Only proceed with non-self-wrapping ARs. - if (!HasNoSelfWrap) + + // Type mismatch here means that MaxIter is potentially larger than max + // unsigned value in start type, which mean we cannot prove no wrap for the + // indvar. 
+ if (AR->getType() != MaxIter->getType()) return None; // Value of IV on suggested last iteration. @@ -9663,13 +9661,14 @@ ScalarEvolution::getLoopInvariantExitCondDuringFirstIterations( // Does it still meet the requirement? if (!isKnownPredicateAt(Pred, Last, RHS, Context)) return None; - // We know that the addrec does not have a self-wrap. To prove that there is - // no signed/unsigned wrap, we need to check that - // Start <= Last for positive step or Start >= Last for negative step. Either - // works for zero step. + // Because step is +/- 1 and MaxIter has same type as Start (i.e. it does + // not exceed max unsigned value of this type), this effectively proves + // that there is no wrap during the iteration. To prove that there is no + // signed/unsigned wrap, we need to check that + // Start <= Last for step = 1 or Start >= Last for step = -1. ICmpInst::Predicate NoOverflowPred = CmpInst::isSigned(Pred) ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; - if (IsStepNonPositive) + if (Step == MinusOne) NoOverflowPred = CmpInst::getSwappedPredicate(NoOverflowPred); const SCEV *Start = AR->getStart(); if (!isKnownPredicateAt(NoOverflowPred, Start, Last, Context))