
Commit 202115b

Merged master:1c00d096a608 into amd-gfx:ec1abe3201ce
Local branch amd-gfx ec1abe3 Merged master:147ccc848a55 into amd-gfx:d3b8b5809e03
Remote branch master 1c00d09 [VE] LVLGen sets VL before vector insts
Sw authored and committed on Nov 16, 2020
2 parents ec1abe3 + 1c00d09
Showing 8 changed files with 410 additions and 75 deletions.
1 change: 1 addition & 0 deletions llvm/lib/Target/VE/CMakeLists.txt
@@ -14,6 +14,7 @@ tablegen(LLVM VEGenCallingConv.inc -gen-callingconv)
add_public_tablegen_target(VECommonTableGen)

add_llvm_target(VECodeGen
  LVLGen.cpp
  VEAsmPrinter.cpp
  VEFrameLowering.cpp
  VEISelDAGToDAG.cpp
132 changes: 132 additions & 0 deletions llvm/lib/Target/VE/LVLGen.cpp
@@ -0,0 +1,132 @@
//===-- LVLGen.cpp - LVL instruction generator ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "VE.h"
#include "VESubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "lvl-gen"

namespace {
struct LVLGen : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;

  static char ID;
  LVLGen() : MachineFunctionPass(ID) {}
  bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
  bool runOnMachineFunction(MachineFunction &F) override;

  unsigned getVL(const MachineInstr &MI);
  int getVLIndex(unsigned Opcode);
};
char LVLGen::ID = 0;

} // end of anonymous namespace

FunctionPass *llvm::createLVLGenPass() { return new LVLGen; }

int LVLGen::getVLIndex(unsigned Opcode) {
  const MCInstrDesc &MCID = TII->get(Opcode);

  // If an instruction has VLIndex information, return it.
  if (HAS_VLINDEX(MCID.TSFlags))
    return GET_VLINDEX(MCID.TSFlags);

  return -1;
}

// Returns the register holding the vector length, or NoRegister when this MI
// does not have a vector length operand.
unsigned LVLGen::getVL(const MachineInstr &MI) {
  int Index = getVLIndex(MI.getOpcode());
  if (Index >= 0)
    return MI.getOperand(Index).getReg();

  return VE::NoRegister;
}

bool LVLGen::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
#define RegName(no)                                                            \
  (MBB.getParent()->getSubtarget<VESubtarget>().getRegisterInfo()->getName(no))

  bool Changed = false;
  bool HasRegForVL = false;
  unsigned RegForVL;

  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
    MachineBasicBlock::iterator MI = I;

    unsigned Reg = getVL(*MI);
    if (Reg != VE::NoRegister) {
      LLVM_DEBUG(dbgs() << "Vector instruction found: ");
      LLVM_DEBUG(MI->dump());
      LLVM_DEBUG(dbgs() << "Vector length is " << RegName(Reg) << ". ");
      LLVM_DEBUG(dbgs() << "Current VL is "
                        << (HasRegForVL ? RegName(RegForVL) : "unknown")
                        << ". ");

      if (!HasRegForVL || RegForVL != Reg) {
        LLVM_DEBUG(dbgs() << "Generate a LVL instruction to load "
                          << RegName(Reg) << ".\n");
        BuildMI(MBB, I, MI->getDebugLoc(), TII->get(VE::LVLr)).addReg(Reg);
        HasRegForVL = true;
        RegForVL = Reg;
        Changed = true;
      } else {
        LLVM_DEBUG(dbgs() << "Reuse current VL.\n");
      }
    } else if (HasRegForVL) {
      // The old VL is overwritten, so disable HasRegForVL.
      if (MI->findRegisterDefOperandIdx(RegForVL, false, false, TRI) != -1) {
        LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is redefined: ");
        LLVM_DEBUG(MI->dump());
        HasRegForVL = false;
      }
    }
    if (HasRegForVL) {
      // The latest VL is killed, so disable HasRegForVL.
      if (MI->killsRegister(RegForVL, TRI)) {
        LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is killed: ");
        LLVM_DEBUG(MI->dump());
        HasRegForVL = false;
      }
    }

    ++I;
  }
  return Changed;
}

bool LVLGen::runOnMachineFunction(MachineFunction &F) {
  LLVM_DEBUG(dbgs() << "********** Begin LVLGen **********\n");
  LLVM_DEBUG(dbgs() << "********** Function: " << F.getName() << '\n');
  LLVM_DEBUG(F.dump());

  bool Changed = false;

  const VESubtarget &Subtarget = F.getSubtarget<VESubtarget>();
  TII = Subtarget.getInstrInfo();
  TRI = Subtarget.getRegisterInfo();

  for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
    Changed |= runOnMachineBasicBlock(*FI);

  if (Changed) {
    LLVM_DEBUG(dbgs() << "\n");
    LLVM_DEBUG(F.dump());
  }
  LLVM_DEBUG(dbgs() << "********** End LVLGen **********\n");
  return Changed;
}
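
A note on the TSFlags accessors: getVLIndex above relies on HAS_VLINDEX and GET_VLINDEX, which are defined in the VE target headers and are not part of this diff. As a rough, self-contained illustration only (the bit layout below is hypothetical, not the actual VE encoding), such accessors reduce to a small bit-field scheme over MCInstrDesc::TSFlags:

// Hypothetical sketch of a "VL operand index" encoding in TSFlags.
// The real HAS_VLINDEX / GET_VLINDEX live in the VE target headers and
// may use a different layout.
#include <cassert>
#include <cstdint>

constexpr uint64_t VLInBit = 0x1;      // bit 0: instruction has a VL operand
constexpr unsigned VLIndexShift = 1;   // bits [5:1]: operand index of the VL
constexpr uint64_t VLIndexMask = 0x1f;

#define HAS_VLINDEX(TSF) ((TSF) & VLInBit)
#define GET_VLINDEX(TSF) ((int)(((TSF) >> VLIndexShift) & VLIndexMask))

int main() {
  uint64_t TSFlags = VLInBit | (3u << VLIndexShift); // VL is operand 3
  assert(HAS_VLINDEX(TSFlags) && GET_VLINDEX(TSFlags) == 3);
  return 0;
}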
1 change: 1 addition & 0 deletions llvm/lib/Target/VE/VE.h
@@ -29,6 +29,7 @@ class MachineInstr;

FunctionPass *createVEISelDag(VETargetMachine &TM);
FunctionPass *createVEPromoteToI1Pass();
FunctionPass *createLVLGenPass();

void LowerVEMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                 AsmPrinter &AP);
6 changes: 6 additions & 0 deletions llvm/lib/Target/VE/VETargetMachine.cpp
@@ -98,6 +98,7 @@ class VEPassConfig : public TargetPassConfig {

  void addIRPasses() override;
  bool addInstSelector() override;
  void addPreEmitPass() override;
};
} // namespace

@@ -115,3 +116,8 @@ bool VEPassConfig::addInstSelector() {
  addPass(createVEISelDag(getVETargetMachine()));
  return false;
}

void VEPassConfig::addPreEmitPass() {
  // LVLGen should be called after scheduling and register allocation.
  addPass(createLVLGenPass());
}
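
Taken together, the CMakeLists.txt, VE.h, and VETargetMachine.cpp changes are the usual three-step recipe for wiring a new target-specific machine pass into an LLVM backend. A condensed sketch of that recipe with generic placeholder names (an illustration of the pattern, not the VE code itself):

// Sketch: adding a late machine pass to a target. Pre-emit passes run after
// scheduling and register allocation, so the pass sees final physical
// registers, which is exactly what LVLGen needs to track the VL register.
#include "llvm/CodeGen/MachineFunctionPass.h"

using namespace llvm;

namespace {
struct MyLatePass : MachineFunctionPass {
  static char ID;
  MyLatePass() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override {
    return false; // rewrite MF here; return true if anything changed
  }
};
char MyLatePass::ID = 0;
} // namespace

// Step 1: factory function, declared in the target's umbrella header (VE.h).
FunctionPass *createMyLatePass() { return new MyLatePass; }

// Step 2: hook it into the target's TargetPassConfig subclass:
//   void MyPassConfig::addPreEmitPass() { addPass(createMyLatePass()); }
// Step 3: add the new .cpp to the add_llvm_target() list in CMakeLists.txt.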
119 changes: 44 additions & 75 deletions llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1290,66 +1290,6 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  return MadeAnyChanges;
}
}

enum ExitCondAnalysisResult {
  CanBeRemoved,
  CanBeReplacedWithInvariant,
  CannotOptimize
};

/// If the condition of BI is trivially true during at least first MaxIter
/// iterations, return CanBeRemoved.
/// If the condition is equivalent to loop-invariant condition expressed as
/// 'InvariantLHS `InvariantPred` InvariantRHS', fill them into respective
/// output parameters and return CanBeReplacedWithInvariant.
/// Otherwise, return CannotOptimize.
static ExitCondAnalysisResult
analyzeCond(const Loop *L, BranchInst *BI, ScalarEvolution *SE,
            bool ProvingLoopExit, const SCEV *MaxIter,
            ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS,
            const SCEV *&InvariantRHS) {
  ICmpInst::Predicate Pred;
  Value *LHS, *RHS;
  using namespace PatternMatch;
  BasicBlock *TrueSucc, *FalseSucc;
  if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
                      m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
    return CannotOptimize;

  assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
         "Not a loop exit!");

  // 'LHS pred RHS' should now mean that we stay in loop.
  if (L->contains(FalseSucc))
    Pred = CmpInst::getInversePredicate(Pred);

  // If we are proving loop exit, invert the predicate.
  if (ProvingLoopExit)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI))
    return CanBeRemoved;

  if (ProvingLoopExit)
    return CannotOptimize;

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return CannotOptimize;
  InvariantPred = LIP->Pred;
  InvariantLHS = LIP->LHS;
  InvariantRHS = LIP->RHS;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(InvariantPred, InvariantLHS, InvariantRHS, BI))
    return CanBeRemoved;
  return CanBeReplacedWithInvariant;
}

static void replaceExitCond(BranchInst *BI, Value *NewCond,
                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  auto *OldCond = BI->getCondition();
@@ -1390,26 +1330,55 @@ static bool optimizeLoopExitWithUnknownExitCount(
    const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
    ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  if (SkipLastIter) {
    const SCEV *One = SE->getOne(MaxIter->getType());
    MaxIter = SE->getMinusSCEV(MaxIter, One);
  }
  ICmpInst::Predicate InvariantPred;
  const SCEV *InvariantLHS, *InvariantRHS;
  switch (analyzeCond(L, BI, SE, Inverted, MaxIter, InvariantPred, InvariantLHS,
                      InvariantRHS)) {
  case CanBeRemoved:
  ICmpInst::Predicate Pred;
  Value *LHS, *RHS;
  using namespace PatternMatch;
  BasicBlock *TrueSucc, *FalseSucc;
  if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
                      m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
    return false;

  assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
         "Not a loop exit!");

  // 'LHS pred RHS' should now mean that we stay in loop.
  if (L->contains(FalseSucc))
    Pred = CmpInst::getInversePredicate(Pred);

  // If we are proving loop exit, invert the predicate.
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
    foldExit(L, ExitingBB, Inverted, DeadInsts);
    return true;
  case CanBeReplacedWithInvariant: {
    replaceWithInvariantCond(L, ExitingBB, InvariantPred, InvariantLHS,
                             InvariantRHS, Rewriter, DeadInsts);
    return true;
  }
  case CannotOptimize:
  // Further logic works for non-inverted condition only.
  if (Inverted)
    return false;

  if (SkipLastIter) {
    const SCEV *One = SE->getOne(MaxIter->getType());
    MaxIter = SE->getMinusSCEV(MaxIter, One);
  }
  llvm_unreachable("Unknown case!");

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return false;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    foldExit(L, ExitingBB, Inverted, DeadInsts);
  else
    replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
                             Rewriter, DeadInsts);

  return true;
}

bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
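The inlined code above first normalizes the branch predicate so that "LHS Pred RHS" means "stay in the loop", then inverts once more when the goal is to prove the exit is taken. A tiny self-contained sketch of that double inversion, using a two-value stand-in for ICmpInst::Predicate (illustrative names, not LLVM's API):

// Illustration of the predicate normalization in the hunk above.
#include <cassert>

enum Pred { LT, GE }; // stand-in for ICmpInst::Predicate
Pred invert(Pred P) { return P == LT ? GE : LT; } // cf. CmpInst::getInversePredicate

Pred normalize(Pred P, bool FalseSuccInLoop, bool ProvingLoopExit) {
  if (FalseSuccInLoop)
    P = invert(P); // make "LHS P RHS" mean "stay in the loop"
  if (ProvingLoopExit)
    P = invert(P); // reason about leaving the loop instead of staying
  return P;
}

int main() {
  // br (i < n), %exit, %loop: the false edge stays in the loop,
  // so "stay" means i >= n; proving the exit flips it back to i < n.
  assert(normalize(LT, true, false) == GE);
  assert(normalize(LT, true, true) == LT);
  return 0;
}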
66 changes: 66 additions & 0 deletions llvm/test/CodeGen/VE/VELIntrinsics/lvlgen.ll
@@ -0,0 +1,66 @@
; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s

; Test for correct placement of 'lvl' instructions

; Function Attrs: nounwind readonly
declare <256 x double> @llvm.ve.vl.vld.vssl(i64, i8*, i32)
declare void @llvm.ve.vl.vst.vssl(<256 x double>, i64, i8*, i32)

; Check that the backend can handle constant VL as well as parametric VL
; sources.

; Function Attrs: nounwind
define void @switching_vl(i32 %evl, i32 %evl2, i8* %P, i8* %Q) {
; CHECK-LABEL: switching_vl:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: lea %s4, 256
; CHECK-NEXT: lvl %s4
; CHECK-NEXT: vld %v0, 8, %s2
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vst %v0, 16, %s3
; CHECK-NEXT: lea %s4, 128
; CHECK-NEXT: lvl %s4
; CHECK-NEXT: vld %v0, 16, %s2
; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1
; CHECK-NEXT: lvl %s1
; CHECK-NEXT: vst %v0, 16, %s3
; CHECK-NEXT: lvl %s4
; CHECK-NEXT: vld %v0, 8, %s2
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vst %v0, 16, %s3
; CHECK-NEXT: or %s11, 0, %s9
  %l0 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 256)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l0, i64 16, i8* %Q, i32 %evl)
  %l1 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 16, i8* %P, i32 128)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l1, i64 16, i8* %Q, i32 %evl2)
  %l2 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 128)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, i8* %Q, i32 %evl)
  ret void
}

; Check that no redundant 'lvl' is inserted when vector length does not change
; in a basic block.

; Function Attrs: nounwind
define void @stable_vl(i32 %evl, i8* %P, i8* %Q) {
; CHECK-LABEL: stable_vl:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vld %v0, 8, %s1
; CHECK-NEXT: vst %v0, 16, %s2
; CHECK-NEXT: vld %v0, 16, %s1
; CHECK-NEXT: vst %v0, 16, %s2
; CHECK-NEXT: vld %v0, 8, %s1
; CHECK-NEXT: vst %v0, 16, %s2
; CHECK-NEXT: or %s11, 0, %s9
  %l0 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 %evl)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l0, i64 16, i8* %Q, i32 %evl)
  %l1 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 16, i8* %P, i32 %evl)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l1, i64 16, i8* %Q, i32 %evl)
  %l2 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 %evl)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, i8* %Q, i32 %evl)
  ret void
}
