[VE] LVLGen sets VL before vector insts

The VE backend represents vector instructions with an explicit 'i32' vector length operand. In the VE ISA, however, the vector length is always read from the VL hardware register. The LVLGen pass inserts 'lvl' instructions as necessary to set VL to the right value before each vector instruction.

Reviewed By: kaz7

Differential Revision: https://reviews.llvm.org/D91416
jaebaek · Nov 16, 2020 · 1c00d09 · 1c00d09
1 parent b4624f6
commit 1c00d09
Show file tree

Hide file tree

Showing 7 changed files with 366 additions and 0 deletions.
diff --git a/llvm/lib/Target/VE/CMakeLists.txt b/llvm/lib/Target/VE/CMakeLists.txt
@@ -14,6 +14,7 @@ tablegen(LLVM VEGenCallingConv.inc -gen-callingconv)
 add_public_tablegen_target(VECommonTableGen)
 
 add_llvm_target(VECodeGen
+  LVLGen.cpp
   VEAsmPrinter.cpp
   VEFrameLowering.cpp
   VEISelDAGToDAG.cpp

diff --git a/llvm/lib/Target/VE/LVLGen.cpp b/llvm/lib/Target/VE/LVLGen.cpp
@@ -0,0 +1,132 @@
+//===-- LVLGen.cpp - LVL instruction generator ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "VE.h"
+#include "VESubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lvl-gen"
+
+namespace {
+// Machine-function pass that inserts 'lvl' instructions so that the VL
+// register is loaded from the register each vector instruction names as its
+// vector-length operand.
+struct LVLGen : public MachineFunctionPass {
+  // Target hooks. They are assigned at the start of runOnMachineFunction and
+  // stay null until then, so that an accidental early use fails predictably
+  // instead of reading an indeterminate pointer.
+  const TargetInstrInfo *TII = nullptr;
+  const TargetRegisterInfo *TRI = nullptr;
+
+  static char ID; // Pass identification.
+  LVLGen() : MachineFunctionPass(ID) {}
+
+  // Processes a single basic block; returns true if an 'lvl' was inserted.
+  bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+  // Pass entry point; returns true if any block was changed.
+  bool runOnMachineFunction(MachineFunction &F) override;
+
+  // Returns the register holding MI's vector length, or VE::NoRegister when
+  // MI has no vector-length operand.
+  unsigned getVL(const MachineInstr &MI);
+  // Returns the operand index of the vector length for Opcode, or -1 when the
+  // instruction description carries no such index.
+  int getVLIndex(unsigned Opcode);
+};
+char LVLGen::ID = 0;
+
+} // end of anonymous namespace
+
+// Creates a new LVLGen pass instance and hands it to the caller.
+FunctionPass *llvm::createLVLGenPass() { return new LVLGen(); }
+
+// Looks up the operand index of the vector length for the given opcode from
+// the target-specific flags of its instruction description. Returns -1 when
+// the flags carry no vector-length index.
+int LVLGen::getVLIndex(unsigned Opcode) {
+  const MCInstrDesc &Desc = TII->get(Opcode);
+
+  // No VLIndex information recorded for this instruction.
+  if (!HAS_VLINDEX(Desc.TSFlags))
+    return -1;
+
+  return GET_VLINDEX(Desc.TSFlags);
+}
+
+// Returns the register that holds the vector length of MI. VE::NoRegister is
+// returned when MI has no vector-length operand.
+unsigned LVLGen::getVL(const MachineInstr &MI) {
+  const int VLOperandIdx = getVLIndex(MI.getOpcode());
+  if (VLOperandIdx < 0)
+    return VE::NoRegister;
+
+  return MI.getOperand(VLOperandIdx).getReg();
+}
+
+// Walks MBB in program order and inserts an LVLr in front of every vector
+// instruction whose vector-length register differs from the register that
+// was most recently loaded into VL within this block. Nothing is assumed
+// about VL on entry to the block, so the first vector instruction always
+// gets an LVLr. Returns true if at least one instruction was inserted.
+bool LVLGen::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+#define RegName(no)                                                            \
+  (MBB.getParent()->getSubtarget<VESubtarget>().getRegisterInfo()->getName(no))
+
+  bool Changed = false;
+  // RegForVL is the register whose value currently sits in VL; it is only
+  // meaningful while HasRegForVL is true.
+  bool HasRegForVL = false;
+  unsigned RegForVL = VE::NoRegister;
+
+  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
+    MachineInstr &MI = *I;
+
+    unsigned Reg = getVL(MI);
+    if (Reg != VE::NoRegister) {
+      LLVM_DEBUG(dbgs() << "Vector instruction found: ");
+      LLVM_DEBUG(MI.dump());
+      LLVM_DEBUG(dbgs() << "Vector length is " << RegName(Reg) << ". ");
+      LLVM_DEBUG(dbgs() << "Current VL is "
+                        << (HasRegForVL ? RegName(RegForVL) : "unknown")
+                        << ". ");
+
+      if (!HasRegForVL || RegForVL != Reg) {
+        LLVM_DEBUG(dbgs() << "Generate a LVL instruction to load "
+                          << RegName(Reg) << ".\n");
+        // Inserted before I, so the iterator keeps pointing at MI.
+        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(VE::LVLr)).addReg(Reg);
+        HasRegForVL = true;
+        RegForVL = Reg;
+        Changed = true;
+      } else {
+        LLVM_DEBUG(dbgs() << "Reuse current VL.\n");
+      }
+    }
+
+    // Decide whether RegForVL can still be trusted after MI. This has to be
+    // checked for every instruction, including the vector instruction that
+    // just consumed the register, because it may overwrite its own VL source.
+    if (HasRegForVL) {
+      // Overlap=true: a write to a super- or sub-register also changes the
+      // value that was loaded into VL.
+      const bool Redefined =
+          MI.findRegisterDefOperandIdx(RegForVL, false, true, TRI) != -1;
+      // A callee is free to change VL, so nothing is known after a call.
+      if (Redefined || MI.killsRegister(RegForVL, TRI) || MI.isCall()) {
+        LLVM_DEBUG(dbgs() << RegName(RegForVL)
+                          << " no longer tracks VL after: ");
+        LLVM_DEBUG(MI.dump());
+        HasRegForVL = false;
+      }
+    }
+  }
+
+#undef RegName
+  return Changed;
+}
+
+// Pass entry point. Caches the VE instruction and register info of F's
+// subtarget, then runs the per-block 'lvl' insertion on every basic block.
+// Returns true if any block was modified.
+bool LVLGen::runOnMachineFunction(MachineFunction &F) {
+  LLVM_DEBUG(dbgs() << "********** Begin LVLGen **********\n");
+  LLVM_DEBUG(dbgs() << "********** Function: " << F.getName() << '\n');
+  LLVM_DEBUG(F.dump());
+
+  const VESubtarget &ST = F.getSubtarget<VESubtarget>();
+  TII = ST.getInstrInfo();
+  TRI = ST.getRegisterInfo();
+
+  bool Modified = false;
+  for (MachineBasicBlock &MBB : F)
+    Modified |= runOnMachineBasicBlock(MBB);
+
+  // Dump the function again only when something was actually inserted.
+  if (Modified) {
+    LLVM_DEBUG(dbgs() << "\n");
+    LLVM_DEBUG(F.dump());
+  }
+  LLVM_DEBUG(dbgs() << "********** End LVLGen **********\n");
+  return Modified;
+}
diff --git a/llvm/lib/Target/VE/VE.h b/llvm/lib/Target/VE/VE.h
@@ -29,6 +29,7 @@ class MachineInstr;
 
 FunctionPass *createVEISelDag(VETargetMachine &TM);
 FunctionPass *createVEPromoteToI1Pass();
+FunctionPass *createLVLGenPass();
 
 void LowerVEMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                  AsmPrinter &AP);

diff --git a/llvm/lib/Target/VE/VETargetMachine.cpp b/llvm/lib/Target/VE/VETargetMachine.cpp
@@ -98,6 +98,7 @@ class VEPassConfig : public TargetPassConfig {
 
   void addIRPasses() override;
   bool addInstSelector() override;
+  void addPreEmitPass() override;
 };
 } // namespace
 
@@ -115,3 +116,8 @@ bool VEPassConfig::addInstSelector() {
   addPass(createVEISelDag(getVETargetMachine()));
   return false;
 }
+
+void VEPassConfig::addPreEmitPass() {
+  // Run LVLGen only after scheduling and register allocation have finished.
+  addPass(createLVLGenPass());
+}
diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/lvlgen.ll b/llvm/test/CodeGen/VE/VELIntrinsics/lvlgen.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
+
+; Test for correct placement of 'lvl' instructions
+
+; Function Attrs: nounwind readonly
+declare <256 x double> @llvm.ve.vl.vld.vssl(i64, i8*, i32)
+declare void @llvm.ve.vl.vst.vssl(<256 x double>, i64, i8*, i32)
+
+; Check that an 'lvl' is emitted each time the VL source changes, for both
+; constant (256, 128) and parametric (%evl, %evl2) vector lengths.
+
+; Function Attrs: nounwind
+define void @switching_vl(i32 %evl, i32 %evl2, i8* %P, i8* %Q) {
+; CHECK-LABEL: switching_vl:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    lea %s4, 256
+; CHECK-NEXT:    lvl %s4
+; CHECK-NEXT:    vld %v0, 8, %s2
+; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vst %v0, 16, %s3
+; CHECK-NEXT:    lea %s4, 128
+; CHECK-NEXT:    lvl %s4
+; CHECK-NEXT:    vld %v0, 16, %s2
+; CHECK-NEXT:    adds.w.sx %s1, %s1, (0)1
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vst %v0, 16, %s3
+; CHECK-NEXT:    lvl %s4
+; CHECK-NEXT:    vld %v0, 8, %s2
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vst %v0, 16, %s3
+; CHECK-NEXT:    or %s11, 0, %s9
+  %l0 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 256)
+  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l0, i64 16, i8* %Q, i32 %evl)
+  %l1 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 16, i8* %P, i32 128)
+  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l1, i64 16, i8* %Q, i32 %evl2)
+  %l2 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 128)
+  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, i8* %Q, i32 %evl)
+  ret void
+}
+
+; Check that one 'lvl' is emitted and then reused when all vector instructions
+; in a basic block take their vector length from the same register.
+
+
+; Function Attrs: nounwind
+define void @stable_vl(i32 %evl, i8* %P, i8* %Q) {
+; CHECK-LABEL: stable_vl:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vld %v0, 8, %s1
+; CHECK-NEXT:    vst %v0, 16, %s2
+; CHECK-NEXT:    vld %v0, 16, %s1
+; CHECK-NEXT:    vst %v0, 16, %s2
+; CHECK-NEXT:    vld %v0, 8, %s1
+; CHECK-NEXT:    vst %v0, 16, %s2
+; CHECK-NEXT:    or %s11, 0, %s9
+  %l0 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 %evl)
+  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l0, i64 16, i8* %Q, i32 %evl)
+  %l1 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 16, i8* %P, i32 %evl)
+  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l1, i64 16, i8* %Q, i32 %evl)
+  %l2 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 %evl)
+  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, i8* %Q, i32 %evl)
+  ret void
+}