
Commit 202115b

Merged master:1c00d096a608 into amd-gfx:ec1abe3201ce
Local branch amd-gfx ec1abe3 Merged master:147ccc848a55 into amd-gfx:d3b8b5809e03
Remote branch master 1c00d09 [VE] LVLGen sets VL before vector insts
Sw authored and committed on Nov 16, 2020
2 parents ec1abe3 + 1c00d09
Showing 8 changed files with 410 additions and 75 deletions.
1 change: 1 addition & 0 deletions llvm/lib/Target/VE/CMakeLists.txt
@@ -14,6 +14,7 @@ tablegen(LLVM VEGenCallingConv.inc -gen-callingconv)
add_public_tablegen_target(VECommonTableGen)

add_llvm_target(VECodeGen
  LVLGen.cpp
  VEAsmPrinter.cpp
  VEFrameLowering.cpp
  VEISelDAGToDAG.cpp
132 changes: 132 additions & 0 deletions llvm/lib/Target/VE/LVLGen.cpp
@@ -0,0 +1,132 @@
//===-- LVLGen.cpp - LVL instruction generator ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "VE.h"
#include "VESubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "lvl-gen"

namespace {
struct LVLGen : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;

  static char ID;
  LVLGen() : MachineFunctionPass(ID) {}
  bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
  bool runOnMachineFunction(MachineFunction &F) override;

  unsigned getVL(const MachineInstr &MI);
  int getVLIndex(unsigned Opcode);
};
char LVLGen::ID = 0;

} // end of anonymous namespace

FunctionPass *llvm::createLVLGenPass() { return new LVLGen; }

int LVLGen::getVLIndex(unsigned Opcode) {
  const MCInstrDesc &MCID = TII->get(Opcode);

  // If an instruction has VLIndex information, return it.
  if (HAS_VLINDEX(MCID.TSFlags))
    return GET_VLINDEX(MCID.TSFlags);

  return -1;
}

// Returns the register holding the vector length, or NoRegister when this MI
// does not have a vector length operand.
unsigned LVLGen::getVL(const MachineInstr &MI) {
  int Index = getVLIndex(MI.getOpcode());
  if (Index >= 0)
    return MI.getOperand(Index).getReg();

  return VE::NoRegister;
}

bool LVLGen::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
#define RegName(no)                                                            \
  (MBB.getParent()->getSubtarget<VESubtarget>().getRegisterInfo()->getName(no))

  bool Changed = false;
  bool HasRegForVL = false;
  unsigned RegForVL;

  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
    MachineBasicBlock::iterator MI = I;

    unsigned Reg = getVL(*MI);
    if (Reg != VE::NoRegister) {
      LLVM_DEBUG(dbgs() << "Vector instruction found: ");
      LLVM_DEBUG(MI->dump());
      LLVM_DEBUG(dbgs() << "Vector length is " << RegName(Reg) << ". ");
      LLVM_DEBUG(dbgs() << "Current VL is "
                        << (HasRegForVL ? RegName(RegForVL) : "unknown")
                        << ". ");

      if (!HasRegForVL || RegForVL != Reg) {
        LLVM_DEBUG(dbgs() << "Generate a LVL instruction to load "
                          << RegName(Reg) << ".\n");
        BuildMI(MBB, I, MI->getDebugLoc(), TII->get(VE::LVLr)).addReg(Reg);
        HasRegForVL = true;
        RegForVL = Reg;
        Changed = true;
      } else {
        LLVM_DEBUG(dbgs() << "Reuse current VL.\n");
      }
    } else if (HasRegForVL) {
      // The old VL is overwritten, so disable HasRegForVL.
      if (MI->findRegisterDefOperandIdx(RegForVL, false, false, TRI) != -1) {
        LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is redefined: ");
        LLVM_DEBUG(MI->dump());
        HasRegForVL = false;
      }
    }
    if (HasRegForVL) {
      // The latest VL is killed, so disable HasRegForVL.
      if (MI->killsRegister(RegForVL, TRI)) {
        LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is killed: ");
        LLVM_DEBUG(MI->dump());
        HasRegForVL = false;
      }
    }

    ++I;
  }
  return Changed;
}

bool LVLGen::runOnMachineFunction(MachineFunction &F) {
  LLVM_DEBUG(dbgs() << "********** Begin LVLGen **********\n");
  LLVM_DEBUG(dbgs() << "********** Function: " << F.getName() << '\n');
  LLVM_DEBUG(F.dump());

  bool Changed = false;

  const VESubtarget &Subtarget = F.getSubtarget<VESubtarget>();
  TII = Subtarget.getInstrInfo();
  TRI = Subtarget.getRegisterInfo();

  for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
    Changed |= runOnMachineBasicBlock(*FI);

  if (Changed) {
    LLVM_DEBUG(dbgs() << "\n");
    LLVM_DEBUG(F.dump());
  }
  LLVM_DEBUG(dbgs() << "********** End LVLGen **********\n");
  return Changed;
}
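
A note on the TSFlags accessors: getVLIndex above relies on HAS_VLINDEX and GET_VLINDEX, which are defined in the VE target headers and are not part of this diff. As a rough, self-contained illustration only (the bit layout below is hypothetical, not the actual VE encoding), such accessors reduce to a small bit-field scheme over MCInstrDesc::TSFlags:

// Hypothetical sketch of a "VL operand index" encoding in TSFlags.
// The real HAS_VLINDEX / GET_VLINDEX live in the VE target headers and
// may use a different layout.
#include <cassert>
#include <cstdint>

constexpr uint64_t VLInBit = 0x1;      // bit 0: instruction has a VL operand
constexpr unsigned VLIndexShift = 1;   // bits [5:1]: operand index of the VL
constexpr uint64_t VLIndexMask = 0x1f;

#define HAS_VLINDEX(TSF) ((TSF) & VLInBit)
#define GET_VLINDEX(TSF) ((int)(((TSF) >> VLIndexShift) & VLIndexMask))

int main() {
  uint64_t TSFlags = VLInBit | (3u << VLIndexShift); // VL is operand 3
  assert(HAS_VLINDEX(TSFlags) && GET_VLINDEX(TSFlags) == 3);
  return 0;
}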
1 change: 1 addition & 0 deletions llvm/lib/Target/VE/VE.h
@@ -29,6 +29,7 @@ class MachineInstr;

FunctionPass *createVEISelDag(VETargetMachine &TM);
FunctionPass *createVEPromoteToI1Pass();
FunctionPass *createLVLGenPass();

void LowerVEMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                 AsmPrinter &AP);
6 changes: 6 additions & 0 deletions llvm/lib/Target/VE/VETargetMachine.cpp
@@ -98,6 +98,7 @@ class VEPassConfig : public TargetPassConfig {

  void addIRPasses() override;
  bool addInstSelector() override;
  void addPreEmitPass() override;
};
} // namespace

@@ -115,3 +116,8 @@ bool VEPassConfig::addInstSelector() {
  addPass(createVEISelDag(getVETargetMachine()));
  return false;
}

void VEPassConfig::addPreEmitPass() {
  // LVLGen should be called after scheduling and register allocation.
  addPass(createLVLGenPass());
}
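
Taken together, the CMakeLists.txt, VE.h, and VETargetMachine.cpp changes are the usual three-step recipe for wiring a new target-specific machine pass into an LLVM backend. A condensed sketch of that recipe with generic placeholder names (an illustration of the pattern, not the VE code itself):

// Sketch: adding a late machine pass to a target. Pre-emit passes run after
// scheduling and register allocation, so the pass sees final physical
// registers, which is exactly what LVLGen needs to track the VL register.
#include "llvm/CodeGen/MachineFunctionPass.h"

using namespace llvm;

namespace {
struct MyLatePass : MachineFunctionPass {
  static char ID;
  MyLatePass() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override {
    return false; // rewrite MF here; return true if anything changed
  }
};
char MyLatePass::ID = 0;
} // namespace

// Step 1: factory function, declared in the target's umbrella header (VE.h).
FunctionPass *createMyLatePass() { return new MyLatePass; }

// Step 2: hook it into the target's TargetPassConfig subclass:
//   void MyPassConfig::addPreEmitPass() { addPass(createMyLatePass()); }
// Step 3: add the new .cpp to the add_llvm_target() list in CMakeLists.txt.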
119 changes: 44 additions & 75 deletions llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1290,66 +1290,6 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  return MadeAnyChanges;
}
}

enum ExitCondAnalysisResult {
  CanBeRemoved,
  CanBeReplacedWithInvariant,
  CannotOptimize
};

/// If the condition of BI is trivially true during at least first MaxIter
/// iterations, return CanBeRemoved.
/// If the condition is equivalent to loop-invariant condition expressed as
/// 'InvariantLHS `InvariantPred` InvariantRHS', fill them into respective
/// output parameters and return CanBeReplacedWithInvariant.
/// Otherwise, return CannotOptimize.
static ExitCondAnalysisResult
analyzeCond(const Loop *L, BranchInst *BI, ScalarEvolution *SE,
            bool ProvingLoopExit, const SCEV *MaxIter,
            ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS,
            const SCEV *&InvariantRHS) {
  ICmpInst::Predicate Pred;
  Value *LHS, *RHS;
  using namespace PatternMatch;
  BasicBlock *TrueSucc, *FalseSucc;
  if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
                      m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
    return CannotOptimize;

  assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
         "Not a loop exit!");

  // 'LHS pred RHS' should now mean that we stay in loop.
  if (L->contains(FalseSucc))
    Pred = CmpInst::getInversePredicate(Pred);

  // If we are proving loop exit, invert the predicate.
  if (ProvingLoopExit)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI))
    return CanBeRemoved;

  if (ProvingLoopExit)
    return CannotOptimize;

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return CannotOptimize;
  InvariantPred = LIP->Pred;
  InvariantLHS = LIP->LHS;
  InvariantRHS = LIP->RHS;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(InvariantPred, InvariantLHS, InvariantRHS, BI))
    return CanBeRemoved;
  return CanBeReplacedWithInvariant;
}

static void replaceExitCond(BranchInst *BI, Value *NewCond,
                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  auto *OldCond = BI->getCondition();
@@ -1390,26 +1330,55 @@ static bool optimizeLoopExitWithUnknownExitCount(
    const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
    ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  if (SkipLastIter) {
    const SCEV *One = SE->getOne(MaxIter->getType());
    MaxIter = SE->getMinusSCEV(MaxIter, One);
  }
  ICmpInst::Predicate InvariantPred;
  const SCEV *InvariantLHS, *InvariantRHS;
  switch (analyzeCond(L, BI, SE, Inverted, MaxIter, InvariantPred, InvariantLHS,
                      InvariantRHS)) {
  case CanBeRemoved:
  ICmpInst::Predicate Pred;
  Value *LHS, *RHS;
  using namespace PatternMatch;
  BasicBlock *TrueSucc, *FalseSucc;
  if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
                      m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
    return false;

  assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
         "Not a loop exit!");

  // 'LHS pred RHS' should now mean that we stay in loop.
  if (L->contains(FalseSucc))
    Pred = CmpInst::getInversePredicate(Pred);

  // If we are proving loop exit, invert the predicate.
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
    foldExit(L, ExitingBB, Inverted, DeadInsts);
    return true;
  case CanBeReplacedWithInvariant: {
    replaceWithInvariantCond(L, ExitingBB, InvariantPred, InvariantLHS,
                             InvariantRHS, Rewriter, DeadInsts);
    return true;
  }
  case CannotOptimize:
  // Further logic works for non-inverted condition only.
  if (Inverted)
    return false;

  if (SkipLastIter) {
    const SCEV *One = SE->getOne(MaxIter->getType());
    MaxIter = SE->getMinusSCEV(MaxIter, One);
  }
  llvm_unreachable("Unknown case!");

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return false;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    foldExit(L, ExitingBB, Inverted, DeadInsts);
  else
    replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
                             Rewriter, DeadInsts);

  return true;
}

bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
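The inlined code above first normalizes the branch predicate so that "LHS Pred RHS" means "stay in the loop", then inverts once more when the goal is to prove the exit is taken. A tiny self-contained sketch of that double inversion, using a two-value stand-in for ICmpInst::Predicate (illustrative names, not LLVM's API):

// Illustration of the predicate normalization in the hunk above.
#include <cassert>

enum Pred { LT, GE }; // stand-in for ICmpInst::Predicate
Pred invert(Pred P) { return P == LT ? GE : LT; } // cf. CmpInst::getInversePredicate

Pred normalize(Pred P, bool FalseSuccInLoop, bool ProvingLoopExit) {
  if (FalseSuccInLoop)
    P = invert(P); // make "LHS P RHS" mean "stay in the loop"
  if (ProvingLoopExit)
    P = invert(P); // reason about leaving the loop instead of staying
  return P;
}

int main() {
  // br (i < n), %exit, %loop: the false edge stays in the loop,
  // so "stay" means i >= n; proving the exit flips it back to i < n.
  assert(normalize(LT, true, false) == GE);
  assert(normalize(LT, true, true) == LT);
  return 0;
}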
66 changes: 66 additions & 0 deletions llvm/test/CodeGen/VE/VELIntrinsics/lvlgen.ll
@@ -0,0 +1,66 @@
; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s

; Test for correct placement of 'lvl' instructions

; Function Attrs: nounwind readonly
declare <256 x double> @llvm.ve.vl.vld.vssl(i64, i8*, i32)
declare void @llvm.ve.vl.vst.vssl(<256 x double>, i64, i8*, i32)

; Check that the backend can handle constant VL as well as parametric VL
; sources.

; Function Attrs: nounwind
define void @switching_vl(i32 %evl, i32 %evl2, i8* %P, i8* %Q) {
; CHECK-LABEL: switching_vl:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: lea %s4, 256
; CHECK-NEXT: lvl %s4
; CHECK-NEXT: vld %v0, 8, %s2
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vst %v0, 16, %s3
; CHECK-NEXT: lea %s4, 128
; CHECK-NEXT: lvl %s4
; CHECK-NEXT: vld %v0, 16, %s2
; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1
; CHECK-NEXT: lvl %s1
; CHECK-NEXT: vst %v0, 16, %s3
; CHECK-NEXT: lvl %s4
; CHECK-NEXT: vld %v0, 8, %s2
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vst %v0, 16, %s3
; CHECK-NEXT: or %s11, 0, %s9
  %l0 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 256)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l0, i64 16, i8* %Q, i32 %evl)
  %l1 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 16, i8* %P, i32 128)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l1, i64 16, i8* %Q, i32 %evl2)
  %l2 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 128)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, i8* %Q, i32 %evl)
  ret void
}

; Check that no redundant 'lvl' is inserted when vector length does not change
; in a basic block.

; Function Attrs: nounwind
define void @stable_vl(i32 %evl, i8* %P, i8* %Q) {
; CHECK-LABEL: stable_vl:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vld %v0, 8, %s1
; CHECK-NEXT: vst %v0, 16, %s2
; CHECK-NEXT: vld %v0, 16, %s1
; CHECK-NEXT: vst %v0, 16, %s2
; CHECK-NEXT: vld %v0, 8, %s1
; CHECK-NEXT: vst %v0, 16, %s2
; CHECK-NEXT: or %s11, 0, %s9
  %l0 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 %evl)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l0, i64 16, i8* %Q, i32 %evl)
  %l1 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 16, i8* %P, i32 %evl)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l1, i64 16, i8* %Q, i32 %evl)
  %l2 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 %evl)
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, i8* %Q, i32 %evl)
  ret void
}
