Skip to content

Commit

Permalink
Revert "[AMDGPU] Come back patch for the 'Assign register class for cross block values according to the divergence.'"
Browse files Browse the repository at this point in the history

This reverts commit c4d256a.

Also xfailed three tests.

Change-Id: I6d301a01be105d1a93ef3b148cd8c1b852da7f25
  • Loading branch information
Tim Renouf committed Nov 1, 2019
1 parent acb3c7c commit 717c9c6
Show file tree
Hide file tree
Showing 47 changed files with 411 additions and 530 deletions.
210 changes: 121 additions & 89 deletions llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,16 +114,10 @@ class SIFixSGPRCopies : public MachineFunctionPass {
public:
static char ID;

MachineRegisterInfo *MRI;
const SIRegisterInfo *TRI;
const SIInstrInfo *TII;

SIFixSGPRCopies() : MachineFunctionPass(ID) {}

bool runOnMachineFunction(MachineFunction &MF) override;

void processPHINode(MachineInstr &MI);

StringRef getPassName() const override { return "SI Fix SGPR copies"; }

void getAnalysisUsage(AnalysisUsage &AU) const override {
Expand Down Expand Up @@ -320,6 +314,52 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
return true;
}

static bool phiHasVGPROperands(const MachineInstr &PHI,
const MachineRegisterInfo &MRI,
const SIRegisterInfo *TRI,
const SIInstrInfo *TII) {
for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
Register Reg = PHI.getOperand(i).getReg();
if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
return true;
}
return false;
}

static bool phiHasBreakDef(const MachineInstr &PHI,
const MachineRegisterInfo &MRI,
SmallSet<unsigned, 8> &Visited) {
for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
Register Reg = PHI.getOperand(i).getReg();
if (Visited.count(Reg))
continue;

Visited.insert(Reg);

MachineInstr *DefInstr = MRI.getVRegDef(Reg);
switch (DefInstr->getOpcode()) {
default:
break;
case AMDGPU::SI_IF_BREAK:
return true;
case AMDGPU::PHI:
if (phiHasBreakDef(*DefInstr, MRI, Visited))
return true;
}
}
return false;
}

static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
const TargetRegisterInfo &TRI) {
for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
E = MBB.end(); I != E; ++I) {
if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
return true;
}
return false;
}

static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
const MachineInstr *MoveImm,
const SIInstrInfo *TII,
Expand Down Expand Up @@ -381,6 +421,12 @@ bool searchPredecessors(const MachineBasicBlock *MBB,
return false;
}

// Return true if any (transitive) predecessor of \p MBB ends in a terminator
// that modifies EXEC, i.e. the value flowing into \p MBB may be divergent.
// The nullptr cut-off means the whole predecessor graph is searched.
static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
                                        const TargetRegisterInfo *TRI) {
  auto ModifiesExec = [TRI](MachineBasicBlock *Pred) {
    return hasTerminatorThatModifiesExec(*Pred, *TRI);
  };
  return searchPredecessors(MBB, nullptr, ModifiesExec);
}

// Checks if there is potential path From instruction To instruction.
// If CutOff is specified and it sits in between of that path we ignore
// a higher portion of the path and report it is not reachable.
Expand Down Expand Up @@ -649,9 +695,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,

bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
MRI = &MF.getRegInfo();
TRI = ST.getRegisterInfo();
TII = ST.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
const SIInstrInfo *TII = ST.getInstrInfo();
MDT = &getAnalysis<MachineDominatorTree>();

SmallVector<MachineInstr *, 16> Worklist;
Expand All @@ -673,7 +719,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
Register DstReg = MI.getOperand(0).getReg();

const TargetRegisterClass *SrcRC, *DstRC;
std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, *MRI);
std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);

if (!Register::isVirtualRegister(DstReg)) {
// If the destination register is a physical register there isn't
Expand All @@ -682,7 +728,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
// the first lane. Insert a readfirstlane and hope for the best.
if (DstReg == AMDGPU::M0 && TRI->hasVectorRegisters(SrcRC)) {
Register TmpReg
= MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
= MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

BuildMI(MBB, MI, MI.getDebugLoc(),
TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg)
Expand All @@ -700,7 +746,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
break;
}

MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
MachineInstr *DefMI = MRI.getVRegDef(SrcReg);
unsigned SMovOp;
int64_t Imm;
// If we are just copying an immediate, we can replace the copy with
Expand All @@ -719,13 +765,70 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
break;
}
case AMDGPU::PHI: {
processPHINode(MI);
Register Reg = MI.getOperand(0).getReg();
if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
break;

// We don't need to fix the PHI if the common dominator of the
// two incoming blocks terminates with a uniform branch.
bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) {
MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();

if (!predsHasDivergentTerminator(MBB0, TRI) &&
!predsHasDivergentTerminator(MBB1, TRI)) {
LLVM_DEBUG(dbgs()
<< "Not fixing PHI for uniform branch: " << MI << '\n');
break;
}
}

// If a PHI node defines an SGPR and any of its operands are VGPRs,
// then we need to move it to the VALU.
//
// Also, if a PHI node defines an SGPR and has all SGPR operands
// we must move it to the VALU, because the SGPR operands will
// all end up being assigned the same register, which means
// there is a potential for a conflict if different threads take
// different control flow paths.
//
// For Example:
//
// sgpr0 = def;
// ...
// sgpr1 = def;
// ...
// sgpr2 = PHI sgpr0, sgpr1
// use sgpr2;
//
// Will Become:
//
// sgpr2 = def;
// ...
// sgpr2 = def;
// ...
// use sgpr2
//
// The one exception to this rule is when one of the operands
// is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
// instruction. In this case, there we know the program will
// never enter the second block (the loop) without entering
// the first block (where the condition is computed), so there
// is no chance for values to be over-written.

SmallSet<unsigned, 8> Visited;
if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
TII->moveToVALU(MI, MDT);
}

break;
}
case AMDGPU::REG_SEQUENCE:
if (TRI->hasVectorRegisters(TII->getOpRegClass(MI, 0)) ||
!hasVectorOperands(MI, TRI)) {
foldVGPRCopyIntoRegSequence(MI, TRI, TII, *MRI);
foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
continue;
}

Expand All @@ -735,9 +838,9 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
break;
case AMDGPU::INSERT_SUBREG: {
const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
DstRC = MRI->getRegClass(MI.getOperand(0).getReg());
Src0RC = MRI->getRegClass(MI.getOperand(1).getReg());
Src1RC = MRI->getRegClass(MI.getOperand(2).getReg());
DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
if (TRI->isSGPRClass(DstRC) &&
(TRI->hasVectorRegisters(Src0RC) ||
TRI->hasVectorRegisters(Src1RC))) {
Expand All @@ -753,78 +856,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
fixWriteLane(MF);

if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
hoistAndMergeSGPRInits(AMDGPU::M0, *MRI, TRI, *MDT, TII);
hoistAndMergeSGPRInits(AMDGPU::M0, MRI, TRI, *MDT, TII);

return true;
}

/// Decide whether PHI \p MI must be moved to the VALU or merely have its
/// operands legalized.
///
/// Two signals are gathered:
///   * numVGPRUses — how many transitive users of the PHI result (looking
///     through copies and REG_SEQUENCEs) demand a VGPR;
///   * hasVGPRInput — whether some incoming value is a genuine VGPR (a VGPR
///     that is only a copy of an SGPR does not count, while an SGPR copied
///     from a VGPR does).
/// If the PHI result is currently neither a VGPR nor VReg_1 but it has a
/// VGPR input or more than one VGPR use, the PHI is moved to the VALU;
/// otherwise only its operands are legalized in place.
void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
  unsigned numVGPRUses = 0;
  SetVector<const MachineInstr *> worklist;
  worklist.insert(&MI);
  // Walk the def-use graph rooted at the PHI result, counting uses that
  // require a VGPR.
  while (!worklist.empty()) {
    const MachineInstr *Instr = worklist.pop_back_val();
    unsigned Reg = Instr->getOperand(0).getReg();
    for (const auto &Use : MRI->use_operands(Reg)) {
      const MachineInstr *UseMI = Use.getParent();
      // Look through copies and REG_SEQUENCEs by queueing them so their own
      // users are examined.  A copy into a physical non-SGPR register is
      // itself counted as a VGPR use.
      if (UseMI->isCopy() || UseMI->isRegSequence()) {
        if (UseMI->isCopy() &&
          UseMI->getOperand(0).getReg().isPhysical() &&
          !TRI->isSGPRReg(*MRI, UseMI->getOperand(0).getReg())) {
          numVGPRUses++;
        }
        worklist.insert(UseMI);
        continue;
      }

      // A PHI user counts as a VGPR use when its own result is neither an
      // SGPR nor the VReg_1 (i1) class.
      if (UseMI->isPHI()) {
        const TargetRegisterClass *UseRC = MRI->getRegClass(Use.getReg());
        if (!TRI->isSGPRReg(*MRI, Use.getReg()) &&
          UseRC != &AMDGPU::VReg_1RegClass)
          numVGPRUses++;
        continue;
      }

      // Ordinary instruction: consult the operand's register-class
      // constraint.  Classes that also admit SGPR operands (VS_32/VS_64)
      // are not VGPR-only uses.
      const TargetRegisterClass *OpRC =
        TII->getOpRegClass(*UseMI, UseMI->getOperandNo(&Use));
      if (!TRI->isSGPRClass(OpRC) && OpRC != &AMDGPU::VS_32RegClass &&
        OpRC != &AMDGPU::VS_64RegClass) {
        numVGPRUses++;
      }
    }
  }

  // Scan the incoming values (operands 1, 3, 5, ...) for a true VGPR input.
  bool hasVGPRInput = false;
  for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
    unsigned InputReg = MI.getOperand(i).getReg();
    MachineInstr *Def = MRI->getVRegDef(InputReg);
    if (TRI->isVGPR(*MRI, InputReg)) {
      // A VGPR that is merely a copy of an SGPR is still uniform: skip it.
      if (Def->isCopy()) {
        unsigned SrcReg = Def->getOperand(1).getReg();
        const TargetRegisterClass *RC =
          TRI->getRegClassForReg(*MRI, SrcReg);
        if (TRI->isSGPRClass(RC))
          continue;
      }
      hasVGPRInput = true;
      break;
    }
    else if (Def->isCopy() &&
      TRI->isVGPR(*MRI, Def->getOperand(1).getReg())) {
      // An SGPR incoming value that was copied from a VGPR also forces the
      // PHI onto the VALU.
      hasVGPRInput = true;
      break;
    }
  }

  unsigned PHIRes = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC0 = MRI->getRegClass(PHIRes);

  // Move to VALU only when the result is not already a VGPR (and not the
  // special VReg_1 class) yet divergence is required by an input or by more
  // than one VGPR use; otherwise keep the PHI and legalize its operands.
  if ((!TRI->isVGPR(*MRI, PHIRes) && RC0 != &AMDGPU::VReg_1RegClass) &&
    (hasVGPRInput || numVGPRUses > 1)) {
    LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
    TII->moveToVALU(MI);
  }
  else {
    LLVM_DEBUG(dbgs() << "Legalizing PHI: " << MI);
    TII->legalizeOperands(MI, MDT);
  }

}
107 changes: 0 additions & 107 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10960,110 +10960,3 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {

return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW);
}

/// Pick the register class for a value of type \p VT, taking its divergence
/// into account: uniform values are steered to SGPR classes (with VReg_1
/// mapping to the wave-mask width) and divergent values to VGPR classes.
const TargetRegisterClass *
SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false);
  const bool IsSGPR = TRI->isSGPRClass(RC);

  if (!isDivergent) {
    // Uniform i1 values live in the scalar condition-mask class, whose width
    // follows the wavefront size.
    if (RC == &AMDGPU::VReg_1RegClass)
      return Subtarget->getWavefrontSize() == 64 ? &AMDGPU::SReg_64RegClass
                                                 : &AMDGPU::SReg_32RegClass;
    if (!IsSGPR)
      return TRI->getEquivalentSGPRClass(RC);
    return RC;
  }

  // Divergent value: scalar classes are switched to their vector equivalent.
  if (IsSGPR)
    return TRI->getEquivalentVGPRClass(RC);
  return RC;
}

/// Return true if \p V, or any value transitively derived from it through its
/// users, feeds an AMDGCN control-flow intrinsic in a position that demands a
/// uniform value: operand 1 of amdgcn.if/else/if.break, or operand 0 of
/// amdgcn.end.cf/loop.  \p Visited terminates cycles and avoids re-scanning.
static bool hasCFUser(const Value *V, SmallPtrSet<const Value *, 16> &Visited) {
  if (!Visited.insert(V).second)
    return false;
  bool Result = false;
  for (auto U : V->users()) {
    if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(U)) {
      if (V == U->getOperand(1)) {
        switch (Intrinsic->getIntrinsicID()) {
        default:
          Result = false;
          break;
        case Intrinsic::amdgcn_if_break:
        case Intrinsic::amdgcn_if:
        case Intrinsic::amdgcn_else:
          Result = true;
          break;
        }
      }
      // NOTE(review): if V appears as both operand 1 and operand 0 of the
      // same intrinsic, this second switch's default can reset a Result the
      // first switch set to true — confirm that is intended.
      if (V == U->getOperand(0)) {
        switch (Intrinsic->getIntrinsicID()) {
        default:
          Result = false;
          break;
        case Intrinsic::amdgcn_end_cf:
        case Intrinsic::amdgcn_loop:
          Result = true;
          break;
        }
      }
    } else {
      // Non-intrinsic user: recurse into its users.
      Result = hasCFUser(U, Visited);
    }
    if (Result)
      break;
  }
  return Result;
}

/// Return true if the IR value \p V must be assigned a uniform (SGPR)
/// register.  This holds when:
///   * V is itself an amdgcn.if.break call;
///   * V is element 1 extracted from the struct returned by amdgcn.if or
///     amdgcn.else (the mask part of the result);
///   * V is an inline-asm call with an output constraint resolving to a
///     specific SGPR or to an SGPR register class;
///   * or any transitive user of V is a control-flow intrinsic that needs a
///     uniform input (see hasCFUser).
bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
                                               const Value *V) const {
  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
    switch (Intrinsic->getIntrinsicID()) {
    default:
      return false;
    case Intrinsic::amdgcn_if_break:
      return true;
    }
  }
  if (const ExtractValueInst *ExtValue = dyn_cast<ExtractValueInst>(V)) {
    if (const IntrinsicInst *Intrinsic =
        dyn_cast<IntrinsicInst>(ExtValue->getOperand(0))) {
      switch (Intrinsic->getIntrinsicID()) {
      default:
        return false;
      case Intrinsic::amdgcn_if:
      case Intrinsic::amdgcn_else: {
        // Only the mask element (index 1) of the {value, mask} result is
        // required to be uniform.
        ArrayRef<unsigned> Indices = ExtValue->getIndices();
        if (Indices.size() == 1 && Indices[0] == 1) {
          return true;
        }
      }
      }
    }
  }
  if (const CallInst *CI = dyn_cast<CallInst>(V)) {
    if (isa<InlineAsm>(CI->getCalledValue())) {
      // Parse the inline-asm constraints; any output bound to an SGPR
      // (either a concrete register or an SGPR class) forces uniformity.
      const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
      ImmutableCallSite CS(CI);
      TargetLowering::AsmOperandInfoVector TargetConstraints = ParseConstraints(
          MF.getDataLayout(), Subtarget->getRegisterInfo(), CS);
      for (auto &TC : TargetConstraints) {
        if (TC.Type == InlineAsm::isOutput) {
          ComputeConstraintToUse(TC, SDValue());
          unsigned AssignedReg;
          const TargetRegisterClass *RC;
          std::tie(AssignedReg, RC) = getRegForInlineAsmConstraint(
              SIRI, TC.ConstraintCode, TC.ConstraintVT);
          if (RC) {
            MachineRegisterInfo &MRI = MF.getRegInfo();
            if (AssignedReg != 0 && SIRI->isSGPRReg(MRI, AssignedReg))
              return true;
            else if (SIRI->isSGPRClass(RC))
              return true;
          }
        }
      }
    }
  }
  // Fall back to scanning transitive users for control-flow intrinsics.
  SmallPtrSet<const Value *, 16> Visited;
  return hasCFUser(V, Visited);
}
Loading

0 comments on commit 717c9c6

Please sign in to comment.