Skip to content

Commit

Permalink
Revert "[AMDGPU] Come back patch for the 'Assign register class for cross block values according to the divergence.'"
Browse files Browse the repository at this point in the history

This reverts commit c4d256a.

Also xfailed three tests.

Change-Id: I6d301a01be105d1a93ef3b148cd8c1b852da7f25
  • Loading branch information
Tim Renouf committed Nov 1, 2019
1 parent acb3c7c commit 717c9c6
Show file tree
Hide file tree
Showing 47 changed files with 411 additions and 530 deletions.
210 changes: 121 additions & 89 deletions llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,16 +114,10 @@ class SIFixSGPRCopies : public MachineFunctionPass {
public:
static char ID;

MachineRegisterInfo *MRI;
const SIRegisterInfo *TRI;
const SIInstrInfo *TII;

SIFixSGPRCopies() : MachineFunctionPass(ID) {}

bool runOnMachineFunction(MachineFunction &MF) override;

void processPHINode(MachineInstr &MI);

StringRef getPassName() const override { return "SI Fix SGPR copies"; }

void getAnalysisUsage(AnalysisUsage &AU) const override {
Expand Down Expand Up @@ -320,6 +314,52 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
return true;
}

static bool phiHasVGPROperands(const MachineInstr &PHI,
const MachineRegisterInfo &MRI,
const SIRegisterInfo *TRI,
const SIInstrInfo *TII) {
for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
Register Reg = PHI.getOperand(i).getReg();
if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
return true;
}
return false;
}

static bool phiHasBreakDef(const MachineInstr &PHI,
const MachineRegisterInfo &MRI,
SmallSet<unsigned, 8> &Visited) {
for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
Register Reg = PHI.getOperand(i).getReg();
if (Visited.count(Reg))
continue;

Visited.insert(Reg);

MachineInstr *DefInstr = MRI.getVRegDef(Reg);
switch (DefInstr->getOpcode()) {
default:
break;
case AMDGPU::SI_IF_BREAK:
return true;
case AMDGPU::PHI:
if (phiHasBreakDef(*DefInstr, MRI, Visited))
return true;
}
}
return false;
}

static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
const TargetRegisterInfo &TRI) {
for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
E = MBB.end(); I != E; ++I) {
if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
return true;
}
return false;
}

static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
const MachineInstr *MoveImm,
const SIInstrInfo *TII,
Expand Down Expand Up @@ -381,6 +421,12 @@ bool searchPredecessors(const MachineBasicBlock *MBB,
return false;
}

// Return true if any (transitive) predecessor of \p MBB ends in a terminator
// that modifies EXEC, i.e. the value flowing into \p MBB may be divergent.
// The nullptr cut-off means the whole predecessor graph is searched.
static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
                                        const TargetRegisterInfo *TRI) {
  auto ModifiesExec = [TRI](MachineBasicBlock *Pred) {
    return hasTerminatorThatModifiesExec(*Pred, *TRI);
  };
  return searchPredecessors(MBB, nullptr, ModifiesExec);
}

// Checks if there is potential path From instruction To instruction.
// If CutOff is specified and it sits in between of that path we ignore
// a higher portion of the path and report it is not reachable.
Expand Down Expand Up @@ -649,9 +695,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,

bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
MRI = &MF.getRegInfo();
TRI = ST.getRegisterInfo();
TII = ST.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
const SIInstrInfo *TII = ST.getInstrInfo();
MDT = &getAnalysis<MachineDominatorTree>();

SmallVector<MachineInstr *, 16> Worklist;
Expand All @@ -673,7 +719,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
Register DstReg = MI.getOperand(0).getReg();

const TargetRegisterClass *SrcRC, *DstRC;
std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, *MRI);
std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);

if (!Register::isVirtualRegister(DstReg)) {
// If the destination register is a physical register there isn't
Expand All @@ -682,7 +728,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
// the first lane. Insert a readfirstlane and hope for the best.
if (DstReg == AMDGPU::M0 && TRI->hasVectorRegisters(SrcRC)) {
Register TmpReg
= MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
= MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

BuildMI(MBB, MI, MI.getDebugLoc(),
TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg)
Expand All @@ -700,7 +746,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
break;
}

MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
MachineInstr *DefMI = MRI.getVRegDef(SrcReg);
unsigned SMovOp;
int64_t Imm;
// If we are just copying an immediate, we can replace the copy with
Expand All @@ -719,13 +765,70 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
break;
}
case AMDGPU::PHI: {
processPHINode(MI);
Register Reg = MI.getOperand(0).getReg();
if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
break;

// We don't need to fix the PHI if the common dominator of the
// two incoming blocks terminates with a uniform branch.
bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) {
MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();

if (!predsHasDivergentTerminator(MBB0, TRI) &&
!predsHasDivergentTerminator(MBB1, TRI)) {
LLVM_DEBUG(dbgs()
<< "Not fixing PHI for uniform branch: " << MI << '\n');
break;
}
}

// If a PHI node defines an SGPR and any of its operands are VGPRs,
// then we need to move it to the VALU.
//
// Also, if a PHI node defines an SGPR and has all SGPR operands
// we must move it to the VALU, because the SGPR operands will
// all end up being assigned the same register, which means
// there is a potential for a conflict if different threads take
// different control flow paths.
//
// For Example:
//
// sgpr0 = def;
// ...
// sgpr1 = def;
// ...
// sgpr2 = PHI sgpr0, sgpr1
// use sgpr2;
//
// Will Become:
//
// sgpr2 = def;
// ...
// sgpr2 = def;
// ...
// use sgpr2
//
// The one exception to this rule is when one of the operands
// is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
// instruction. In this case, there we know the program will
// never enter the second block (the loop) without entering
// the first block (where the condition is computed), so there
// is no chance for values to be over-written.

SmallSet<unsigned, 8> Visited;
if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
TII->moveToVALU(MI, MDT);
}

break;
}
case AMDGPU::REG_SEQUENCE:
if (TRI->hasVectorRegisters(TII->getOpRegClass(MI, 0)) ||
!hasVectorOperands(MI, TRI)) {
foldVGPRCopyIntoRegSequence(MI, TRI, TII, *MRI);
foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
continue;
}

Expand All @@ -735,9 +838,9 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
break;
case AMDGPU::INSERT_SUBREG: {
const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
DstRC = MRI->getRegClass(MI.getOperand(0).getReg());
Src0RC = MRI->getRegClass(MI.getOperand(1).getReg());
Src1RC = MRI->getRegClass(MI.getOperand(2).getReg());
DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
if (TRI->isSGPRClass(DstRC) &&
(TRI->hasVectorRegisters(Src0RC) ||
TRI->hasVectorRegisters(Src1RC))) {
Expand All @@ -753,78 +856,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
fixWriteLane(MF);

if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
hoistAndMergeSGPRInits(AMDGPU::M0, *MRI, TRI, *MDT, TII);
hoistAndMergeSGPRInits(AMDGPU::M0, MRI, TRI, *MDT, TII);

return true;
}

/// Decide whether PHI \p MI must be moved to the VALU or merely have its
/// operands legalized.
///
/// Two signals are gathered:
///   * numVGPRUses — how many transitive users of the PHI result (looking
///     through copies and REG_SEQUENCEs) demand a VGPR;
///   * hasVGPRInput — whether some incoming value is a genuine VGPR (a VGPR
///     that is only a copy of an SGPR does not count, while an SGPR copied
///     from a VGPR does).
/// If the PHI result is currently neither a VGPR nor VReg_1 but it has a
/// VGPR input or more than one VGPR use, the PHI is moved to the VALU;
/// otherwise only its operands are legalized in place.
void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
  unsigned numVGPRUses = 0;
  SetVector<const MachineInstr *> worklist;
  worklist.insert(&MI);
  // Walk the def-use graph rooted at the PHI result, counting uses that
  // require a VGPR.
  while (!worklist.empty()) {
    const MachineInstr *Instr = worklist.pop_back_val();
    unsigned Reg = Instr->getOperand(0).getReg();
    for (const auto &Use : MRI->use_operands(Reg)) {
      const MachineInstr *UseMI = Use.getParent();
      // Look through copies and REG_SEQUENCEs by queueing them so their own
      // users are examined.  A copy into a physical non-SGPR register is
      // itself counted as a VGPR use.
      if (UseMI->isCopy() || UseMI->isRegSequence()) {
        if (UseMI->isCopy() &&
          UseMI->getOperand(0).getReg().isPhysical() &&
          !TRI->isSGPRReg(*MRI, UseMI->getOperand(0).getReg())) {
          numVGPRUses++;
        }
        worklist.insert(UseMI);
        continue;
      }

      // A PHI user counts as a VGPR use when its own result is neither an
      // SGPR nor the VReg_1 (i1) class.
      if (UseMI->isPHI()) {
        const TargetRegisterClass *UseRC = MRI->getRegClass(Use.getReg());
        if (!TRI->isSGPRReg(*MRI, Use.getReg()) &&
          UseRC != &AMDGPU::VReg_1RegClass)
          numVGPRUses++;
        continue;
      }

      // Ordinary instruction: consult the operand's register-class
      // constraint.  Classes that also admit SGPR operands (VS_32/VS_64)
      // are not VGPR-only uses.
      const TargetRegisterClass *OpRC =
        TII->getOpRegClass(*UseMI, UseMI->getOperandNo(&Use));
      if (!TRI->isSGPRClass(OpRC) && OpRC != &AMDGPU::VS_32RegClass &&
        OpRC != &AMDGPU::VS_64RegClass) {
        numVGPRUses++;
      }
    }
  }

  // Scan the incoming values (operands 1, 3, 5, ...) for a true VGPR input.
  bool hasVGPRInput = false;
  for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
    unsigned InputReg = MI.getOperand(i).getReg();
    MachineInstr *Def = MRI->getVRegDef(InputReg);
    if (TRI->isVGPR(*MRI, InputReg)) {
      // A VGPR that is merely a copy of an SGPR is still uniform: skip it.
      if (Def->isCopy()) {
        unsigned SrcReg = Def->getOperand(1).getReg();
        const TargetRegisterClass *RC =
          TRI->getRegClassForReg(*MRI, SrcReg);
        if (TRI->isSGPRClass(RC))
          continue;
      }
      hasVGPRInput = true;
      break;
    }
    else if (Def->isCopy() &&
      TRI->isVGPR(*MRI, Def->getOperand(1).getReg())) {
      // An SGPR incoming value that was copied from a VGPR also forces the
      // PHI onto the VALU.
      hasVGPRInput = true;
      break;
    }
  }

  unsigned PHIRes = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC0 = MRI->getRegClass(PHIRes);

  // Move to VALU only when the result is not already a VGPR (and not the
  // special VReg_1 class) yet divergence is required by an input or by more
  // than one VGPR use; otherwise keep the PHI and legalize its operands.
  if ((!TRI->isVGPR(*MRI, PHIRes) && RC0 != &AMDGPU::VReg_1RegClass) &&
    (hasVGPRInput || numVGPRUses > 1)) {
    LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
    TII->moveToVALU(MI);
  }
  else {
    LLVM_DEBUG(dbgs() << "Legalizing PHI: " << MI);
    TII->legalizeOperands(MI, MDT);
  }

}
107 changes: 0 additions & 107 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10960,110 +10960,3 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {

return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW);
}

/// Pick the register class for a value of type \p VT, taking its divergence
/// into account: uniform values are steered to SGPR classes (with VReg_1
/// mapping to the wave-mask width) and divergent values to VGPR classes.
const TargetRegisterClass *
SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false);
  const bool IsSGPR = TRI->isSGPRClass(RC);

  if (!isDivergent) {
    // Uniform i1 values live in the scalar condition-mask class, whose width
    // follows the wavefront size.
    if (RC == &AMDGPU::VReg_1RegClass)
      return Subtarget->getWavefrontSize() == 64 ? &AMDGPU::SReg_64RegClass
                                                 : &AMDGPU::SReg_32RegClass;
    if (!IsSGPR)
      return TRI->getEquivalentSGPRClass(RC);
    return RC;
  }

  // Divergent value: scalar classes are switched to their vector equivalent.
  if (IsSGPR)
    return TRI->getEquivalentVGPRClass(RC);
  return RC;
}

/// Return true if \p V, or any value transitively derived from it through its
/// users, feeds an AMDGCN control-flow intrinsic in a position that demands a
/// uniform value: operand 1 of amdgcn.if/else/if.break, or operand 0 of
/// amdgcn.end.cf/loop.  \p Visited terminates cycles and avoids re-scanning.
static bool hasCFUser(const Value *V, SmallPtrSet<const Value *, 16> &Visited) {
  if (!Visited.insert(V).second)
    return false;
  bool Result = false;
  for (auto U : V->users()) {
    if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(U)) {
      if (V == U->getOperand(1)) {
        switch (Intrinsic->getIntrinsicID()) {
        default:
          Result = false;
          break;
        case Intrinsic::amdgcn_if_break:
        case Intrinsic::amdgcn_if:
        case Intrinsic::amdgcn_else:
          Result = true;
          break;
        }
      }
      // NOTE(review): if V appears as both operand 1 and operand 0 of the
      // same intrinsic, this second switch's default can reset a Result the
      // first switch set to true — confirm that is intended.
      if (V == U->getOperand(0)) {
        switch (Intrinsic->getIntrinsicID()) {
        default:
          Result = false;
          break;
        case Intrinsic::amdgcn_end_cf:
        case Intrinsic::amdgcn_loop:
          Result = true;
          break;
        }
      }
    } else {
      // Non-intrinsic user: recurse into its users.
      Result = hasCFUser(U, Visited);
    }
    if (Result)
      break;
  }
  return Result;
}

/// Return true if the IR value \p V must be assigned a uniform (SGPR)
/// register.  This holds when:
///   * V is itself an amdgcn.if.break call;
///   * V is element 1 extracted from the struct returned by amdgcn.if or
///     amdgcn.else (the mask part of the result);
///   * V is an inline-asm call with an output constraint resolving to a
///     specific SGPR or to an SGPR register class;
///   * or any transitive user of V is a control-flow intrinsic that needs a
///     uniform input (see hasCFUser).
bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
                                               const Value *V) const {
  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
    switch (Intrinsic->getIntrinsicID()) {
    default:
      return false;
    case Intrinsic::amdgcn_if_break:
      return true;
    }
  }
  if (const ExtractValueInst *ExtValue = dyn_cast<ExtractValueInst>(V)) {
    if (const IntrinsicInst *Intrinsic =
        dyn_cast<IntrinsicInst>(ExtValue->getOperand(0))) {
      switch (Intrinsic->getIntrinsicID()) {
      default:
        return false;
      case Intrinsic::amdgcn_if:
      case Intrinsic::amdgcn_else: {
        // Only the mask element (index 1) of the {value, mask} result is
        // required to be uniform.
        ArrayRef<unsigned> Indices = ExtValue->getIndices();
        if (Indices.size() == 1 && Indices[0] == 1) {
          return true;
        }
      }
      }
    }
  }
  if (const CallInst *CI = dyn_cast<CallInst>(V)) {
    if (isa<InlineAsm>(CI->getCalledValue())) {
      // Parse the inline-asm constraints; any output bound to an SGPR
      // (either a concrete register or an SGPR class) forces uniformity.
      const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
      ImmutableCallSite CS(CI);
      TargetLowering::AsmOperandInfoVector TargetConstraints = ParseConstraints(
          MF.getDataLayout(), Subtarget->getRegisterInfo(), CS);
      for (auto &TC : TargetConstraints) {
        if (TC.Type == InlineAsm::isOutput) {
          ComputeConstraintToUse(TC, SDValue());
          unsigned AssignedReg;
          const TargetRegisterClass *RC;
          std::tie(AssignedReg, RC) = getRegForInlineAsmConstraint(
              SIRI, TC.ConstraintCode, TC.ConstraintVT);
          if (RC) {
            MachineRegisterInfo &MRI = MF.getRegInfo();
            if (AssignedReg != 0 && SIRI->isSGPRReg(MRI, AssignedReg))
              return true;
            else if (SIRI->isSGPRClass(RC))
              return true;
          }
        }
      }
    }
  }
  // Fall back to scanning transitive users for control-flow intrinsics.
  SmallPtrSet<const Value *, 16> Visited;
  return hasCFUser(V, Visited);
}
Loading

0 comments on commit 717c9c6

Please sign in to comment.