Skip to content

Commit

Permalink
Revert "[AMDGPU] Fix-up cases where writelane has 2 SGPR operands"
Browse files Browse the repository at this point in the history
This reverts commit 0c5e25e.

Preparation for picking up the same change from upstream (with modifications).

Change-Id: I6f8e32f0928a30ebea294decfac027d16ff30a34
  • Loading branch information
dstutt committed Oct 21, 2019
1 parent 556e5e3 commit ec94370
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 153 deletions.
64 changes: 0 additions & 64 deletions llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUMCUtils.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
Expand Down Expand Up @@ -405,67 +404,6 @@ static bool isReachable(const MachineInstr *From,
(const MachineBasicBlock *MBB) { return MBB == MBBFrom; });
}

// V_WRITELANE_B32 is unusual: it may read an SGPR and M0 together. Normally
// that would count as two constant-bus uses, but for this instruction the lane
// selector is exempt. The "at most one SGPR" rule still applies, though, and
// legalizing VGPRs to SGPRs can leave both sources as SGPRs.
//
// This walks every instruction in MF and, for each V_WRITELANE_B32 whose src0
// and src1 are both non-M0 SGPR registers, either
//   * replaces one source with an inline-constant immediate when
//     AMDGPU::foldToImm can resolve it, or
//   * emits "S_MOV_B32 M0, src1" in front of the instruction and rewrites src1
//     to read M0.
// Returns true if any instruction was rewritten.
static bool fixWriteLane(MachineFunction &MF) {
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = Subtarget.getInstrInfo();
  const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  // True for a register operand that is an SGPR other than M0.
  auto IsNonM0SGPR = [&](const MachineOperand &MO) {
    return MO.isReg() && TRI->isSGPRReg(MRI, MO.getReg()) &&
           MO.getReg() != AMDGPU::M0;
  };

  bool Modified = false;

  for (MachineBasicBlock &Block : MF) {
    for (MachineInstr &Inst : Block) {
      if (Inst.getOpcode() != AMDGPU::V_WRITELANE_B32)
        continue;

      MachineOperand &FirstSrc = Inst.getOperand(
          AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src0));
      MachineOperand &SecondSrc = Inst.getOperand(
          AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src1));

      // Only instructions that break the 1 SGPR rule need attention.
      if (!IsNonM0SGPR(FirstSrc) || !IsNonM0SGPR(SecondSrc))
        continue;

      // Cheapest fix first: constant-propagate an inline immediate into one
      // of the two sources (src0 is tried before src1).
      bool FoldedImm = false;
      MachineOperand *const Candidates[] = {&FirstSrc, &SecondSrc};
      for (MachineOperand *Candidate : Candidates) {
        auto Imm = AMDGPU::foldToImm(*Candidate, &MRI, TII);
        if (!Imm || !TII->isInlineConstant(APInt(64, *Imm, true)))
          continue;

        Candidate->ChangeToImmediate(*Imm);
        FoldedImm = true;
        break;
      }

      if (!FoldedImm) {
        // No immediate available: route src1 through M0 instead.
        BuildMI(*Inst.getParent(), Inst, Inst.getDebugLoc(),
                TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
            .add(SecondSrc);
        SecondSrc.ChangeToRegister(AMDGPU::M0, false);
      }

      // Either path above rewrote the instruction.
      Modified = true;
    }
  }

  return Modified;
}

// Return the first non-prologue instruction in the block.
static MachineBasicBlock::iterator
getFirstNonPrologue(MachineBasicBlock *MBB, const TargetInstrInfo *TII) {
Expand Down Expand Up @@ -750,8 +688,6 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
}
}

fixWriteLane(MF);

if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
hoistAndMergeSGPRInits(AMDGPU::M0, *MRI, TRI, *MDT, TII);

Expand Down
42 changes: 35 additions & 7 deletions llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDGPUMCUtils.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
Expand Down Expand Up @@ -78,6 +77,8 @@ class SIPeepholeSDWA : public MachineFunctionPass {
std::unordered_map<MachineInstr *, SDWAOperandsVector> PotentialMatches;
SmallVector<MachineInstr *, 8> ConvertedInstructions;

Optional<int64_t> foldToImm(const MachineOperand &Op) const;

public:
static char ID;

Expand Down Expand Up @@ -518,6 +519,33 @@ bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI,
return SDWADstOperand::convertToSDWA(MI, TII);
}

// Try to resolve Op to a constant integer.
//
// Returns the value directly when Op is an immediate. When Op is a register,
// the register's def operands are scanned for the first one that isSameReg()
// accepts; if that def belongs to a foldable copy whose operand 1 is an
// immediate (e.g. "%1 = S_MOV_B32 255"), that immediate is returned. Only this
// first matching def is examined. Every other case yields None.
Optional<int64_t> SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const {
  if (Op.isImm())
    return Op.getImm();

  // Anything that is neither an immediate nor a register cannot be folded.
  if (!Op.isReg())
    return None;

  for (const MachineOperand &RegDef : MRI->def_operands(Op.getReg())) {
    if (isSameReg(Op, RegDef)) {
      const MachineInstr *Producer = RegDef.getParent();
      if (TII->isFoldableCopy(*Producer)) {
        const MachineOperand &Source = Producer->getOperand(1);
        if (Source.isImm())
          return Source.getImm();
      }

      // The matching def is not a copy of an immediate; give up rather than
      // looking at further defs.
      return None;
    }
  }

  return None;
}

std::unique_ptr<SDWAOperand>
SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
unsigned Opcode = MI.getOpcode();
Expand All @@ -537,7 +565,7 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
// from: v_lshlrev_b32_e32 v1, 16/24, v0
// to SDWA dst:v1 dst_sel:WORD_1/BYTE_3 dst_unused:UNUSED_PAD
MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
auto Imm = AMDGPU::foldToImm(*Src0, MRI, TII);
auto Imm = foldToImm(*Src0);
if (!Imm)
break;

Expand Down Expand Up @@ -578,7 +606,7 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
// from: v_lshlrev_b16_e32 v1, 8, v0
// to SDWA dst:v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD
MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
auto Imm = AMDGPU::foldToImm(*Src0, MRI, TII);
auto Imm = foldToImm(*Src0);
if (!Imm || *Imm != 8)
break;

Expand Down Expand Up @@ -618,12 +646,12 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
// 24 | 8 | BYTE_3

MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
auto Offset = AMDGPU::foldToImm(*Src1, MRI, TII);
auto Offset = foldToImm(*Src1);
if (!Offset)
break;

MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
auto Width = AMDGPU::foldToImm(*Src2, MRI, TII);
auto Width = foldToImm(*Src2);
if (!Width)
break;

Expand Down Expand Up @@ -666,10 +694,10 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
auto ValSrc = Src1;
auto Imm = AMDGPU::foldToImm(*Src0, MRI, TII);
auto Imm = foldToImm(*Src0);

if (!Imm) {
Imm = AMDGPU::foldToImm(*Src1, MRI, TII);
Imm = foldToImm(*Src1);
ValSrc = Src0;
}

Expand Down
52 changes: 0 additions & 52 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUMCUtils.cpp

This file was deleted.

25 changes: 0 additions & 25 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUMCUtils.h

This file was deleted.

1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,4 @@ add_llvm_library(LLVMAMDGPUUtils
AMDKernelCodeTUtils.cpp
AMDGPUAsmUtils.cpp
AMDGPUPALMetadata.cpp
AMDGPUMCUtils.cpp
)
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #0

; CHECK-LABEL: {{^}}test_writelane_sreg:
; CHECK: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, m0
; CHECK: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
define amdgpu_kernel void @test_writelane_sreg(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #1 {
%oldval = load i32, i32 addrspace(1)* %out
%writelane = call i32 @llvm.amdgcn.writelane(i32 %src0, i32 %src1, i32 %oldval)
Expand Down Expand Up @@ -39,7 +39,7 @@ define amdgpu_kernel void @test_writelane_vreg_lane(i32 addrspace(1)* %out, <2 x
; CHECK-LABEL: {{^}}test_writelane_m0_sreg:
; CHECK: s_mov_b32 m0, -1
; CHECK: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
; CHECK: v_writelane_b32 v{{[0-9]+}}, [[COPY_M0]], m0
; CHECK: v_writelane_b32 v{{[0-9]+}}, [[COPY_M0]], s{{[0-9]+}}
define amdgpu_kernel void @test_writelane_m0_sreg(i32 addrspace(1)* %out, i32 %src1) #1 {
%oldval = load i32, i32 addrspace(1)* %out
%m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
Expand All @@ -59,7 +59,7 @@ define amdgpu_kernel void @test_writelane_imm(i32 addrspace(1)* %out, i32 %src0)

; CHECK-LABEL: {{^}}test_writelane_sreg_oldval:
; CHECK: v_mov_b32_e32 [[OLDVAL:v[0-9]+]], s{{[0-9]+}}
; CHECK: v_writelane_b32 [[OLDVAL]], s{{[0-9]+}}, m0
; CHECK: v_writelane_b32 [[OLDVAL]], s{{[0-9]+}}, s{{[0-9]+}}
define amdgpu_kernel void @test_writelane_sreg_oldval(i32 inreg %oldval, i32 addrspace(1)* %out, i32 %src0, i32 %src1) #1 {
%writelane = call i32 @llvm.amdgcn.writelane(i32 %src0, i32 %src1, i32 %oldval)
store i32 %writelane, i32 addrspace(1)* %out, align 4
Expand All @@ -68,7 +68,7 @@ define amdgpu_kernel void @test_writelane_sreg_oldval(i32 inreg %oldval, i32 add

; CHECK-LABEL: {{^}}test_writelane_imm_oldval:
; CHECK: v_mov_b32_e32 [[OLDVAL:v[0-9]+]], 42
; CHECK: v_writelane_b32 [[OLDVAL]], s{{[0-9]+}}, m0
; CHECK: v_writelane_b32 [[OLDVAL]], s{{[0-9]+}}, s{{[0-9]+}}
define amdgpu_kernel void @test_writelane_imm_oldval(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #1 {
%writelane = call i32 @llvm.amdgcn.writelane(i32 %src0, i32 %src1, i32 42)
store i32 %writelane, i32 addrspace(1)* %out, align 4
Expand Down

0 comments on commit ec94370

Please sign in to comment.