Skip to content

Commit

Permalink
[Xe] Handle 64-bit scalar moves
Browse files Browse the repository at this point in the history
Add support for lowering copies of SRF_2x32 registers.
This fixes the issue reported in intel#1119.
  • Loading branch information
jfuentes authored and Joel Fuentes committed May 4, 2024
1 parent db9b2c2 commit c4ddfe2
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 27 deletions.
57 changes: 31 additions & 26 deletions llvm/lib/Target/Xe/XeInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,42 +75,40 @@ bool XeInstrInfo::isSyncInstr(const MachineInstr &MI) const {
}
}

static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
unsigned NumRegs, bool IsGRF) {
// FIXME: swap the condition for SRF since sr0's encoding value is larger
// than sr1
assert(IsGRF && "SRF is not tested");
return DestReg > SrcReg && (DestReg - SrcReg) < NumRegs;
bool XeInstrInfo::forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
unsigned NumRegs, bool IsGRF) const {
const XeRegisterInfo *TRI = &getRegisterInfo();
unsigned DestEnc = TRI->getEncodingValue(DestReg);
unsigned SrcEnc = TRI->getEncodingValue(SrcReg);
return IsGRF ? DestEnc > SrcEnc && (DestEnc - SrcEnc) < NumRegs
: DestEnc < SrcEnc && (SrcEnc - DestEnc) < NumRegs;
}

void XeInstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg,
MCRegister SrcReg, bool KillSrc, unsigned Op,
unsigned RegSize, bool IsGRF) const {
// Copies a register tuple from SrcReg to DestReg by emitting one `Op`
// instruction per sub-register index returned by getSubRegsFromSize(RegSize).
// The sub-registers are walked in ascending order unless
// forwardCopyWillClobberTuple() reports an overlap, in which case they are
// walked in descending order.
// NOTE(review): this listing is a diff rendered without +/- markers, so
// pre-change and post-change statements appear together below (e.g. SubReg is
// declared twice and two BuildMI chains are present) -- confirm against the
// repository before relying on the exact statement sequence.
void XeInstrInfo::copyPhysRegTuple(
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Op,
unsigned RegSize, const TargetRegisterClass *RC, bool IsGRF) const {
const XeRegisterInfo *TRI = &getRegisterInfo();
uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
unsigned NumElts = TRI->getNumEltsFromRegClass(RC);
// Number of RegSize-bit moves needed to cover the whole register class.
unsigned TotalSize = TRI->getRegSizeInBits(*RC);
unsigned NumMoves = TotalSize / RegSize;
auto Indices = TRI->getSubRegsFromSize(RegSize);
unsigned NumIndices = Indices.size();

int SubReg = 0, End = NumIndices, Incr = 1;
if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding,
NumIndices * (RegSize == 64 ? 2 : 1),
IsGRF)) {
SubReg = NumIndices - 1;
assert(NumMoves > 1 && "More than one move expected");

// Default to a forward walk over [0, NumMoves).
int SubReg = 0, End = NumMoves, Incr = 1;
if (forwardCopyWillClobberTuple(DestReg, SrcReg, NumElts, IsGRF)) {
// Reverse the walk: start at the last index and step down, stopping at -1.
SubReg = NumMoves - 1;
End = -1;
Incr = -1;
}

// Each iteration emits a move for one sub-register; the source operand
// carries the kill state derived from KillSrc.
for (; SubReg != End; SubReg += Incr)
BuildMI(MBB, I, DL, get(Op), RI.getSubReg(DestReg, Indices[SubReg]))
.addReg(RI.getSubReg(SrcReg, Indices[SubReg]), getKillRegState(KillSrc))
.addImm(static_cast<uint32_t>(Xe::SrcModifier::None))
.addImm(/*Fields*/ 0)
.addReg(Xe::ptrue)
.addImm(/*predicate modifier*/ 0)
.addImm(/*SWSB*/ 0);
addCommonFields(
BuildMI(MBB, I, DL, get(Op), RI.getSubReg(DestReg, Indices[SubReg]))
.addReg(RI.getSubReg(SrcReg, Indices[SubReg]),
getKillRegState(KillSrc))
.addImm(static_cast<uint32_t>(Xe::SrcModifier::None)));
}

void XeInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Expand Down Expand Up @@ -141,8 +139,15 @@ void XeInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (Xe::GRF_4x32RegClass.contains(DestReg, SrcReg)) {
Op = Xe::MOV_b64_r;
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Op, /*RegSize*/ 64,
&Xe::GRF_4x32RegClass,
/*IsGRF*/ true);
return;
} else if (Xe::SRF_2x32RegClass.contains(DestReg, SrcReg)) {
Op = Xe::SMOV_b32_r;
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Op, /*RegSize*/ 32,
&Xe::SRF_2x32RegClass,
/*IsGRF*/ false);
return;
}

assert(Op && "Cannot copy registers");
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/Xe/XeInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ class XeInstrInfo : public XeGenInstrInfo {
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg,
MCRegister SrcReg, bool KillSrc, unsigned Op,
unsigned RegSize, bool IsGRF) const;
unsigned RegSize, const TargetRegisterClass *RC,
bool IsGRF) const;
bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
unsigned NumRegs, bool IsGRF) const;

bool isFP(const MachineInstr& MI) const;
bool isInt(const MachineInstr& MI) const;
Expand Down
13 changes: 13 additions & 0 deletions llvm/test/CodeGen/Xe/lower-copy.mir
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,19 @@ body: |
; CHECK: $sr24 = SMOV_b32_r $sr0, 0, 0, $ptrue, 0, 0
$sr24 = COPY $sr0
; CHECK:$sr24 = SMOV_b32_r $sr5, 0, 0, $ptrue, 0, 0
; CHECK:$sr25 = SMOV_b32_r $sr6, 0, 0, $ptrue, 0, 0
$sr24_sr25 = COPY $sr5_sr6
; Do a backward copy, as a forward copy would clobber sr8
; CHECK:$sr9 = SMOV_b32_r $sr8, 0, 0, $ptrue, 0, 0
; CHECK:$sr8 = SMOV_b32_r $sr7, 0, 0, $ptrue, 0, 0
$sr8_sr9 = COPY $sr7_sr8
; CHECK:$sr4 = SMOV_b32_r $sr5, 0, 0, $ptrue, 0, 0
; CHECK:$sr5 = SMOV_b32_r $sr6, 0, 0, $ptrue, 0, 0
$sr4_sr5 = COPY $sr5_sr6
; CHECK: RET 0, $ptrue, 0, 0
RET 0, $ptrue, 0, 0
...

0 comments on commit c4ddfe2

Please sign in to comment.