Skip to content

Commit

Permalink
irjit: Handle branch/jump in branch delay slots.
Browse files Browse the repository at this point in the history
See hrydgard#15952 for more detail.
  • Loading branch information
unknownbrackets committed Sep 4, 2022
1 parent 0fc3619 commit d08ee44
Showing 1 changed file with 83 additions and 16 deletions.
99 changes: 83 additions & 16 deletions Core/MIPS/IR/IRCompBranch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,27 @@ using namespace MIPSAnalyst;
namespace MIPSComp
{

// This seems to be the same for all branch types.
static u32 ResolveNotTakenTarget(u32 pc, const MIPSOpcode &op, bool likely, const MIPSOpcode &delaySlotOp, const MIPSInfo &delaySlotInfo) {
u32 notTakenTarget = pc + 8;
if ((delaySlotInfo & (IS_JUMP | IS_CONDBRANCH)) != 0) {
// If a branch has a j/jr/jal/jalr as a delay slot, that is run if the branch is not taken.
// TODO: Technically, in the likely case, we should somehow suppress andLink on this exit.
bool isJump = (delaySlotInfo & IS_JUMP) != 0;
// If the delay slot is a branch, likely skips it.
if (isJump || !likely)
notTakenTarget -= 4;

// For a branch (not a jump), it actually should try the delay slot and take its target potentially.
// This is similar to the VFPU case and has not been seen, so just report it.
if (!isJump && SignExtend16ToU32(delaySlotOp) != SignExtend16ToU32(op) - 1)
ERROR_LOG_REPORT(JIT, "Branch in branch delay slot at %08x with different target", pc);
if (isJump && likely && (delaySlotInfo & (OUT_RA | OUT_RD)) != 0)
ERROR_LOG_REPORT(JIT, "Jump in likely branch delay slot with link at %08x", pc);
}
return notTakenTarget;
}

void IRFrontend::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) {
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart);
Expand All @@ -65,7 +86,9 @@ void IRFrontend::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) {
u32 targetAddr = GetCompilerPC() + offset + 4;

MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
MIPSInfo delaySlotInfo = MIPSGetInfo(delaySlotOp);
js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
bool delaySlotIsBranch = (delaySlotInfo & (IS_JUMP | IS_CONDBRANCH)) != 0;
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);

// Often, div/divu are followed by a likely "break" if the divisor was zero.
Expand All @@ -91,18 +114,28 @@ void IRFrontend::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) {
}
}

if (!likely)
u32 notTakenTarget = ResolveNotTakenTarget(GetCompilerPC(), op, likely, delaySlotOp, delaySlotInfo);

if (!likely && !delaySlotIsBranch)
CompileDelaySlot();

int dcAmount = js.downcountAmount;
ir.Write(IROp::Downcount, 0, ir.AddConstant(dcAmount));
js.downcountAmount = 0;

FlushAll();
ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), lhs, rhs);
ir.Write(ComparisonToExit(cc), ir.AddConstant(notTakenTarget), lhs, rhs);
// This makes the block "impure" :(
if (likely)
if (likely && !delaySlotIsBranch)
CompileDelaySlot();
if (delaySlotIsBranch) {
// We still link when the branch is taken (targetAddr case.)
// Remember, it's from the perspective of the delay slot, so +12.
if ((delaySlotInfo & OUT_RA) != 0)
ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 12);
if ((delaySlotInfo & OUT_RD) != 0)
ir.WriteSetConstant(MIPS_GET_RD(delaySlotOp), GetCompilerPC() + 12);
}

FlushAll();
ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr));
Expand All @@ -122,7 +155,9 @@ void IRFrontend::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink,
u32 targetAddr = GetCompilerPC() + offset + 4;

MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
MIPSInfo delaySlotInfo = MIPSGetInfo(delaySlotOp);
js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
bool delaySlotIsBranch = (delaySlotInfo & (IS_JUMP | IS_CONDBRANCH)) != 0;
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);

MIPSGPReg lhs = rs;
Expand All @@ -133,17 +168,28 @@ void IRFrontend::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink,
if (andLink)
ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 8);

if (!likely)
u32 notTakenTarget = ResolveNotTakenTarget(GetCompilerPC(), op, likely, delaySlotOp, delaySlotInfo);

if (!likely && !delaySlotIsBranch)
CompileDelaySlot();

int dcAmount = js.downcountAmount;
ir.Write(IROp::Downcount, 0, ir.AddConstant(dcAmount));
js.downcountAmount = 0;

FlushAll();
ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), lhs);
if (likely)
ir.Write(ComparisonToExit(cc), ir.AddConstant(notTakenTarget), lhs);
if (likely && !delaySlotIsBranch)
CompileDelaySlot();
if (delaySlotIsBranch) {
// We still link when the branch is taken (targetAddr case.)
// Remember, it's from the perspective of the delay slot, so +12.
if ((delaySlotInfo & OUT_RA) != 0)
ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 12);
if ((delaySlotInfo & OUT_RD) != 0)
ir.WriteSetConstant(MIPS_GET_RD(delaySlotOp), GetCompilerPC() + 12);
}

// Taken
FlushAll();
ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr));
Expand Down Expand Up @@ -199,20 +245,35 @@ void IRFrontend::BranchFPFlag(MIPSOpcode op, IRComparison cc, bool likely) {
int offset = TARGET16;
u32 targetAddr = GetCompilerPC() + offset + 4;

MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
MIPSInfo delaySlotInfo = MIPSGetInfo(delaySlotOp);
bool delaySlotIsBranch = (delaySlotInfo & (IS_JUMP | IS_CONDBRANCH)) != 0;

ir.Write(IROp::FpCondToReg, IRTEMP_LHS);
if (!likely)
if (!likely && !delaySlotIsBranch)
CompileDelaySlot();

int dcAmount = js.downcountAmount;
ir.Write(IROp::Downcount, 0, ir.AddConstant(dcAmount));
js.downcountAmount = 0;

u32 notTakenTarget = ResolveNotTakenTarget(GetCompilerPC(), op, likely, delaySlotOp, delaySlotInfo);

FlushAll();
// Not taken
ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), IRTEMP_LHS, 0);
ir.Write(ComparisonToExit(cc), ir.AddConstant(notTakenTarget), IRTEMP_LHS, 0);
// Taken
if (likely)
if (likely && !delaySlotIsBranch)
CompileDelaySlot();
if (delaySlotIsBranch) {
// We still link when the branch is taken (targetAddr case.)
// Remember, it's from the perspective of the delay slot, so +12.
if ((delaySlotInfo & OUT_RA) != 0)
ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 12);
if ((delaySlotInfo & OUT_RD) != 0)
ir.WriteSetConstant(MIPS_GET_RD(delaySlotOp), GetCompilerPC() + 12);
}

FlushAll();
ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr));

Expand Down Expand Up @@ -243,33 +304,39 @@ void IRFrontend::BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely) {
u32 targetAddr = GetCompilerPC() + offset + 4;

MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
MIPSInfo delaySlotInfo = MIPSGetInfo(delaySlotOp);
js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
ir.Write(IROp::VfpuCtrlToReg, IRTEMP_LHS, VFPU_CTRL_CC);

// Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle)
// The behavior is undefined - the CPU may take the second branch even if the first one passes.
// However, it does consistently try each branch, which these games seem to expect.
bool delaySlotIsBranch = MIPSCodeUtils::IsVFPUBranch(delaySlotOp);
if (!likely)
bool delaySlotIsBranch = (delaySlotInfo & (IS_JUMP | IS_CONDBRANCH)) != 0;
if (!likely && !delaySlotIsBranch)
CompileDelaySlot();

int dcAmount = js.downcountAmount;
ir.Write(IROp::Downcount, 0, ir.AddConstant(dcAmount));
js.downcountAmount = 0;

if (delaySlotIsBranch && (signed short)(delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1)
ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target", GetCompilerPC());
u32 notTakenTarget = ResolveNotTakenTarget(GetCompilerPC(), op, likely, delaySlotOp, delaySlotInfo);

int imm3 = (op >> 18) & 7;

u32 notTakenTarget = GetCompilerPC() + (delaySlotIsBranch ? 4 : 8);

ir.Write(IROp::AndConst, IRTEMP_LHS, IRTEMP_LHS, ir.AddConstant(1 << imm3));
FlushAll();
ir.Write(ComparisonToExit(cc), ir.AddConstant(notTakenTarget), IRTEMP_LHS, 0);

if (likely)
if (likely && !delaySlotIsBranch)
CompileDelaySlot();
if (delaySlotIsBranch) {
// We still link when the branch is taken (targetAddr case.)
// Remember, it's from the perspective of the delay slot, so +12.
if ((delaySlotInfo & OUT_RA) != 0)
ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 12);
if ((delaySlotInfo & OUT_RD) != 0)
ir.WriteSetConstant(MIPS_GET_RD(delaySlotOp), GetCompilerPC() + 12);
}

// Taken
FlushAll();
Expand Down

0 comments on commit d08ee44

Please sign in to comment.