Skip to content

Commit

Permalink
Merge pull request #19262 from hrydgard/ir-specialization
Browse files Browse the repository at this point in the history
IR: Add some interpreter-only IR instructions for faster interpretation
  • Loading branch information
hrydgard authored Jun 7, 2024
2 parents 55fecce + 0c24629 commit 27815c7
Showing 7 changed files with 163 additions and 7 deletions.
3 changes: 2 additions & 1 deletion Core/MIPS/IR/IRFrontend.cpp
Original file line number Diff line number Diff line change
@@ -284,14 +284,15 @@ void IRFrontend::DoJit(u32 em_address, std::vector<IRInst> &instructions, u32 &m
&PropagateConstants,
&PurgeTemps,
&ReduceVec4Flush,
&OptimizeLoadsAfterStores,
// &ReorderLoadStore,
// &MergeLoadStore,
// &ThreeOpToTwoOp,
};

if (opts.optimizeForInterpreter) {
// Add special passes here.
// passes.push_back(&ReorderLoadStore);
passes.push_back(&OptimizeForInterpreter);
}
if (IRApplyPasses(passes.data(), passes.size(), ir, simplified, opts))
logBlocks = 1;
20 changes: 17 additions & 3 deletions Core/MIPS/IR/IRInst.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "Common/CommonFuncs.h"
#include "Common/Log.h"
#include "Core/MIPS/IR/IRInst.h"
#include "Core/MIPS/MIPSDebugInterface.h"
#include "Core/HLE/ReplaceTables.h"
@@ -8,6 +9,7 @@
// _ = ignore
// G = GPR register
// C = 32-bit constant from array
// c = 8-bit constant from array
// I = immediate value from instruction
// F = FPR register, single
// V = FPR register, Vec4. Reg number always divisible by 4.
@@ -29,10 +31,13 @@ static const IRMeta irMeta[] = {
{ IROp::Or, "Or", "GGG" },
{ IROp::Xor, "Xor", "GGG" },
{ IROp::AddConst, "AddConst", "GGC" },
{ IROp::OptAddConst, "OptAddConst", "GC" },
{ IROp::SubConst, "SubConst", "GGC" },
{ IROp::AndConst, "AndConst", "GGC" },
{ IROp::OrConst, "OrConst", "GGC" },
{ IROp::XorConst, "XorConst", "GGC" },
{ IROp::OptAndConst, "OptAndConst", "GC" },
{ IROp::OptOrConst, "OptOrConst", "GC" },
{ IROp::Shl, "Shl", "GGG" },
{ IROp::Shr, "Shr", "GGG" },
{ IROp::Sar, "Sar", "GGG" },
@@ -115,6 +120,7 @@ static const IRMeta irMeta[] = {
{ IROp::FSatMinus1_1, "FSat(-1 - 1)", "FF" },
{ IROp::FMovFromGPR, "FMovFromGPR", "FG" },
{ IROp::FMovToGPR, "FMovToGPR", "GF" },
{ IROp::OptFMovToGPRShr8, "OptFMovToGPRShr8", "GF" },
{ IROp::FpCondFromReg, "FpCondFromReg", "_G" },
{ IROp::FpCondToReg, "FpCondToReg", "G" },
{ IROp::FpCtrlFromReg, "FpCtrlFromReg", "_G" },
@@ -128,7 +134,7 @@ static const IRMeta irMeta[] = {
{ IROp::FCmpVfpuAggregate, "FCmpVfpuAggregate", "I" },
{ IROp::Vec4Init, "Vec4Init", "Vv" },
{ IROp::Vec4Shuffle, "Vec4Shuffle", "VVs" },
{ IROp::Vec4Blend, "Vec4Blend", "VVVC" },
{ IROp::Vec4Blend, "Vec4Blend", "VVVc" },
{ IROp::Vec4Mov, "Vec4Mov", "VV" },
{ IROp::Vec4Add, "Vec4Add", "VVV" },
{ IROp::Vec4Sub, "Vec4Sub", "VVV" },
@@ -218,6 +224,11 @@ int IRWriter::AddConstantFloat(float value) {
return AddConstant(val);
}

void IRWriter::ReplaceConstant(size_t instNumber, u32 newConstant) {
_dbg_assert_(instNumber < insts_.size());
insts_[instNumber].constant = newConstant;
}

static std::string GetGPRName(int r) {
if (r < 32) {
return currentDebugMIPS->GetRegName(0, r);
@@ -293,10 +304,13 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, u32 constant)
}
break;
case 'C':
snprintf(buf, bufSize, "%08x", constant);
snprintf(buf, bufSize, "0x%08x", constant);
break;
case 'c':
snprintf(buf, bufSize, "0x%02x", constant);
break;
case 'I':
snprintf(buf, bufSize, "%02x", param);
snprintf(buf, bufSize, "0x%02x", param);
break;
case 'm':
snprintf(buf, bufSize, "%d", param);
8 changes: 8 additions & 0 deletions Core/MIPS/IR/IRInst.h
Original file line number Diff line number Diff line change
@@ -17,6 +17,9 @@
// even be directly JIT-ed, but the gains will probably be tiny over our older direct
// MIPS->target JITs.

// Ops beginning with "Opt" are specialized for IR Interpreter use. These will not be produced
// for the IR JITs.

enum class IROp : uint8_t {
SetConst,
SetConstF,
@@ -33,11 +36,14 @@ enum class IROp : uint8_t {
Xor,

AddConst,
OptAddConst,
SubConst,

AndConst,
OrConst,
XorConst,
OptAndConst,
OptOrConst,

Shl,
Shr,
@@ -133,6 +139,7 @@ enum class IROp : uint8_t {

FMovFromGPR,
FMovToGPR,
OptFMovToGPRShr8,

FSat0_1,
FSatMinus1_1,
@@ -391,6 +398,7 @@ class IRWriter {
void Clear() {
insts_.clear();
}
void ReplaceConstant(size_t instNumber, u32 newConstant);

const std::vector<IRInst> &GetInstructions() const { return insts_; }

21 changes: 20 additions & 1 deletion Core/MIPS/IR/IRInterpreter.cpp
Original file line number Diff line number Diff line change
@@ -120,15 +120,24 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
case IROp::AddConst:
mips->r[inst->dest] = mips->r[inst->src1] + inst->constant;
break;
case IROp::OptAddConst: // For this one, it's worth having a "unary" variant of the above that only needs to read one register param.
mips->r[inst->dest] += inst->constant;
break;
case IROp::SubConst:
mips->r[inst->dest] = mips->r[inst->src1] - inst->constant;
break;
case IROp::AndConst:
mips->r[inst->dest] = mips->r[inst->src1] & inst->constant;
break;
case IROp::OptAndConst: // For this one, it's worth having a "unary" variant of the above that only needs to read one register param.
mips->r[inst->dest] &= inst->constant;
break;
case IROp::OrConst:
mips->r[inst->dest] = mips->r[inst->src1] | inst->constant;
break;
case IROp::OptOrConst:
mips->r[inst->dest] |= inst->constant;
break;
case IROp::XorConst:
mips->r[inst->dest] = mips->r[inst->src1] ^ inst->constant;
break;
@@ -431,6 +440,8 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {

case IROp::Vec2Pack31To16:
{
// Used in Tekken 6

u32 val = (mips->fi[inst->src1] >> 15) & 0xFFFF;
val |= (mips->fi[inst->src1 + 1] << 1) & 0xFFFF0000;
mips->fi[inst->dest] = val;
@@ -451,6 +462,8 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {

case IROp::Vec4Pack31To8:
{
// Used in Tekken 6

// Removed previous SSE code due to the need for unsigned 16-bit pack, which I'm too lazy to work around the lack of in SSE2.
// pshufb or SSE4 instructions can be used instead.
u32 val = (mips->fi[inst->src1] >> 23) & 0xFF;
@@ -987,7 +1000,13 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
case IROp::FMovToGPR:
memcpy(&mips->r[inst->dest], &mips->f[inst->src1], 4);
break;

case IROp::OptFMovToGPRShr8:
{
u32 temp;
memcpy(&temp, &mips->f[inst->src1], 4);
mips->r[inst->dest] = temp >> 8;
break;
}
case IROp::ExitToConst:
return inst->constant;

8 changes: 6 additions & 2 deletions Core/MIPS/IR/IRJit.cpp
Original file line number Diff line number Diff line change
@@ -255,15 +255,19 @@ void IRJit::RunLoopUntil(u64 globalticks) {
u32 opcode = inst & 0xFF000000;
if (opcode == MIPS_EMUHACK_OPCODE) {
u32 offset = inst & 0x00FFFFFF; // Alternatively, inst - opcode
const IRInst *instPtr = blocks_.GetArenaPtr() + offset;
_dbg_assert_(instPtr->op == IROp::Downcount);
mips->downcount -= instPtr->constant;
instPtr++;
#ifdef IR_PROFILING
IRBlock *block = blocks_.GetBlock(blocks_.GetBlockNumFromOffset(offset));
TimeSpan span;
mips->pc = IRInterpret(mips, blocks_.GetArenaPtr() + offset);
mips->pc = IRInterpret(mips, instPtr);
int64_t elapsedNanos = span.ElapsedNanos();
block->profileStats_.executions += 1;
block->profileStats_.totalNanos += elapsedNanos;
#else
mips->pc = IRInterpret(mips, blocks_.GetArenaPtr() + offset);
mips->pc = IRInterpret(mips, instPtr);
#endif
// Note: this will "jump to zero" on a badly constructed block missing exits.
if (!Memory::IsValid4AlignedAddress(mips->pc)) {
107 changes: 107 additions & 0 deletions Core/MIPS/IR/IRPassSimplify.cpp
Original file line number Diff line number Diff line change
@@ -2149,3 +2149,110 @@ bool ReduceVec4Flush(const IRWriter &in, IRWriter &out, const IROptions &opts) {
}
return logBlocks;
}

// This optimizes away redundant loads-after-stores, which are surprisingly not that uncommon.
bool OptimizeLoadsAfterStores(const IRWriter &in, IRWriter &out, const IROptions &opts) {
CONDITIONAL_DISABLE;
// This tells us to skip an AND op that has been optimized out.
// Maybe we could skip multiple, but that'd slow things down and is pretty uncommon.
int nextSkip = -1;

bool logBlocks = false;
for (int i = 0, n = (int)in.GetInstructions().size(); i < n; i++) {
IRInst inst = in.GetInstructions()[i];

// Just copy the last instruction.
if (i == n - 1) {
out.Write(inst);
break;
}

out.Write(inst);

IRInst next = in.GetInstructions()[i + 1];
switch (inst.op) {
case IROp::Store32:
if (next.op == IROp::Load32 &&
next.constant == inst.constant &&
next.dest == inst.src3 &&
next.src1 == inst.src1) {
// The upcoming load is completely redundant.
// Skip it.
i++;
}
break;
default:
break;
}
}

return logBlocks;
}

bool OptimizeForInterpreter(const IRWriter &in, IRWriter &out, const IROptions &opts) {
CONDITIONAL_DISABLE;
// This tells us to skip an AND op that has been optimized out.
// Maybe we could skip multiple, but that'd slow things down and is pretty uncommon.
int nextSkip = -1;

bool logBlocks = false;
// We also move the downcount to the top so the interpreter can assume that it's there.
bool foundDowncount = false;
out.Write(IROp::Downcount);

for (int i = 0, n = (int)in.GetInstructions().size(); i < n; i++) {
IRInst inst = in.GetInstructions()[i];

bool last = i == n - 1;

// Specialize some instructions.
switch (inst.op) {
case IROp::Downcount:
if (!foundDowncount) {
// Move the value into the initial Downcount.
foundDowncount = true;
out.ReplaceConstant(0, inst.constant);
} else {
// Already had a downcount. Let's just re-emit it.
out.Write(inst);
}
break;
case IROp::AddConst:
if (inst.src1 == inst.dest) {
inst.op = IROp::OptAddConst;
}
out.Write(inst);
break;
case IROp::AndConst:
if (inst.src1 == inst.dest) {
inst.op = IROp::OptAndConst;
}
out.Write(inst);
break;
case IROp::OrConst:
if (inst.src1 == inst.dest) {
inst.op = IROp::OptOrConst;
}
out.Write(inst);
break;
case IROp::FMovToGPR:
if (!last) {
IRInst next = in.GetInstructions()[i + 1];
if (next.op == IROp::ShrImm && next.src2 == 8 && next.src1 == next.dest && next.src1 == inst.dest) {
// Heavily used when writing display lists.
inst.op = IROp::OptFMovToGPRShr8;
i++; // Skip the next instruction.
}
out.Write(inst);
} else {
out.Write(inst);
}
break;
default:
out.Write(inst);
break;
}
}

return logBlocks;
}
3 changes: 3 additions & 0 deletions Core/MIPS/IR/IRPassSimplify.h
Original file line number Diff line number Diff line change
@@ -16,3 +16,6 @@ bool ReorderLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts);
bool MergeLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts);
bool ApplyMemoryValidation(const IRWriter &in, IRWriter &out, const IROptions &opts);
bool ReduceVec4Flush(const IRWriter &in, IRWriter &out, const IROptions &opts);

// Drops a Load32 that directly follows a Store32 of the same register to the same
// base register + constant offset, since it would just read back the stored value.
bool OptimizeLoadsAfterStores(const IRWriter &in, IRWriter &out, const IROptions &opts);
// Interpreter-only specialization: puts a Downcount first in the block and substitutes
// the Opt* ops (OptAddConst, OptAndConst, OptOrConst, OptFMovToGPRShr8) where they apply.
bool OptimizeForInterpreter(const IRWriter &in, IRWriter &out, const IROptions &opts);

0 comments on commit 27815c7

Please sign in to comment.