Skip to content

Commit

Permalink
Merge pull request #19233 from hrydgard/minor-ir-opt
Browse files Browse the repository at this point in the history
Minor IR Interpreter optimizations, other bugfixes
  • Loading branch information
hrydgard authored Jun 2, 2024
2 parents 5da08ac + 7a32507 commit ac5c16f
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 25 deletions.
41 changes: 28 additions & 13 deletions Core/MIPS/IR/IRInterpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,16 +260,24 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
float temp[4];
for (int i = 0; i < 4; i++)
temp[i] = mips->f[inst->src1 + ((inst->src2 >> (i * 2)) & 3)];
const int dest = inst->dest;
for (int i = 0; i < 4; i++)
mips->f[inst->dest + i] = temp[i];
mips->f[dest + i] = temp[i];
break;
}

case IROp::Vec4Blend:
{
const int dest = inst->dest;
const int src1 = inst->src1;
const int src2 = inst->src2;
const int constant = inst->constant;
// 90% of calls to this have inst->constant == 7 or inst->constant == 8. Some use 1 or 4; other values are very rare.
// Could use _mm_blendv_ps (SSE4+BMI), vbslq_f32 (ARM), __riscv_vmerge_vvm (RISC-V)
for (int i = 0; i < 4; i++)
mips->f[inst->dest + i] = ((inst->constant >> i) & 1) ? mips->f[inst->src2 + i] : mips->f[inst->src1 + i];
mips->f[dest + i] = ((constant >> i) & 1) ? mips->f[src2 + i] : mips->f[src1 + i];
break;
}

case IROp::Vec4Mov:
{
Expand Down Expand Up @@ -377,15 +385,19 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {

case IROp::Vec2Unpack16To31:
{
mips->fi[inst->dest] = (mips->fi[inst->src1] << 16) >> 1;
mips->fi[inst->dest + 1] = (mips->fi[inst->src1] & 0xFFFF0000) >> 1;
const int dest = inst->dest;
const int src1 = inst->src1;
mips->fi[dest] = (mips->fi[src1] << 16) >> 1;
mips->fi[dest + 1] = (mips->fi[src1] & 0xFFFF0000) >> 1;
break;
}

case IROp::Vec2Unpack16To32:
{
mips->fi[inst->dest] = (mips->fi[inst->src1] << 16);
mips->fi[inst->dest + 1] = (mips->fi[inst->src1] & 0xFFFF0000);
const int dest = inst->dest;
const int src1 = inst->src1;
mips->fi[dest] = (mips->fi[src1] << 16);
mips->fi[dest + 1] = (mips->fi[src1] & 0xFFFF0000);
break;
}

Expand Down Expand Up @@ -467,22 +479,26 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
val = _mm_andnot_si128(mask, val);
_mm_store_si128((__m128i *)&mips->fi[inst->dest], val);
#else
const int src1 = inst->src1;
const int dest = inst->dest;
for (int i = 0; i < 4; i++) {
u32 val = mips->fi[inst->src1 + i];
mips->fi[inst->dest + i] = (int)val >= 0 ? val : 0;
u32 val = mips->fi[src1 + i];
mips->fi[dest + i] = (int)val >= 0 ? val : 0;
}
#endif
break;
}

case IROp::Vec4DuplicateUpperBitsAndShift1: // For vuc2i, the weird one.
{
const int src1 = inst->src1;
const int dest = inst->dest;
for (int i = 0; i < 4; i++) {
u32 val = mips->fi[inst->src1 + i];
u32 val = mips->fi[src1 + i];
val = val | (val >> 8);
val = val | (val >> 16);
val >>= 1;
mips->fi[inst->dest + i] = val;
mips->fi[dest + i] = val;
}
break;
}
Expand Down Expand Up @@ -1111,11 +1127,10 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
break;

case IROp::Nop:
_assert_(false);
break;
default:
// Unimplemented IR op. Bad.
Crash();
break;
// Unimplemented IR op. Bad.
}

#ifdef _DEBUG
Expand Down
10 changes: 9 additions & 1 deletion GPU/Common/VertexDecoderCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1320,6 +1320,10 @@ void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, const UVScale
int count = indexUpperBound - indexLowerBound + 1;
int stride = decFmt.stride;

#ifdef _DEBUG
decodedCount += count;
#endif

// Check alignment before running the decoder, as we may crash if it's bad (as should the real PSP but doesn't always)
if (((uintptr_t)verts & (biggest - 1)) != 0) {
// Bad alignment. Not really sure what to do here... zero the verts to be safe?
Expand Down Expand Up @@ -1475,7 +1479,7 @@ static const char * const colnames[8] = { "", "?", "?", "?", "565", "5551", "444

int VertexDecoder::ToString(char *output, bool spaces) const {
char *start = output;

output += sprintf(output, "[%08x] ", fmt_);
output += sprintf(output, "P: %s ", posnames[pos]);
if (nrm)
output += sprintf(output, "N: %s ", nrmnames[nrm]);
Expand All @@ -1502,6 +1506,10 @@ int VertexDecoder::ToString(char *output, bool spaces) const {
}
}

#ifdef _DEBUG
output += sprintf(output, " (%llu)", (long long)decodedCount);
#endif

return output - start;
}

Expand Down
4 changes: 4 additions & 0 deletions GPU/Common/VertexDecoderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,10 @@ class VertexDecoder {

u8 biggest; // in practice, alignment.

#ifdef _DEBUG
mutable u64 decodedCount = 0;
#endif

friend class VertexDecoderJitCache;

private:
Expand Down
20 changes: 19 additions & 1 deletion GPU/Common/VertexDecoderHandwritten.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,30 @@
#endif
#endif


// Candidates for hand-writing
// (found using our custom Very Sleepy).
// GPU::P:_f_N:_s8_C:_8888_T:_u16__(24b)_040001BE (5%+ of God of War execution)
// GPU::P:_f_N:_s8_C:_8888_T:_u16_W:_f_(1x)__(28b)_040007BE (1%+ of God of War execution)

// Tekken 6:
// (found using the vertex counter that's active in _DEBUG)
// [04000111] P: s16 C: 565 T: u8 (10b) (736949) // Also in Midnight Club

// Wipeout Pure:
// [0400013f] P: s16 N: s8 C: 8888 T: f (24b) (1495430)

// Flatout:
// [04000122] P: s16 N: s8 T: u16 (14b) (3901754)
// [04000116] P: s16 C: 5551 T: u16 (12b) (2225841)

// Test drive:
// [05000100] P: s16 (6b) (2827872)
// [050011ff] P: f N: f C: 8888 T: f I: u16 (36b) (3812112)

// Burnout Dominator:
// [04000122] P: s16 N: s8 T: u16 (14b) (1710813)
// [04000116] P: s16 C: 5551 T: u16 (12b) (7688298)

// This is the first GoW one.
void VtxDec_Tu16_C8888_Pfloat(const u8 *srcp, u8 *dstp, int count, const UVScale *uvScaleOffset) {
struct GOWVTX {
Expand Down
32 changes: 22 additions & 10 deletions UI/DevScreens.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1245,16 +1245,18 @@ void JitCompareScreen::OnRandomBlock(int flag) {
int tries = 0;
while (!anyWanted && tries < numBlocks) {
currentBlock_ = rand() % numBlocks;
JitBlockDebugInfo b = blockCache->GetBlockDebugInfo(currentBlock_);
u32 mipsBytes = (u32)b.origDisasm.size() * 4;
for (u32 addr = b.originalAddress; addr < b.originalAddress + mipsBytes; addr += 4) {
MIPSOpcode opcode = Memory::Read_Instruction(addr);
if (MIPSGetInfo(opcode) & flag) {
char temp[256];
MIPSDisAsm(opcode, addr, temp, sizeof(temp));
// INFO_LOG(HLE, "Stopping at random instruction: %08x %s", addr, temp);
anyWanted = true;
break;
if (blockCache->IsValidBlock(currentBlock_)) {
JitBlockDebugInfo b = blockCache->GetBlockDebugInfo(currentBlock_);
u32 mipsBytes = (u32)b.origDisasm.size() * 4;
for (u32 addr = b.originalAddress; addr < b.originalAddress + mipsBytes; addr += 4) {
MIPSOpcode opcode = Memory::Read_Instruction(addr);
if (MIPSGetInfo(opcode) & flag) {
char temp[256];
MIPSDisAsm(opcode, addr, temp, sizeof(temp));
// INFO_LOG(HLE, "Stopping at random instruction: %08x %s", addr, temp);
anyWanted = true;
break;
}
}
}
tries++;
Expand Down Expand Up @@ -1360,6 +1362,16 @@ void ShaderViewScreen::CreateViews() {
layout->Add(new Button(di->T("Back")))->OnClick.Handle<UIScreen>(this, &UIScreen::OnBack);
}

// Keyboard handler for the shader viewer.
// When a character event carries 'c' or 'C', the string returned by
// gpu->DebugGetShaderString(id_, type_, SHADER_STRING_SHORT_DESC) is placed on
// the clipboard. The event is then always forwarded to the base screen, and
// the base screen's result is returned.
bool ShaderViewScreen::key(const KeyInput &ki) {
	// unicodeChar is only inspected for KEY_CHAR events (short-circuit keeps that guarantee).
	if ((ki.flags & KEY_CHAR) && (ki.unicodeChar == 'c' || ki.unicodeChar == 'C')) {
		System_CopyStringToClipboard(gpu->DebugGetShaderString(id_, type_, SHADER_STRING_SHORT_DESC));
	}

	return UIDialogScreenWithBackground::key(ki);
}


const std::string framedumpsBaseUrl = "http://framedump.ppsspp.org/repro/";

FrameDumpTestScreen::FrameDumpTestScreen() {
Expand Down
1 change: 1 addition & 0 deletions UI/DevScreens.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ class ShaderViewScreen : public UIDialogScreenWithBackground {
: id_(id), type_(type) {}

void CreateViews() override;
bool key(const KeyInput &ki) override;

const char *tag() const override { return "ShaderView"; }

Expand Down

0 comments on commit ac5c16f

Please sign in to comment.