Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IR Interpreter: Two small optimizations #19299

Merged
merged 2 commits into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Common/Math/math_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,10 @@ inline bool my_isinf(float f) {
f2u.u == 0xff800000;
}

// Returns true when the raw 32-bit pattern `u`, interpreted as an IEEE-754
// single-precision float, encodes positive or negative infinity.
inline bool my_isinf_u(uint32_t u) {
	// Clear the sign bit so +inf (0x7f800000) and -inf (0xff800000) collapse
	// to one pattern and a single comparison covers both.
	const uint32_t magnitude = u & 0x7fffffff;
	return magnitude == 0x7f800000;
}

inline bool my_isnan(float f) {
FP32 f2u;
f2u.f = f;
Expand Down
12 changes: 10 additions & 2 deletions Core/MIPS/IR/IRCompALU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,14 @@ void IRFrontend::CompShiftVar(MIPSOpcode op, IROp shiftOp) {
MIPSGPReg rd = _RD;
MIPSGPReg rt = _RT;
MIPSGPReg rs = _RS;
ir.Write(IROp::AndConst, IRTEMP_0, rs, ir.AddConstant(31));
ir.Write(shiftOp, rd, rt, IRTEMP_0);

if (opts.optimizeForInterpreter) {
// The interpreter already masks where needed, don't need to generate extra ops.
ir.Write(shiftOp, rd, rt, rs);
} else {
ir.Write(IROp::AndConst, IRTEMP_0, rs, ir.AddConstant(31));
ir.Write(shiftOp, rd, rt, IRTEMP_0);
}
}

void IRFrontend::Comp_ShiftType(MIPSOpcode op) {
Expand Down Expand Up @@ -246,6 +252,8 @@ void IRFrontend::Comp_Special3(MIPSOpcode op) {

case 0x4: //ins
{
// TODO: Might be good to support natively in the interpreter. Though, would have to
// abuse a register as a constant
u32 sourcemask = mask >> pos;
u32 destmask = ~(sourcemask << pos);

Expand Down
37 changes: 32 additions & 5 deletions Core/MIPS/IR/IRInterpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -524,11 +524,20 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {

// The previous SSE code was removed because it needs an unsigned 16-bit pack, which SSE2 lacks and which hasn't been worked around here.
// pshufb (SSSE3) or SSE4 instructions could be used instead.
#if PPSSPP_ARCH(ARM_NEON) && 0
// Untested
uint32x4_t value = vld1q_u32(&mips->fi[inst->src1]);
value = vshlq_n_u32(value, 1);
uint32x2_t halved = vshrn_n_u32(value, 8);
uint32x2_t halvedAgain = vshrn_n_u32(vcombine_u32(halved, vdup_n_u32(0)), 8);
mips->fi[inst->dest] = vget_lane_u32(halvedAgain, 0);
#else
u32 val = (mips->fi[inst->src1] >> 23) & 0xFF;
val |= (mips->fi[inst->src1 + 1] >> 15) & 0xFF00;
val |= (mips->fi[inst->src1 + 2] >> 7) & 0xFF0000;
val |= (mips->fi[inst->src1 + 3] << 1) & 0xFF000000;
mips->fi[inst->dest] = val;
#endif
break;
}

Expand Down Expand Up @@ -627,7 +636,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
{
// Not quickly implementable on all platforms, unfortunately.
// Though, this is still pretty fast compared to one split into multiple IR instructions.
// This might be good though: https://stackoverflow.com/a/17004629
// This might be good though: https://gist.github.com/rikusalminen/3040241
float dot = mips->f[inst->src1] * mips->f[inst->src2];
for (int i = 1; i < 4; i++)
dot += mips->f[inst->src1 + i] * mips->f[inst->src2 + i];
Expand Down Expand Up @@ -820,8 +829,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
}
case IROp::BSwap32:
{
u32 x = mips->r[inst->src1];
mips->r[inst->dest] = swap32(x);
mips->r[inst->dest] = swap32(mips->r[inst->src1]);
break;
}

Expand All @@ -832,12 +840,31 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
mips->f[inst->dest] = mips->f[inst->src1] - mips->f[inst->src2];
break;
case IROp::FMul:
if ((mips->f[inst->src2] == 0.0f && my_isinf(mips->f[inst->src1])) || (mips->f[inst->src1] == 0.0f && my_isinf(mips->f[inst->src2]))) {
#if 1
{
float a = mips->f[inst->src1];
float b = mips->f[inst->src2];
if ((b == 0.0f && my_isinf(a)) || (a == 0.0f && my_isinf(b))) {
mips->fi[inst->dest] = 0x7fc00000;
} else {
mips->f[inst->dest] = mips->f[inst->src1] * mips->f[inst->src2];
mips->f[inst->dest] = a * b;
}
}
break;
#else
// Not sure if faster since it needs to load the operands twice? But the code is simpler.
{
// Takes care of negative zero by masking away the top bit, which also makes the inf check shorter.
u32 a = mips->fi[inst->src1] & 0x7FFFFFFF;
u32 b = mips->fi[inst->src2] & 0x7FFFFFFF;
if ((a == 0 && b == 0x7F800000) || (b == 0 && a == 0x7F800000)) {
mips->fi[inst->dest] = 0x7fc00000;
} else {
mips->f[inst->dest] = mips->f[inst->src1] * mips->f[inst->src2];
}
break;
}
#endif
case IROp::FDiv:
mips->f[inst->dest] = mips->f[inst->src1] / mips->f[inst->src2];
break;
Expand Down
Loading