Skip to content

Commit

Permalink
n64: inline simple dual mode operations
Browse files Browse the repository at this point in the history
* Pass JITContext down to leaf emit functions.
* Emit inline implementations of basic 64-bit operations.
* Use block compile-time information to elide kernel mode checks of
  the now inlined operations.
  • Loading branch information
kannoneer committed Sep 8, 2024
1 parent 616b3b6 commit 382c633
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 66 deletions.
6 changes: 4 additions & 2 deletions ares/n64/cpu/cpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -917,9 +917,11 @@ struct CPU : Thread {
auto block(u64 vaddr, u32 address, JITContext ctx) -> Block*;

auto emit(u64 vaddr, u32 address, JITContext ctx) -> Block*;
auto emitOverflowCheck(reg temp) -> sljit_jump*;
auto emitZeroClear(u32 n) -> void;
auto emitEXECUTE(u32 instruction) -> bool;
auto emitSPECIAL(u32 instruction) -> bool;
auto checkDualAllowed(const JITContext& ctx) -> bool;
auto emitEXECUTE(u32 instruction, JITContext ctx) -> bool;
auto emitSPECIAL(u32 instruction, JITContext ctx) -> bool;
auto emitREGIMM(u32 instruction) -> bool;
auto emitSCC(u32 instruction) -> bool;
auto emitFPU(u32 instruction) -> bool;
Expand Down
137 changes: 73 additions & 64 deletions ares/n64/cpu/recompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ auto CPU::Recompiler::emit(u64 vaddr, u32 address, JITContext ctx) -> Block* {
mov32(reg(2), imm(instruction));
call(&CPU::instructionPrologue);
}
bool branched = emitEXECUTE(instruction);
bool branched = emitEXECUTE(instruction, ctx);
if(unlikely(instruction == branchToSelf || instruction == jumpToSelf)) {
//accelerate idle loops
mov32(reg(1), imm(64 * 2));
Expand Down Expand Up @@ -134,12 +134,31 @@ auto CPU::Recompiler::emitZeroClear(u32 n) -> void {
if(n == 0) mov64(mem(IpuReg(r[0])), imm(0));
}

auto CPU::Recompiler::emitEXECUTE(u32 instruction) -> bool {
// Emits the shared overflow tail used after an arithmetic operation that
// requested the overflow flag.
//
//   temp: scratch register that receives the overflow flag; its previous
//         contents are overwritten.
//
// Returns the pending jump taken on the overflow path. The caller must bind
// it with setLabel() *after* emitting the destination write-back, so that an
// overflowing instruction raises the exception and skips the write-back.
auto CPU::Recompiler::emitOverflowCheck(reg temp) -> sljit_jump* {
  // Copy the overflow flag into the scratch register and test it against zero.
  mov32_f(temp, flag_o);
  auto noOverflow = cmp32_jump(temp, imm(0), flag_eq);

  // Overflow path: raise the arithmetic overflow exception, then jump past
  // whatever the caller emits next.
  call(&CPU::Exception::arithmeticOverflow, &cpu.exception);
  auto skipWriteback = jump();

  // No-overflow path resumes here and falls through into the caller's code.
  setLabel(noOverflow);
  return skipWriteback;
}

// Decides at block-compile time whether a 64-bit ("dual mode") operation may
// be inlined for the given JIT context.
//
// Returns true when the context is kernel mode or has 64-bit mode enabled; the
// caller then emits the inline implementation. Otherwise a call to the
// reserved-instruction exception handler is issued in place of the operation
// and false is returned, so the caller must not emit the operation itself.
auto CPU::Recompiler::checkDualAllowed(const JITContext& ctx) -> bool {
  const bool permitted = ctx.mode == Context::Mode::Kernel || ctx.is64bit;
  if (permitted) return true;

  // Neither kernel mode nor 64-bit mode: the instruction is reserved here.
  call(&CPU::Exception::reservedInstruction, &self.exception);
  return false;
}

auto CPU::Recompiler::emitEXECUTE(u32 instruction, JITContext ctx) -> bool {
switch(instruction >> 26) {

//SPECIAL
case 0x00: {
return emitSPECIAL(instruction);
return emitSPECIAL(instruction, ctx);
}

//REGIMM
Expand Down Expand Up @@ -315,21 +334,19 @@ auto CPU::Recompiler::emitEXECUTE(u32 instruction) -> bool {

//DADDI Rt,Rs,i16
case 0x18: {
lea(reg(1), Rt);
lea(reg(2), Rs);
mov32(reg(3), imm(i16));
call(&CPU::DADDI);
emitZeroClear(Rtn);
if (!checkDualAllowed(ctx)) return 1;
add64(reg(0), mem(Rs), imm(i16), set_o);
auto skip = emitOverflowCheck(reg(2));
if(Rtn > 0) mov64(mem(Rt), reg(0));
setLabel(skip);
return 0;
}

//DADDIU Rt,Rs,i16
case 0x19: {
lea(reg(1), Rt);
lea(reg(2), Rs);
mov32(reg(3), imm(i16));
call(&CPU::DADDIU);
emitZeroClear(Rtn);
if (!checkDualAllowed(ctx)) return 1;
add64(reg(0), mem(Rs), imm(i16), set_o);
if(Rtn > 0) mov64(mem(Rt), reg(0));
return 0;
}

Expand Down Expand Up @@ -647,7 +664,7 @@ auto CPU::Recompiler::emitEXECUTE(u32 instruction) -> bool {
return 0;
}

auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool {
auto CPU::Recompiler::emitSPECIAL(u32 instruction, JITContext ctx) -> bool {
switch(instruction & 0x3f) {

//SLL Rd,Rt,Sa
Expand Down Expand Up @@ -791,11 +808,10 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool {

//DSLLV Rd,Rt,Rs
case 0x14: {
lea(reg(1), Rd);
lea(reg(2), Rt);
lea(reg(3), Rs);
call(&CPU::DSLLV);
emitZeroClear(Rdn);
if (!checkDualAllowed(ctx)) return 1;
if (Rdn == 0) return 0;
and64(reg(0), mem(Rs32), imm(63));
shl64(mem(Rd), mem(Rt), reg(0));
return 0;
}

Expand All @@ -807,21 +823,19 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool {

//DSRLV Rd,Rt,Rs
case 0x16: {
lea(reg(1), Rd);
lea(reg(2), Rt);
lea(reg(3), Rs);
call(&CPU::DSRLV);
emitZeroClear(Rdn);
if (!checkDualAllowed(ctx)) return 1;
if (Rdn == 0) return 0;
and64(reg(0), mem(Rs32), imm(63));
lshr64(mem(Rd), mem(Rt), reg(0));
return 0;
}

//DSRAV Rd,Rt,Rs
case 0x17: {
lea(reg(1), Rd);
lea(reg(2), Rt);
lea(reg(3), Rs);
call(&CPU::DSRAV);
emitZeroClear(Rdn);
if (!checkDualAllowed(ctx)) return 1;
if (Rdn == 0) return 0;
and64(reg(0), mem(Rs32), imm(63));
ashr64(mem(Rd), mem(Rt), reg(0));
return 0;
}

Expand Down Expand Up @@ -981,41 +995,42 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool {

//DADD Rd,Rs,Rt
case 0x2c: {
lea(reg(1), Rd);
lea(reg(2), Rs);
lea(reg(3), Rt);
call(&CPU::DADD);
emitZeroClear(Rdn);
if (!checkDualAllowed(ctx)) return 1;
add64(reg(0), mem(Rs), mem(Rt), set_o);
auto skip = emitOverflowCheck(reg(2));
if(Rdn > 0) mov64(mem(Rd), reg(0));
setLabel(skip);
return 0;
}

//DADDU Rd,Rs,Rt
case 0x2d: {
lea(reg(1), Rd);
lea(reg(2), Rs);
lea(reg(3), Rt);
call(&CPU::DADDU);
emitZeroClear(Rdn);
if (!checkDualAllowed(ctx)) {
return 1;
}

if(Rdn == 0) return 0;

add64(reg(0), mem(Rs), mem(Rt));
mov64(mem(Rd), reg(0));
return 0;
}

//DSUB Rd,Rs,Rt
case 0x2e: {
lea(reg(1), Rd);
lea(reg(2), Rs);
lea(reg(3), Rt);
call(&CPU::DSUB);
emitZeroClear(Rdn);
if (!checkDualAllowed(ctx)) return 1;
sub64(reg(0), mem(Rs), mem(Rt), set_o);
auto skip = emitOverflowCheck(reg(2));
if(Rdn > 0) mov64(mem(Rd), reg(0));
setLabel(skip);
return 0;
}

//DSUBU Rd,Rs,Rt
case 0x2f: {
lea(reg(1), Rd);
lea(reg(2), Rs);
lea(reg(3), Rt);
call(&CPU::DSUBU);
emitZeroClear(Rdn);
if (!checkDualAllowed(ctx)) return 1;
sub64(reg(0), mem(Rs), mem(Rt), set_o);
if(Rdn > 0) mov64(mem(Rd), reg(0));
return 0;
}

Expand Down Expand Up @@ -1081,11 +1096,9 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool {

//DSLL Rd,Rt,Sa
case 0x38: {
lea(reg(1), Rd);
lea(reg(2), Rt);
mov32(reg(3), imm(Sa));
call(&CPU::DSLL);
emitZeroClear(Rdn);
if (!checkDualAllowed(ctx)) return 1;
if (Rdn == 0) return 0;
shl64(mem(Rd), mem(Rt), imm(Sa));
return 0;
}

Expand All @@ -1107,21 +1120,17 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool {

//DSRA Rd,Rt,Sa
case 0x3b: {
lea(reg(1), Rd);
lea(reg(2), Rt);
mov32(reg(3), imm(Sa));
call(&CPU::DSRA);
emitZeroClear(Rdn);
if (!checkDualAllowed(ctx)) return 1;
if (Rdn == 0) return 0;
ashr64(mem(Rd), mem(Rt), imm(Sa));
return 0;
}

//DSLL32 Rd,Rt,Sa
case 0x3c: {
lea(reg(1), Rd);
lea(reg(2), Rt);
mov32(reg(3), imm(Sa+32));
call(&CPU::DSLL);
emitZeroClear(Rdn);
if (!checkDualAllowed(ctx)) return 1;
if (Rdn == 0) return 0;
shl64(mem(Rd), mem(Rt), imm(Sa+32));
return 0;
}

Expand Down

0 comments on commit 382c633

Please sign in to comment.