Skip to content

Commit

Permalink
[DYNAREC] Optimized fxam
Browse files Browse the repository at this point in the history
  • Loading branch information
ptitSeb committed Nov 2, 2021
1 parent 428d59a commit 9f4c371
Show file tree
Hide file tree
Showing 6 changed files with 130 additions and 40 deletions.
8 changes: 7 additions & 1 deletion src/dynarec/arm_emitter.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,11 +185,14 @@ Op is 20-27
// AND with an 8-bit immediate (no rotation): and dst, src, #(imm8)
// Field layout: src at bit 16, dst at bit 12, immediate in the low byte.
#define AND_IMM8(dst, src, imm8) \
    EMIT(0xe2000000 | ((src) << 16) | ((dst) << 12) | brIMM(imm8))
// AND with a rotated immediate: and dst, src, #(imm8 ror rot*2)
// rot goes into the rotate field starting at bit 8.
#define AND_IMM8_ROR(dst, src, imm8, rot) \
    EMIT(0xe2000000 | ((src) << 16) | ((dst) << 12) | ((rot) << 8) | brIMM(imm8))
// Flag-setting AND with an 8-bit immediate: and.s dst, src, #(imm8)
// Same encoding as AND_IMM8 with bit 20 additionally set in the opcode word.
#define ANDS_IMM8(dst, src, imm8) \
    EMIT(0xe2100000 | ((src) << 16) | ((dst) << 12) | brIMM(imm8))
// Flag-setting AND with a rotated immediate: and.s dst, src, #(imm8 ror rot*2)
// The macro takes the destination register explicitly: the expansion uses
// `dst`, so the parameter list must declare it (a 3-parameter form without
// `dst` cannot expand correctly).
#define ANDS_IMM8_ROR(dst, src, imm8, rot) \
    EMIT(0xe2100000 | ((dst) << 12) | ((src) << 16) | ((rot)<<8) | brIMM(imm8) )
// add dst, src, #(imm8)
#define ADD_IMM8(dst, src, imm8) \
Expand Down Expand Up @@ -308,6 +311,9 @@ Op is 20-27
// bic.cond dst, src, IMM8
// `cond` is OR'ed in as given, so it must already sit in the condition field.
// Unlike the sibling macros, the immediate is not passed through brIMM(); it
// is parenthesized so that an expression argument (e.g. `a ? b : c`) cannot
// capture the rest of the encoding through operator precedence.
#define BIC_IMM8_COND(cond, dst, src, imm8, rot) \
    EMIT((cond) | 0x03c00000 | ((dst) << 12) | ((src) << 16) | ((rot)<<8) | (imm8) )
// Bit clear with a rotated immediate: bic dst, src, #(imm8 ror rot*2)
// rot goes into the rotate field starting at bit 8.
#define BIC_IMM8_ROR(dst, src, imm8, rot) \
    EMIT(0xe3c00000 | ((src) << 16) | ((dst) << 12) | ((rot) << 8) | brIMM(imm8))
// Flag-setting bit clear with a rotated immediate:
//   bic.s dst, src, #(imm8 ror rot*2)
// Same encoding as BIC_IMM8_ROR with bit 20 additionally set in the opcode word.
#define BICS_IMM8_ROR(dst, src, imm8, rot) \
    EMIT(0xe3d00000 | ((src) << 16) | ((dst) << 12) | ((rot) << 8) | brIMM(imm8))
Expand Down
57 changes: 57 additions & 0 deletions src/dynarec/dynarec_arm_d9.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,66 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
break;
case 0xE5:
INST_NAME("FXAM");
// FXAM: classify ST0 and store the result in the x87 status word.
// The inline path below builds the C3/C2/C0 class code in x14 (C3 = bit 14,
// C2 = bit 10, C0 = bit 8), inserts the sign of ST0 as C1 (bit 9) and merges
// the four bits into emu->sw. The #else path calls the C helper fpu_fxam.
#if 1
// Look up ST0 in the x87 cache without creating an entry (-1 when not cached)
i1 = x87_get_current_cache(dyn, ninst, 0, NEON_CACHE_ST_D);
// value put in x14
if(i1==-1) {
// not in cache, so check Empty status and load it
// x14 will be the actual top
LDR_IMM9(x14, xEmu, offsetof(x86emu_t, top));
// NOTE(review): dyn->x87stack is presumably the stack-top adjustment not yet
// written back to emu->top -- confirm against the x87 cache helpers
i2 = -dyn->x87stack;
if(i2) {
if(i2<0) {
SUB_IMM8(x14, x14, -i2);
} else {
ADD_IMM8(x14, x14, i2);
}
AND_IMM8(x14, x14, 7); // (emu->top + i)&7
}
// x1 = xEmu + top*8, used to address the x87 register slot of ST0
ADD_REG_LSL_IMM5(x1, xEmu, x14, 3);
LDRD_IMM8(x2, x1, offsetof(x86emu_t, x87)); // load r2/r3 with ST0 anyway, for sign extraction
// x1 = xEmu + top*4, used to address the p_regs entry of ST0
ADD_REG_LSL_IMM5(x1, xEmu, x14, 2);
LDR_IMM9(x1, x1, offsetof(x86emu_t, p_regs));
// 0b11 is the value fpu_fxam tests p_regs[top].tag against for "Empty"
CMPS_IMM8(x1, 0b11);
MOVW_COND(cEQ, x14, 0b100000100000000); // empty: C3,C2,C0 = 101
B_MARK3(cEQ);
} else {
// simply move from cache reg to r2/r3
v1 = dyn->x87reg[i1];
VMOVfrV_D(x2, x3, v1);
}
// get exponent in r1: (high word >> 20) & 0x7ff
MOV_REG_LSR_IMM5(x1, x3, 20);
MOVW(x14, 0x7ff);
ANDS_REG_LSL_IMM5(x1, x1, x14, 0);
B_MARK(cNE); // not zero or denormal
// exponent is 0: zero if all remaining bits are 0, denormal otherwise
BIC_IMM8_ROR(x1, x3, 0b10, 1); // remove sign bit (0b10 ror 2 = 0x80000000)
ORRS_REG_LSL_IMM5(x1, x1, x2, 0); // OR in the low word; flags tell whether everything is zero
MOVW_COND(cEQ, x14, 0b100000000000000); // Zero: C3,C2,C0 = 100
MOVW_COND(cNE, x14, 0b100010000000000); // Denormal: C3,C2,C0 = 110
B_MARK3(c__);
MARK;
// exponent is non-zero; x14 still holds 0x7ff at this point
CMPS_REG_LSL_IMM5(x1, x14, 0); // infinite/NaN?
MOVW_COND(cNE, x14, 0b000010000000000); // normal: C3,C2,C0 = 010
B_MARK3(cNE);
// exponent is 0x7ff: infinity if the mantissa is 0, NaN otherwise
ORR_IMM8(x1, x1, 0x08, 12); //prepare mask, 0x7ff | 0x800 => 0xfff
BIC_REG_LSL_IMM5(x1, x3, x1, 20); // clear sign+exponent (0xfff<<20) from the high word
ORRS_REG_LSL_IMM5(x1, x1, x2, 0); // OR in the low word; flags tell whether the mantissa is zero
MOVW_COND(cEQ, x14, 0b000010100000000); // infinity: C3,C2,C0 = 011
MOVW_COND(cNE, x14, 0b000000100000000); // NaN: C3,C2,C0 = 001
MARK3;
// Extract sign & Update SW
MOV_REG_LSR_IMM5(x1, x3, 31);
BFI(x14, x1, 9, 1); //C1
LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, sw));
// NOTE(review): presumably clears bits 8,9,10,14 (0b01000111 rotated to 0x4700),
// i.e. the old C0,C1,C2,C3 -- confirm BIC_IMM8's rotate semantics
BIC_IMM8(x1, x1, 0b01000111, 12);
ORR_REG_LSL_IMM5(x14, x14, x1, 0);
STRH_IMM8(x14, xEmu, offsetof(x86emu_t, sw));
#else
// Fallback: call the C implementation after x87_refresh
MESSAGE(LOG_DUMP, "Need Optimization\n");
x87_refresh(dyn, ninst, x1, x2, 0);
CALL(fpu_fxam, -1, 0); // should be possible inline, but is it worth it?
#endif
break;

case 0xE8:
Expand Down
19 changes: 15 additions & 4 deletions src/dynarec/dynarec_arm_helper.c
Original file line number Diff line number Diff line change
Expand Up @@ -723,11 +723,9 @@ static void x87_reflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int
}
#endif

int x87_get_cache(dynarec_arm_t* dyn, int ninst, int populate, int s1, int s2, int st, int t)
int x87_get_current_cache(dynarec_arm_t* dyn, int ninst, int st, int t)
{
#if STEP > 0
if(dyn->mmxcount)
mmx_purgecache(dyn, ninst, 0, s1);
// search in cache first
for (int i=0; i<8; ++i)
if(dyn->x87cache[i]==st) {
Expand All @@ -737,9 +735,22 @@ int x87_get_cache(dynarec_arm_t* dyn, int ninst, int populate, int s1, int s2, i
#endif
return i;
}
return -1;
#else
return 0;
#endif
}

int x87_get_cache(dynarec_arm_t* dyn, int ninst, int populate, int s1, int s2, int st, int t)
{
#if STEP > 0
if(dyn->mmxcount)
mmx_purgecache(dyn, ninst, 0, s1);
int ret = x87_get_current_cache(dyn, ninst, st, t);
if(ret!=-1)
return ret;
MESSAGE(LOG_DUMP, "\tCreate %sx87 Cache for ST%d\n", populate?"and populate ":"", st);
// get a free spot
int ret = -1;
for (int i=0; (i<8) && (ret==-1); ++i)
if(dyn->x87cache[i]==-1)
ret = i;
Expand Down
3 changes: 3 additions & 0 deletions src/dynarec/dynarec_arm_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,7 @@ void* arm_next(x86emu_t* emu, uintptr_t addr);
#define x87_do_push STEPNAME(x87_do_push)
#define x87_do_push_empty STEPNAME(x87_do_push_empty)
#define x87_do_pop STEPNAME(x87_do_pop)
// lookup-only helper: x87_get_cache calls it first before creating an entry
#define x87_get_current_cache STEPNAME(x87_get_current_cache)
#define x87_get_cache STEPNAME(x87_get_cache)
#define x87_get_neoncache STEPNAME(x87_get_neoncache)
#define x87_get_st STEPNAME(x87_get_st)
Expand Down Expand Up @@ -592,6 +593,8 @@ int x87_do_push(dynarec_arm_t* dyn, int ninst, int s1, int t);
void x87_do_push_empty(dynarec_arm_t* dyn, int ninst, int s1);
// fpu pop. All previous returned Dd should be considered invalid
void x87_do_pop(dynarec_arm_t* dyn, int ninst, int s1);
// get cache index for a x87 reg, return -1 if cache doesn't exist
// (pure lookup: never creates a cache entry; when STEP is 0 it returns 0)
// NOTE(review): t is presumably the wanted cache type, e.g. NEON_CACHE_ST_D -- confirm
int x87_get_current_cache(dynarec_arm_t* dyn, int ninst, int st, int t);
// get cache index for a x87 reg, create the entry if needed
// (tries x87_get_current_cache first, then looks for a free cache slot)
int x87_get_cache(dynarec_arm_t* dyn, int ninst, int populate, int s1, int s2, int a, int t);
// get neoncache index for a x87 reg
Expand Down
47 changes: 47 additions & 0 deletions src/emu/x87emu_private.c
Original file line number Diff line number Diff line change
Expand Up @@ -313,3 +313,50 @@ void fpu_fxrstor(x86emu_t* emu, void* ed)
// copy SSE regs
memcpy(&emu->xmm[0], &p->XmmRegisters[0], sizeof(emu->xmm));
}

// FXAM: examine ST0 and report its class in the status word.
//   C1       <- sign bit of ST0 (always written, even for an empty register)
//   C3,C2,C0 <- 101 empty, 011 infinity, 001 NaN, 100 zero,
//               110 denormal, 010 normal
void fpu_fxam(x86emu_t* emu)
{
    int c3, c2, c0;

    // sign is bit 31 of the high 32-bit word
    emu->sw.f.F87_C1 = (ST0.ud[1] & 0x80000000) ? 1 : 0;

    if(emu->p_regs[emu->top].tag == 0b11) {
        // Empty
        c3 = 1; c2 = 0; c0 = 1;
    } else if(isinf(ST0.d)) {
        // Infinity
        c3 = 0; c2 = 1; c0 = 1;
    } else if(isnan(ST0.d)) {
        // NaN
        c3 = 0; c2 = 0; c0 = 1;
    } else if(!(ST0.ud[0] | (ST0.ud[1] & 0x7fffffff))) {
        // Zero: every bit except the sign is clear
        c3 = 1; c2 = 0; c0 = 0;
    } else if(!(ST0.ud[1] & 0x7FF00000)) {
        // Denormal: exponent field is 0 but the value is not zero
        c3 = 1; c2 = 1; c0 = 0;
    } else {
        // Normal
        c3 = 0; c2 = 1; c0 = 0;
    }

    emu->sw.f.F87_C3 = c3;
    emu->sw.f.F87_C2 = c2;
    emu->sw.f.F87_C0 = c0;
}
36 changes: 1 addition & 35 deletions src/emu/x87emu_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,41 +132,7 @@ static inline double fpu_round(x86emu_t* emu, double d) {
}
}

// FXAM: examine ST0 and report its class in the status word.
//   C1       <- sign bit of ST0 (always written)
//   C3,C2,C0 <- 101 empty, 011 infinity, 001 NaN, 100 zero,
//               110 denormal, 010 normal
static inline void fpu_fxam(x86emu_t* emu) {
    // sign is bit 31 of the high 32-bit word
    emu->sw.f.F87_C1 = (ST0.ud[1]&0x80000000)?1:0;
    if(!emu->fpu_stack) {
        // fpu_stack is zero: report Empty
        emu->sw.f.F87_C3 = 1;
        emu->sw.f.F87_C2 = 0;
        emu->sw.f.F87_C0 = 1;
        return;
    }
    if(isinf(ST0.d))
    {
        // Infinity
        emu->sw.f.F87_C3 = 0;
        emu->sw.f.F87_C2 = 1;
        emu->sw.f.F87_C0 = 1;
        return;
    }
    if(isnan(ST0.d))
    {
        // NaN
        emu->sw.f.F87_C3 = 0;
        emu->sw.f.F87_C2 = 0;
        emu->sw.f.F87_C0 = 1;
        return;
    }
    if(ST0.d==0.0)
    {
        // Zero (either sign)
        emu->sw.f.F87_C3 = 1;
        emu->sw.f.F87_C2 = 0;
        emu->sw.f.F87_C0 = 0;
        return;
    }
    if((ST0.ud[1]&0x7FF00000)==0)
    {
        // Denormal: exponent field is 0 and the value is not zero
        // (previously these were reported as normal)
        emu->sw.f.F87_C3 = 1;
        emu->sw.f.F87_C2 = 1;
        emu->sw.f.F87_C0 = 0;
        return;
    }
    // normal...
    emu->sw.f.F87_C3 = 0;
    emu->sw.f.F87_C2 = 1;
    emu->sw.f.F87_C0 = 0;
}
void fpu_fxam(x86emu_t* emu);

static inline void fpu_ftst(x86emu_t* emu) {
emu->sw.f.F87_C1 = 0;
Expand Down

0 comments on commit 9f4c371

Please sign in to comment.