// Patch summary (unified git diff of C sources; everything below is the patch text itself).
// - arm_emitter.h: adds AND_IMM8_ROR and BIC_IMM8_ROR, and gives ANDS_IMM8_ROR the `dst`
//   parameter that its expansion already referenced.
// - dynarec_arm_d9.c: opcode D9 E5 (FXAM) is emitted inline under `#if 1`; the previous
//   CALL(fpu_fxam, ...) path is kept in the `#else` branch. The inline code builds the
//   C3/C2/C1/C0 bits in x14 and merges them into emu->sw.
// - dynarec_arm_helper.c/.h: the cache lookup is split out of x87_get_cache into
//   x87_get_current_cache, which returns -1 when ST(st) has no cache entry (STEP > 0).
// - x87emu_private.c/.h: fpu_fxam moves from a static inline in the header to the .c file;
//   the empty test now reads p_regs[top].tag, zero is tested on the raw bits, and a
//   denormal classification is added.
// NOTE(review): when STEP is not > 0, x87_get_current_cache returns 0 (never -1), so the
//   FXAM "not in cache" path is not taken in that step -- confirm this is intended.
// NOTE(review): the patch's line breaks appear to have been collapsed into spaces, so it
//   presumably will not apply with git apply/patch in this form -- confirm against the original.
diff --git a/src/dynarec/arm_emitter.h b/src/dynarec/arm_emitter.h index 9519b619b7..4ecd7e5e95 100755 --- a/src/dynarec/arm_emitter.h +++ b/src/dynarec/arm_emitter.h @@ -185,11 +185,14 @@ Op is 20-27 // and dst, src, #(imm8) #define AND_IMM8(dst, src, imm8) \ EMIT(0xe2000000 | ((dst) << 12) | ((src) << 16) | brIMM(imm8) ) +// and dst, src1, #imm ror rot*2 +#define AND_IMM8_ROR(dst, src, imm8, rot) \ + EMIT(0xe2000000 | ((dst) << 12) | ((src) << 16) | ((rot)<<8) | brIMM(imm8) ) // and.s dst, src, #(imm8) #define ANDS_IMM8(dst, src, imm8) \ EMIT(0xe2100000 | ((dst) << 12) | ((src) << 16) | brIMM(imm8) ) // and.s dst, src1, #imm ror rot*2 -#define ANDS_IMM8_ROR(src, imm8, rot) \ +#define ANDS_IMM8_ROR(dst, src, imm8, rot) \ EMIT(0xe2100000 | ((dst) << 12) | ((src) << 16) | ((rot)<<8) | brIMM(imm8) ) // add dst, src, #(imm8) #define ADD_IMM8(dst, src, imm8) \ @@ -308,6 +311,9 @@ Op is 20-27 // bic.cond dst, src, IMM8 #define BIC_IMM8_COND(cond, dst, src, imm8, rot) \ EMIT((cond) | 0x03c00000 | ((dst) << 12) | ((src) << 16) | ((rot)<<8) | imm8 ) +// bic dst, src1, #imm ror rot*2 +#define BIC_IMM8_ROR(dst, src, imm8, rot) \ + EMIT(0xe3c00000 | ((dst) << 12) | ((src) << 16) | ((rot)<<8) | brIMM(imm8) ) // bic.s dst, src1, #imm ror rot*2 #define BICS_IMM8_ROR(dst, src, imm8, rot) \ EMIT(0xe3d00000 | ((dst) << 12) | ((src) << 16) | ((rot)<<8) | brIMM(imm8) ) diff --git a/src/dynarec/dynarec_arm_d9.c b/src/dynarec/dynarec_arm_d9.c index c9d639227a..6847afae2d 100755 --- a/src/dynarec/dynarec_arm_d9.c +++ b/src/dynarec/dynarec_arm_d9.c @@ -129,9 +129,66 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, break; case 0xE5: INST_NAME("FXAM"); + #if 1 + i1 = x87_get_current_cache(dyn, ninst, 0, NEON_CACHE_ST_D); + // value put in x14 + if(i1==-1) { + // not in cache, so check Empty status and load it + // x14 will be the actual top + LDR_IMM9(x14, xEmu, offsetof(x86emu_t, top)); + i2 = -dyn->x87stack; + if(i2) { + if(i2<0) { + SUB_IMM8(x14, x14, 
-i2); + } else { + ADD_IMM8(x14, x14, i2); + } + AND_IMM8(x14, x14, 7); // (emu->top + i)&7 + } + ADD_REG_LSL_IMM5(x1, xEmu, x14, 3); + LDRD_IMM8(x2, x1, offsetof(x86emu_t, x87)); // load r2/r3 with ST0 anyway, for sign extraction + ADD_REG_LSL_IMM5(x1, xEmu, x14, 2); + LDR_IMM9(x1, x1, offsetof(x86emu_t, p_regs)); + CMPS_IMM8(x1, 0b11); + MOVW_COND(cEQ, x14, 0b100000100000000); // empty: C3,C2,C0 = 101 + B_MARK3(cEQ); + } else { + // simply move from cache reg to r2/r3 + v1 = dyn->x87reg[i1]; + VMOVfrV_D(x2, x3, v1); + } + // get exponent in r1 + MOV_REG_LSR_IMM5(x1, x3, 20); + MOVW(x14, 0x7ff); + ANDS_REG_LSL_IMM5(x1, x1, x14, 0); + B_MARK(cNE); // not zero or denormal + BIC_IMM8_ROR(x1, x3, 0b10, 1); // remove sign bit + ORRS_REG_LSL_IMM5(x1, x1, x2, 0); + MOVW_COND(cEQ, x14, 0b100000000000000); // Zero: C3,C2,C0 = 100 + MOVW_COND(cNE, x14, 0b100010000000000); // Denormal: C3,C2,C0 = 110 + B_MARK3(c__); + MARK; + CMPS_REG_LSL_IMM5(x1, x14, 0); // infinite/NaN? + MOVW_COND(cNE, x14, 0b000010000000000); // normal: C3,C2,C0 = 010 + B_MARK3(cNE); + ORR_IMM8(x1, x1, 0x08, 12); //prepare mask, 0x7ff | 0x800 => 0xfff + BIC_REG_LSL_IMM5(x1, x3, x1, 20); + ORRS_REG_LSL_IMM5(x1, x1, x2, 0); + MOVW_COND(cEQ, x14, 0b000010100000000); // infinity: C3,C2,C0 = 011 + MOVW_COND(cNE, x14, 0b000000100000000); // NaN: C3,C2,C0 = 001 + MARK3; + // Extract sign & Update SW + MOV_REG_LSR_IMM5(x1, x3, 31); + BFI(x14, x1, 9, 1); //C1 + LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, sw)); + BIC_IMM8(x1, x1, 0b01000111, 12); + ORR_REG_LSL_IMM5(x14, x14, x1, 0); + STRH_IMM8(x14, xEmu, offsetof(x86emu_t, sw)); + #else MESSAGE(LOG_DUMP, "Need Optimization\n"); x87_refresh(dyn, ninst, x1, x2, 0); CALL(fpu_fxam, -1, 0); // should be possible inline, but is it worth it? 
+ #endif break; case 0xE8: diff --git a/src/dynarec/dynarec_arm_helper.c b/src/dynarec/dynarec_arm_helper.c index ea99ce6fdb..551df84d46 100755 --- a/src/dynarec/dynarec_arm_helper.c +++ b/src/dynarec/dynarec_arm_helper.c @@ -723,11 +723,9 @@ static void x87_reflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int } #endif -int x87_get_cache(dynarec_arm_t* dyn, int ninst, int populate, int s1, int s2, int st, int t) +int x87_get_current_cache(dynarec_arm_t* dyn, int ninst, int st, int t) { #if STEP > 0 - if(dyn->mmxcount) - mmx_purgecache(dyn, ninst, 0, s1); // search in cache first for (int i=0; i<8; ++i) if(dyn->x87cache[i]==st) { @@ -737,9 +735,22 @@ int x87_get_cache(dynarec_arm_t* dyn, int ninst, int populate, int s1, int s2, i #endif return i; } + return -1; +#else + return 0; +#endif +} + +int x87_get_cache(dynarec_arm_t* dyn, int ninst, int populate, int s1, int s2, int st, int t) +{ +#if STEP > 0 + if(dyn->mmxcount) + mmx_purgecache(dyn, ninst, 0, s1); + int ret = x87_get_current_cache(dyn, ninst, st, t); + if(ret!=-1) + return ret; MESSAGE(LOG_DUMP, "\tCreate %sx87 Cache for ST%d\n", populate?"and populate ":"", st); // get a free spot - int ret = -1; for (int i=0; (i<8) && (ret==-1); ++i) if(dyn->x87cache[i]==-1) ret = i; diff --git a/src/dynarec/dynarec_arm_helper.h b/src/dynarec/dynarec_arm_helper.h index f1cbcc7ef4..80c17cf4d8 100755 --- a/src/dynarec/dynarec_arm_helper.h +++ b/src/dynarec/dynarec_arm_helper.h @@ -459,6 +459,7 @@ void* arm_next(x86emu_t* emu, uintptr_t addr); #define x87_do_push STEPNAME(x87_do_push) #define x87_do_push_empty STEPNAME(x87_do_push_empty) #define x87_do_pop STEPNAME(x87_do_pop) +#define x87_get_current_cache STEPNAME(x87_get_current_cache) #define x87_get_cache STEPNAME(x87_get_cache) #define x87_get_neoncache STEPNAME(x87_get_neoncache) #define x87_get_st STEPNAME(x87_get_st) @@ -592,6 +593,8 @@ int x87_do_push(dynarec_arm_t* dyn, int ninst, int s1, int t); void x87_do_push_empty(dynarec_arm_t* dyn, int ninst, 
int s1); // fpu pop. All previous returned Dd should be considered invalid void x87_do_pop(dynarec_arm_t* dyn, int ninst, int s1); +// get cache index for a x87 reg, return -1 if cache doesn't exist +int x87_get_current_cache(dynarec_arm_t* dyn, int ninst, int st, int t); // get cache index for a x87 reg, create the entry if needed int x87_get_cache(dynarec_arm_t* dyn, int ninst, int populate, int s1, int s2, int a, int t); // get neoncache index for a x87 reg diff --git a/src/emu/x87emu_private.c b/src/emu/x87emu_private.c index 00c747b6f8..fa1214f8ea 100755 --- a/src/emu/x87emu_private.c +++ b/src/emu/x87emu_private.c @@ -313,3 +313,50 @@ void fpu_fxrstor(x86emu_t* emu, void* ed) // copy SSE regs memcpy(&emu->xmm[0], &p->XmmRegisters[0], sizeof(emu->xmm)); } + +void fpu_fxam(x86emu_t* emu) { + emu->sw.f.F87_C1 = (ST0.ud[1]&0x80000000)?1:0; + if(emu->p_regs[emu->top].tag == 0b11) { + //Empty + emu->sw.f.F87_C3 = 1; + emu->sw.f.F87_C2 = 0; + emu->sw.f.F87_C0 = 1; + return; + } + if(isinf(ST0.d)) + { + //Infinity + emu->sw.f.F87_C3 = 0; + emu->sw.f.F87_C2 = 1; + emu->sw.f.F87_C0 = 1; + return; + } + if(isnan(ST0.d)) + { + //NaN + emu->sw.f.F87_C3 = 0; + emu->sw.f.F87_C2 = 0; + emu->sw.f.F87_C0 = 1; + return; + } + if((ST0.ud[0]|(ST0.ud[1]&0x7fffffff))==0) + { + //Zero + emu->sw.f.F87_C3 = 1; + emu->sw.f.F87_C2 = 0; + emu->sw.f.F87_C0 = 0; + return; + } + if((ST0.ud[1]&0x7FF00000)==0) + { + // denormals + emu->sw.f.F87_C3 = 1; + emu->sw.f.F87_C2 = 1; + emu->sw.f.F87_C0 = 0; + return; + } + // normal... 
+ emu->sw.f.F87_C3 = 0; + emu->sw.f.F87_C2 = 1; + emu->sw.f.F87_C0 = 0; +} diff --git a/src/emu/x87emu_private.h b/src/emu/x87emu_private.h index 655fb41932..c85d35edd9 100755 --- a/src/emu/x87emu_private.h +++ b/src/emu/x87emu_private.h @@ -132,41 +132,7 @@ static inline double fpu_round(x86emu_t* emu, double d) { } } -static inline void fpu_fxam(x86emu_t* emu) { - emu->sw.f.F87_C1 = (ST0.ud[1]&0x80000000)?1:0; - if(!emu->fpu_stack) { - emu->sw.f.F87_C3 = 1; - emu->sw.f.F87_C2 = 0; - emu->sw.f.F87_C0 = 1; - return; - } - if(isinf(ST0.d)) - { // TODO: Unsuported and denormal not analysed... - emu->sw.f.F87_C3 = 0; - emu->sw.f.F87_C2 = 1; - emu->sw.f.F87_C0 = 1; - return; - } - if(isnan(ST0.d)) - { // TODO: Unsuported and denormal not analysed... - emu->sw.f.F87_C3 = 0; - emu->sw.f.F87_C2 = 0; - emu->sw.f.F87_C0 = 1; - return; - } - if(ST0.d==0.0) - { - emu->sw.f.F87_C3 = 1; - emu->sw.f.F87_C2 = 0; - emu->sw.f.F87_C0 = 0; - return; - } - // normal... - emu->sw.f.F87_C3 = 0; - emu->sw.f.F87_C2 = 1; - emu->sw.f.F87_C0 = 0; - -} +void fpu_fxam(x86emu_t* emu); static inline void fpu_ftst(x86emu_t* emu) { emu->sw.f.F87_C1 = 0;