diff --git a/src/dynarec/dynarec_arm_d9.c b/src/dynarec/dynarec_arm_d9.c
index 740ea50bd..819eb993c 100755
--- a/src/dynarec/dynarec_arm_d9.c
+++ b/src/dynarec/dynarec_arm_d9.c
@@ -281,48 +281,21 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             CALL(arm_f2xm1, -1, 0);
             #else
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-            if(!box86_dynarec_fastround) {
-                LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, cw));    // hopefully cw is not too far for an imm8
-                UBFX(x1, x1, 10, 2);    // extract round...
-                UBFX(x2, x1, 1, 1);     // swap bits 0 and 1
-                BFI(x2, x1, 1, 1);
-                VMRS(x14);              // get fpscr
-                MOV_REG(x3, x14);
-            }
             if((PK(0)==0xD9 && PK(1)==0xE8) &&  // next inst is FLD1
              (PK(2)==0xDE && PK(3)==0xC1)) {
                 MESSAGE(LOG_DUMP, "Hack for fld1 / faddp st1, st0\n");
-                if(!box86_dynarec_fastround) {
-                    VCMP_F64_0(v1);
-                    B_MARK(cGE);    // if ST0 < 0 and if the rounding mode is toward 0, then use upward
-                    TSTS_IMM8(x2, 0b01);
-                    AND_IMM8_COND(cNE, x2, x2, 0b01);   // 11 (TOWARDZERO) -> 01 (UPWARD), 01 -> 01
-                    MARK;
-                    BFI(x3, x2, 22, 2); // inject new round
-                    VMSR(x3);
-                }
-
                 VMOV_64(0, v1);
-                CALL_1D(exp2, box86_dynarec_fastround ? 0 : (1 << x14)); // return is d0
+                CALL_1D(exp2, 0); // return is d0
                 VMOV_64(v1, 0);
                 addr+=4;
             } else {
-                if(!box86_dynarec_fastround) {
-                    BFI(x3, x2, 22, 2); // inject new round
-                    VMSR(x3);           // put new fpscr
-                }
-
                 //ST0.d = expm1(LN2 * ST0.d);
                 MOV32(x2, (&d_ln2));
                 VLDR_64(0, x2, 0);
                 VMUL_F64(0, 0, v1);
-                if(!box86_dynarec_fastround)
-                    x87_setround(dyn, ninst, x1, x2, -1);
-                CALL_1D(expm1, box86_dynarec_fastround ? 0 : (1 << x14)); // return is d0
+                CALL_1D(expm1, 0); // return is d0
                 VMOV_64(v1, 0);
             }
-            if(!box86_dynarec_fastround)
-                VMSR(x14);
             #endif
             // should set C1 to 0
             break;
@@ -331,36 +304,9 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
             v2 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D);
 
-            if(!box86_dynarec_fastround) {
-                LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, cw));    // hopefully cw is not too far for an imm8
-                UBFX(x1, x1, 10, 2);    // extract round...
-                VCMP_F64_0(v2);
-                VMRS_APSR();
-                B_MARK(cLT);        // if ST1.d < 0 then don't swap bits 0 and 1
-                BFI(x1, x1, 2, 1);  // if ST1.d >= 0 then swap bits 0 and 1
-                UBFX(x1, x1, 1, 2);
-                MARK;
-                s0 = fpu_get_scratch_double(dyn);
-                VMOV_i_64(s0, 0b01110000);  // = 1.0
-                VCMP_F64(v1, s0);
-                VMRS_APSR();
-                B_MARK2(cGE);       // if ST0 < 1 and if the rounding mode is toward 0, then use upward
-                TSTS_IMM8(x1, 0b01);
-                AND_IMM8_COND(cNE, x1, x1, 0b01);   // 11 (TOWARDZERO) -> 01 (UPWARD), 01 -> 01
-                MARK2;
-                VMRS(x14);          // get fpscr
-                MOV_REG(x3, x14);
-                BFI(x3, x1, 22, 2); // inject new round
-                VMSR(x3);           // put new fpscr
-            }
-
             VMOV_64(0, v1); // prepare call to log2
-            CALL_1D(log2, box86_dynarec_fastround ? 0 : (1 << x14));
-            if(!box86_dynarec_fastround)
-                x87_setround(dyn, ninst, x1, x2, -1);
+            CALL_1D(log2, 0);
             VMUL_F64(v2, v2, 0);    //ST(1).d = log2(ST0.d)*ST(1).d
-            if(!box86_dynarec_fastround)
-                VMSR(x14);
             x87_do_pop(dyn, ninst, x3);
             // should set C1 to 0
             break;
@@ -515,32 +461,14 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             INST_NAME("FYL2XP1");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
             v2 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D);
-            if(!box86_dynarec_fastround) {
-                LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, cw));    // hopefully cw is not too far for an imm8
-                UBFX(x1, x1, 10, 2);    // extract round...
-                VCMP_F64_0(v2);
-                VMRS_APSR();
-                B_MARK(cLT);        // if ST1.d < 0 then don't swap bits 0 and 1
-                BFI(x1, x1, 2, 1);  // if ST1.d >= 0 then swap bits 0 and 1
-                UBFX(x1, x1, 1, 2);
-                MARK;
-                VMRS(x14);          // get fpscr
-                MOV_REG(x3, x14);
-                BFI(x3, x1, 22, 2); // inject new round
-                VMSR(x3);           // put new fpscr
-            }
 
             //ST(1).d = (ST(1).d * log1p(ST0.d)) / M_LN2;
             VMOV_64(0, v1); // prepare call to log1p
-            CALL_1D(log1p, box86_dynarec_fastround ? 0 : (1 << x14));
-            if(!box86_dynarec_fastround)
-                x87_setround(dyn, ninst, x1, x2, -1);
+            CALL_1D(log1p, 0);
             VMUL_F64(v2, v2, 0);
             MOV32(x2, (&d_ln2));
             VLDR_64(0, x2, 0);
             VDIV_F64(v2, v2, 0);
-            if(!box86_dynarec_fastround)
-                VMSR(x14);
             x87_do_pop(dyn, ninst, x3);
             // should set C1 to 0
             break;
diff --git a/src/emu/x86rund9.c b/src/emu/x86rund9.c
index 87e74158c..aebc2c866 100755
--- a/src/emu/x86rund9.c
+++ b/src/emu/x86rund9.c
@@ -123,20 +123,7 @@ uintptr_t RunD9(x86emu_t *emu, uintptr_t addr)
             emu->sw.f.F87_C1 = 0;
             break;
         case 0xF1:  /* FYL2X */
-            if (ST1.d < 0) {
-                switch (emu->cw.f.C87_RD) {
-                    case ROUND_Up:
-                        fesetround(FE_DOWNWARD);
-                        break;
-                    case ROUND_Down:
-                        fesetround(FE_UPWARD);
-                }
-            }
-            if (ST0.d < 1 && emu->cw.f.C87_RD == ROUND_Chop)
-                fesetround(FE_UPWARD);
-            const double log2_st0 = log2(ST0.d);
-            setround(emu);
-            ST(1).d *= log2_st0;
+            ST(1).d *= log2(ST0.d);
             fpu_do_pop(emu);
             emu->sw.f.F87_C1 = 0;
             break;
@@ -209,20 +196,9 @@ uintptr_t RunD9(x86emu_t *emu, uintptr_t addr)
             emu->top=(emu->top+1)&7;    // this will probably break a few things
             break;
         case 0xF9:  /* FYL2XP1 */
-            if (ST1.d < 0) {
-                switch (emu->cw.f.C87_RD) {
-                    case ROUND_Up:
-                        fesetround(FE_DOWNWARD);
-                        break;
-                    case ROUND_Down:
-                        fesetround(FE_UPWARD);
-                }
-            }
             // Using the log1p instead of log2(ST0+1) can avoid losing precision much,
             // expecially when ST0 is close to zero (which loses the precise when +1).
-            const double log1p_st0 = log1p(ST0.d);
-            setround(emu);
-            ST(1).d = (ST(1).d * log1p_st0) / M_LN2;
+            ST(1).d = (ST(1).d * log1p(ST0.d)) / M_LN2;
             // = ST1 * log2(ST0 + 1) + error. (in math)
             fpu_do_pop(emu);
             emu->sw.f.F87_C1 = 0;
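
A note on the comment kept in the FYL2XP1 hunk: the interpreter keeps computing ST1 * log1p(ST0) / M_LN2 rather than the literal ST1 * log2(ST0 + 1) because, for tiny ST0, the addition 1.0 + ST0 rounds away ST0's low-order bits before the logarithm ever sees them, while log1p does not. A minimal standalone C sketch of that precision argument (ordinary libm calls, not part of the patch; the M_LN2 fallback is only there because strict ISO C does not guarantee the macro):

#include <math.h>
#include <stdio.h>

#ifndef M_LN2
#define M_LN2 0.69314718055994530942    /* ln(2) */
#endif

int main(void)
{
    double x = 1e-17;                   /* below DBL_EPSILON, so 1.0 + x == 1.0 exactly */
    double naive  = log2(1.0 + x);      /* the +1 swallows x entirely: prints 0 */
    double stable = log1p(x) / M_LN2;   /* keeps x's bits: ~1.4426950409e-17 */
    printf("log2(1+x)    = %.17g\n", naive);
    printf("log1p(x)/ln2 = %.17g\n", stable);
    return 0;
}

Built with cc demo.c -lm, the naive form prints 0 while the log1p form prints the correctly rounded result, which is exactly the failure mode the retained comment describes.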