From 6dddbb8c8acabdee1ab17cb6eb994a22885cc5c8 Mon Sep 17 00:00:00 2001
From: Yang Liu
Date: Thu, 26 Dec 2024 03:39:44 +0800
Subject: [PATCH] [LA64_DYNAREC] Added more opcodes and fixed CVTTPD2DQ

---
 src/dynarec/la64/dynarec_la64_00.c         | 20 ++++-
 src/dynarec/la64/dynarec_la64_0f.c         | 32 ++++++++++
 src/dynarec/la64/dynarec_la64_660f.c       | 52 ++++++++++++++++
 src/dynarec/la64/dynarec_la64_emit_shift.c | 70 +++++++++++++++++++++-
 src/dynarec/la64/dynarec_la64_helper.h     |  2 +
 src/dynarec/la64/la64_emitter.h            |  4 ++
 6 files changed, 175 insertions(+), 5 deletions(-)

diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index 8a8636676..3da344399 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -1912,6 +1912,23 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         case 0xD2: // TODO: Jump if CL is 0
             nextop = F8;
             switch ((nextop >> 3) & 7) {
+                case 4:
+                case 6:
+                    if (opcode == 0xD0) {
+                        INST_NAME("SHL Eb, 1");
+                        GETEB(x1, 0);
+                        MOV32w(x2, 1);
+                    } else {
+                        INST_NAME("SHL Eb, CL");
+                        GETEB(x1, 0);
+                        ANDI(x2, xRCX, 0x1F);
+                        BEQ_NEXT(x2, xZR);
+                    }
+                    SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined
+                    if (box64_dynarec_safeflags > 1) MAYSETFLAGS();
+                    emit_shl8(dyn, ninst, x1, x2, x5, x4, x6);
+                    EBBACK();
+                    break;
                 case 5:
                     if (opcode == 0xD0) {
                         INST_NAME("SHR Eb, 1");
@@ -1924,8 +1941,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         BEQ_NEXT(x2, xZR);
                     }
                     SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined
-                    if (box64_dynarec_safeflags > 1)
-                        MAYSETFLAGS();
+                    if (box64_dynarec_safeflags > 1) MAYSETFLAGS();
                     emit_shr8(dyn, ninst, x1, x2, x5, x4, x6);
                     EBBACK();
                     break;
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c
index f0da22d81..4e80972f7 100644
--- a/src/dynarec/la64/dynarec_la64_0f.c
+++ b/src/dynarec/la64/dynarec_la64_0f.c
@@ -968,6 +968,38 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 ZEROUP(gd);
             }
            break;
+        case 0xB3:
+            INST_NAME("BTR Ed, Gd");
+            SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION);
+            SET_DFNONE();
+            nextop = F8;
+            GETGD;
+            if (MODREG) {
+                ed = TO_NAT((nextop & 7) + (rex.b << 3));
+                wback = 0;
+            } else {
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                SRAIxw(x1, gd, 5 + rex.w);
+                ADDSL(x3, wback, x1, 2 + rex.w, x1);
+                LDxw(x1, x3, fixedaddress);
+                ed = x1;
+                wback = x3;
+            }
+            ANDI(x2, gd, rex.w ? 0x3f : 0x1f);
+            SRL_D(x4, ed, x2);
+            BSTRINS_D(xFlags, x4, 0, 0);
+            ADDI_D(x4, xZR, 1);
+            ANDI(x2, gd, rex.w ? 0x3f : 0x1f);
+            SLL_D(x4, x4, x2);
+            ANDN(ed, ed, x4);
+            if (wback) {
+                SDxw(ed, wback, fixedaddress);
+                SMWRITE();
+            } else if (!rex.w) {
+                ZEROUP(ed);
+            }
+            break;
         case 0xB6:
             INST_NAME("MOVZX Gd, Eb");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index 6d760bb95..d2024ba3e 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -1634,6 +1634,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETGX_empty(v0);
             // TODO: fastround
             VFTINTRZ_W_D(v0, v1, v1);
+            VINSGR2VR_D(v0, xZR, 1);
             break;
         case 0xE7:
             INST_NAME("MOVNTDQ Ex, Gx");
@@ -1710,6 +1711,57 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 VXOR_V(q0, q0, q1);
             }
             break;
+        case 0xF1:
+            INST_NAME("PSLLW Gx, Ex");
+            nextop = F8;
+            GETGX(q0, 1);
+            GETEX(q1, 0, 0);
+            v0 = fpu_get_scratch(dyn);
+            v1 = fpu_get_scratch(dyn);
+            VREPLVE_H(v1, q1, xZR);
+            VPICKVE2GR_DU(x4, q1, 0);
+            SLTUI(x3, x4, 16);
+            SUB_D(x3, xZR, x3);
+            NOR(x3, x3, xZR);
+            VREPLGR2VR_D(v0, x3);
+            VSLL_H(q0, q0, v1);
+            VAND_V(v0, q0, v0);
+            VXOR_V(q0, q0, v0);
+            break;
+        case 0xF2:
+            INST_NAME("PSLLD Gx, Ex");
+            nextop = F8;
+            GETGX(q0, 1);
+            GETEX(q1, 0, 0);
+            v0 = fpu_get_scratch(dyn);
+            v1 = fpu_get_scratch(dyn);
+            VREPLVE_W(v1, q1, xZR);
+            VPICKVE2GR_DU(x4, q1, 0);
+            SLTUI(x3, x4, 32);
+            SUB_D(x3, xZR, x3);
+            NOR(x3, x3, xZR);
+            VREPLGR2VR_D(v0, x3);
+            VSLL_W(q0, q0, v1);
+            VAND_V(v0, q0, v0);
+            VXOR_V(q0, q0, v0);
+            break;
+        case 0xF3:
+            INST_NAME("PSLLQ Gx, Ex");
+            nextop = F8;
+            GETGX(q0, 1);
+            GETEX(q1, 0, 0);
+            v0 = fpu_get_scratch(dyn);
+            v1 = fpu_get_scratch(dyn);
+            VREPLVE_D(v1, q1, xZR);
+            VPICKVE2GR_DU(x4, q1, 0);
+            SLTUI(x3, x4, 64);
+            SUB_D(x3, xZR, x3);
+            NOR(x3, x3, xZR);
+            VREPLGR2VR_D(v0, x3);
+            VSLL_D(q0, q0, v1);
+            VAND_V(v0, q0, v0);
+            VXOR_V(q0, q0, v0);
+            break;
         case 0xF4:
             INST_NAME("PMULUDQ Gx,Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_emit_shift.c b/src/dynarec/la64/dynarec_la64_emit_shift.c
index d1fd72904..9c7845ddf 100644
--- a/src/dynarec/la64/dynarec_la64_emit_shift.c
+++ b/src/dynarec/la64/dynarec_la64_emit_shift.c
@@ -336,13 +336,77 @@ void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
     }
 }
 
-// emit SHR8 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch
-void emit_shr8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+// emit SHL8 instruction, from s1 , shift s2, store result in s1 using s3, s4 and s5 as scratch
+void emit_shl8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
 {
-    int64_t j64;
+    if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR);
+
+    IFX (X_PEND) {
+        ST_B(s1, xEmu, offsetof(x64emu_t, op1));
+        ST_B(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_shl8);
+    } else IFXORNAT (X_ALL) {
+        SET_DFNONE();
+    }
+
+    if (la64_lbt) {
+        IFX (X_ALL) {
+            X64_SLL_B(s1, s2);
+        }
+        SLL_D(s1, s1, s2);
+        ANDI(s1, s1, 0xff);
+
+        IFX (X_PEND) {
+            ST_B(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+    SLL_D(s1, s1, s2);
+    // s2 is not 0 here and is 1..1f/3f
+    CLEAR_FLAGS(s3);
+    IFX (X_CF | X_OF) {
+        SRLI_D(s5, s1, 8);
+        ANDI(s5, s5, 1); // LSB == F_CF
+        IFX (X_CF) {
+            OR(xFlags, xFlags, s5);
+        }
+    }
+
+    SLLI_D(s1, s1, 56);
+    IFX (X_SF) {
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    SRLI_D(s1, s1, 56);
+
+    IFX (X_PEND) {
+        ST_B(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX (X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX (X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        ADDI_D(s3, s2, -1);
+        BNEZ(s3, 4 + 4 * 4);
+        SRLI_D(s3, s1, 7);
+        XOR(s3, s3, s5);
+        SLLI_D(s3, s3, F_OF);
+        OR(xFlags, xFlags, s3);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SHR8 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch
+void emit_shr8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
     if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR);
+
     IFX (X_PEND) {
         ST_B(s2, xEmu, offsetof(x64emu_t, op2));
         ST_B(s1, xEmu, offsetof(x64emu_t, op1));
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index 15f9bb154..71c701982 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -896,6 +896,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_shl16c STEPNAME(emit_shl16c)
 #define emit_shl32 STEPNAME(emit_shl32)
 #define emit_shl32c STEPNAME(emit_shl32c)
+#define emit_shl8 STEPNAME(emit_shl8)
 #define emit_shr8 STEPNAME(emit_shr8)
 #define emit_shr16 STEPNAME(emit_shr16)
 #define emit_shr16c STEPNAME(emit_shr16c)
@@ -1004,6 +1005,7 @@ void emit_shl16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
 void emit_shl16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
 void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5);
+void emit_shl8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_shr8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_shr16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_shr16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 1cccad6af..cddcb1c17 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -1898,6 +1898,10 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VPICKVE2GR_DU(rd, vj, imm1) EMIT(type_2RI1(0b011100101111001111110, imm1, vj, rd))
 #define VFRINT_D(vd, vj) EMIT(type_2R(0b0111001010011101001110, vj, vd))
 #define VFRINTRRD_D(vd, vj, imm4) EMIT(type_2RI4(0b011100101001110101, imm4, vj, vd))
+#define VREPLGR2VR_B(vd, rj) EMIT(type_2R(0b0111001010011111000000, rj, vd))
+#define VREPLGR2VR_H(vd, rj) EMIT(type_2R(0b0111001010011111000001, rj, vd))
+#define VREPLGR2VR_W(vd, rj) EMIT(type_2R(0b0111001010011111000010, rj, vd))
+#define VREPLGR2VR_D(vd, rj) EMIT(type_2R(0b0111001010011111000011, rj, vd))
 
 ////////////////////////////////////////////////////////////////////////////////
 // (undocumented) LBT extension instructions