Skip to content

Commit

Permalink
[LA64_DYNAREC] Added more opcodes and fixed CVTTPD2DQ (#2211)
Browse files Browse the repository at this point in the history
  • Loading branch information
ksco authored Dec 25, 2024
1 parent 21a21b0 commit 1456897
Show file tree
Hide file tree
Showing 6 changed files with 175 additions and 5 deletions.
20 changes: 18 additions & 2 deletions src/dynarec/la64/dynarec_la64_00.c
Original file line number Diff line number Diff line change
Expand Up @@ -1920,6 +1920,23 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
case 0xD2: // TODO: Jump if CL is 0
nextop = F8;
switch ((nextop >> 3) & 7) {
// ModRM reg values 4 and 6 share this 8-bit shift-left (SHL Eb) translation.
case 4:
case 6:
if (opcode == 0xD0) {
// Opcode 0xD0: the shift count is the constant 1, materialised in x2.
INST_NAME("SHL Eb, 1");
GETEB(x1, 0);
MOV32w(x2, 1);
} else {
// Otherwise the count comes from CL, masked to its low 5 bits.
INST_NAME("SHL Eb, CL");
GETEB(x1, 0);
ANDI(x2, xRCX, 0x1F);
// Presumably jumps to the next instruction when the masked count is zero,
// so nothing below runs for a zero count - TODO confirm BEQ_NEXT semantics.
BEQ_NEXT(x2, xZR);
}
SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined
if (box64_dynarec_safeflags > 1) MAYSETFLAGS();
// Shift x1 left by x2; x5, x4 and x6 are handed over as scratch registers.
emit_shl8(dyn, ninst, x1, x2, x5, x4, x6);
// Presumably writes the result in x1 back to the Eb operand - TODO confirm.
EBBACK();
break;
case 5:
if (opcode == 0xD0) {
INST_NAME("SHR Eb, 1");
Expand All @@ -1932,8 +1949,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
BEQ_NEXT(x2, xZR);
}
SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined
if (box64_dynarec_safeflags > 1)
MAYSETFLAGS();
if (box64_dynarec_safeflags > 1) MAYSETFLAGS();
emit_shr8(dyn, ninst, x1, x2, x5, x4, x6);
EBBACK();
break;
Expand Down
32 changes: 32 additions & 0 deletions src/dynarec/la64/dynarec_la64_0f.c
Original file line number Diff line number Diff line change
Expand Up @@ -968,6 +968,38 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
ZEROUP(gd);
}
break;
case 0xB3:
    // BTR Ed, Gd: copy the bit of Ed selected by Gd into CF, then clear that bit in Ed.
    INST_NAME("BTR Ed, Gd");
    SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION);
    SET_DFNONE();
    nextop = F8;
    GETGD;
    if (MODREG) {
        // Register operand: work directly on the native register, no write-back address.
        ed = TO_NAT((nextop & 7) + (rex.b << 3));
        wback = 0;
    } else {
        SMREAD();
        addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
        // Memory operand: the bit offset in Gd may select a different word than the
        // one addressed, so derive a word index with an arithmetic shift (5 for
        // 32-bit, 6 for 64-bit operands) and presumably scale/add it to the base
        // address via ADDSL - TODO confirm ADDSL operand meaning.
        SRAIxw(x1, gd, 5 + rex.w);
        ADDSL(x3, wback, x1, 2 + rex.w, x1);
        LDxw(x1, x3, fixedaddress);
        ed = x1;
        wback = x3;
    }
    // x2 = bit index inside the selected word (low 5 or 6 bits of Gd).
    ANDI(x2, gd, rex.w ? 0x3f : 0x1f);
    // Bring the selected bit down to bit 0 and insert it into bit 0 of xFlags.
    // NOTE(review): CF is written straight into xFlags here; confirm this is also
    // what is wanted when la64_lbt is enabled.
    SRL_D(x4, ed, x2);
    BSTRINS_D(xFlags, x4, 0, 0);
    // Build the single-bit mask (1 << index). x2 still holds the index computed
    // above (nothing in between writes it), so it is not recomputed.
    ADDI_D(x4, xZR, 1);
    SLL_D(x4, x4, x2);
    // ed &= ~mask : clear the selected bit.
    ANDN(ed, ed, x4);
    if (wback) {
        SDxw(ed, wback, fixedaddress);
        SMWRITE();
    } else if (!rex.w) {
        // 32-bit register destination: upper half of the 64-bit register is zeroed.
        ZEROUP(ed);
    }
    break;
case 0xB6:
INST_NAME("MOVZX Gd, Eb");
nextop = F8;
Expand Down
52 changes: 52 additions & 0 deletions src/dynarec/la64/dynarec_la64_660f.c
Original file line number Diff line number Diff line change
Expand Up @@ -1634,6 +1634,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
GETGX_empty(v0);
// TODO: fastround
VFTINTRZ_W_D(v0, v1, v1);
VINSGR2VR_D(v0, xZR, 1);
break;
case 0xE7:
INST_NAME("MOVNTDQ Ex, Gx");
Expand Down Expand Up @@ -1710,6 +1711,57 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
VXOR_V(q0, q0, q1);
}
break;
// PSLLW Gx, Ex: shift every 16-bit lane of Gx left by the count held in Ex.
// The result is forced to zero when the 64-bit count is 16 or more.
case 0xF1:
INST_NAME("PSLLW Gx, Ex");
nextop = F8;
GETGX(q0, 1);
GETEX(q1, 0, 0);
v0 = fpu_get_scratch(dyn);
v1 = fpu_get_scratch(dyn);
// v1 = 16-bit element 0 of the count, replicated to every lane (index taken from xZR).
VREPLVE_H(v1, q1, xZR);
// x4 = low 64 bits of the count operand.
VPICKVE2GR_DU(x4, q1, 0);
// x3 = 1 when count < 16, else 0; negate and invert so that
// x3 = 0 for an in-range count and all-ones for an out-of-range one.
SLTUI(x3, x4, 16);
SUB_D(x3, xZR, x3);
NOR(x3, x3, xZR);
VREPLGR2VR_D(v0, x3);
VSLL_H(q0, q0, v1);
// v0 = shifted value where the mask is set; XOR-ing it back clears q0
// for out-of-range counts and leaves it untouched otherwise.
VAND_V(v0, q0, v0);
VXOR_V(q0, q0, v0);
break;
// PSLLD Gx, Ex: shift every 32-bit lane of Gx left by the count held in Ex.
// The result is forced to zero when the 64-bit count is 32 or more.
case 0xF2:
INST_NAME("PSLLD Gx, Ex");
nextop = F8;
GETGX(q0, 1);
GETEX(q1, 0, 0);
v0 = fpu_get_scratch(dyn);
v1 = fpu_get_scratch(dyn);
// v1 = 32-bit element 0 of the count, replicated to every lane (index taken from xZR).
VREPLVE_W(v1, q1, xZR);
// x4 = low 64 bits of the count operand.
VPICKVE2GR_DU(x4, q1, 0);
// x3 = 1 when count < 32, else 0; negate and invert so that
// x3 = 0 for an in-range count and all-ones for an out-of-range one.
SLTUI(x3, x4, 32);
SUB_D(x3, xZR, x3);
NOR(x3, x3, xZR);
VREPLGR2VR_D(v0, x3);
VSLL_W(q0, q0, v1);
// v0 = shifted value where the mask is set; XOR-ing it back clears q0
// for out-of-range counts and leaves it untouched otherwise.
VAND_V(v0, q0, v0);
VXOR_V(q0, q0, v0);
break;
// PSLLQ Gx, Ex: shift every 64-bit lane of Gx left by the count held in Ex.
// The result is forced to zero when the 64-bit count is 64 or more.
case 0xF3:
INST_NAME("PSLLQ Gx, Ex");
nextop = F8;
GETGX(q0, 1);
GETEX(q1, 0, 0);
v0 = fpu_get_scratch(dyn);
v1 = fpu_get_scratch(dyn);
// v1 = 64-bit element 0 of the count, replicated to every lane (index taken from xZR).
VREPLVE_D(v1, q1, xZR);
// x4 = low 64 bits of the count operand.
VPICKVE2GR_DU(x4, q1, 0);
// x3 = 1 when count < 64, else 0; negate and invert so that
// x3 = 0 for an in-range count and all-ones for an out-of-range one.
SLTUI(x3, x4, 64);
SUB_D(x3, xZR, x3);
NOR(x3, x3, xZR);
VREPLGR2VR_D(v0, x3);
VSLL_D(q0, q0, v1);
// v0 = shifted value where the mask is set; XOR-ing it back clears q0
// for out-of-range counts and leaves it untouched otherwise.
VAND_V(v0, q0, v0);
VXOR_V(q0, q0, v0);
break;
case 0xF4:
INST_NAME("PMULUDQ Gx,Ex");
nextop = F8;
Expand Down
70 changes: 67 additions & 3 deletions src/dynarec/la64/dynarec_la64_emit_shift.c
Original file line number Diff line number Diff line change
Expand Up @@ -336,13 +336,77 @@ void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
}
}

// emit SHR8 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch
void emit_shr8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
// Emit an 8-bit SHL: shift s1 left by s2 and leave the 8-bit result in s1.
//   dyn, ninst : dynarec state and index of the instruction being translated
//   s1         : register holding the value to shift; receives the result
//   s2         : register holding the shift count (the visible caller passes 1,
//                or CL masked with 0x1F after skipping a zero count)
//   s3, s4, s5 : scratch registers
// The x86 flags requested for this instruction are updated along the way.
void emit_shl8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
{
// NOTE(review): j64 is not referenced directly in this body; presumably it is
// kept for branch helper macros - confirm before removing.
int64_t j64;
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR);

// Deferred flags: record both operands and the operation kind in the emu state.
IFX (X_PEND) {
ST_B(s1, xEmu, offsetof(x64emu_t, op1));
ST_B(s2, xEmu, offsetof(x64emu_t, op2));
SET_DF(s4, d_shl8);
} else IFXORNAT (X_ALL) {
SET_DFNONE();
}

// LBT path: X64_SLL_B is emitted for the flags, then the shift itself is done
// and the result truncated to 8 bits. Nothing below this block runs.
if (la64_lbt) {
IFX (X_ALL) {
X64_SLL_B(s1, s2);
}
SLL_D(s1, s1, s2);
ANDI(s1, s1, 0xff);

IFX (X_PEND) {
ST_B(s1, xEmu, offsetof(x64emu_t, res));
}
return;
}

// Generic path: shift first; bits above bit 7 are still present in s1 so the
// carry can be read from bit 8.
SLL_D(s1, s1, s2);

// s2 is not 0 here (the visible caller keeps it within 1..0x1f)
CLEAR_FLAGS(s3);
IFX (X_CF | X_OF) {
// s5 = bit 8 of the shifted value, i.e. the last bit shifted out of the byte.
SRLI_D(s5, s1, 8);
ANDI(s5, s5, 1); // LSB == F_CF
IFX (X_CF) {
OR(xFlags, xFlags, s5);
}
}

// Move the byte to the top of the register so its bit 7 becomes the sign bit.
SLLI_D(s1, s1, 56);
IFX (X_SF) {
// Skip the ORI (branch offset 8) when the value is non-negative.
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
}
// Move it back down: s1 is now the result zero-extended from 8 bits.
SRLI_D(s1, s1, 56);

IFX (X_PEND) {
ST_B(s1, xEmu, offsetof(x64emu_t, res));
}
IFX (X_ZF) {
// Skip the ORI (branch offset 8) when the result is non-zero.
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
}
IFX (X_OF) {
// OF flag is affected only on 1-bit shifts
// s3 = count - 1; when non-zero, branch over the four instructions below
// (offset 4 + 4 * 4 - keep in sync with the number of instructions emitted).
ADDI_D(s3, s2, -1);
BNEZ(s3, 4 + 4 * 4);
// OF = (bit 7 of result) XOR (carry saved in s5).
SRLI_D(s3, s1, 7);
XOR(s3, s3, s5);
SLLI_D(s3, s3, F_OF);
OR(xFlags, xFlags, s3);
}
IFX (X_PF) {
// Parity helper is called on the result with s3 and s4 as scratch.
emit_pf(dyn, ninst, s1, s3, s4);
}
}

// emit SHR8 instruction, from s1, shift s2 (non-zero and already masked), store result in s1 using s3 and s4 as scratch
void emit_shr8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
{
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR);

IFX (X_PEND) {
ST_B(s2, xEmu, offsetof(x64emu_t, op2));
ST_B(s1, xEmu, offsetof(x64emu_t, op1));
Expand Down
2 changes: 2 additions & 0 deletions src/dynarec/la64/dynarec_la64_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -897,6 +897,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
#define emit_shl16c STEPNAME(emit_shl16c)
#define emit_shl32 STEPNAME(emit_shl32)
#define emit_shl32c STEPNAME(emit_shl32c)
#define emit_shl8 STEPNAME(emit_shl8)
#define emit_shr8 STEPNAME(emit_shr8)
#define emit_shr16 STEPNAME(emit_shr16)
#define emit_shr16c STEPNAME(emit_shr16c)
Expand Down Expand Up @@ -1006,6 +1007,7 @@ void emit_shl16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
void emit_shl16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5);
void emit_shl8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
void emit_shr8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
void emit_shr16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
void emit_shr16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
Expand Down
4 changes: 4 additions & 0 deletions src/dynarec/la64/la64_emitter.h
Original file line number Diff line number Diff line change
Expand Up @@ -1898,6 +1898,10 @@ LSX instruction starts with V, LASX instruction starts with XV.
#define VPICKVE2GR_DU(rd, vj, imm1) EMIT(type_2RI1(0b011100101111001111110, imm1, vj, rd))
#define VFRINT_D(vd, vj) EMIT(type_2R(0b0111001010011101001110, vj, vd))
#define VFRINTRRD_D(vd, vj, imm4) EMIT(type_2RI4(0b011100101001110101, imm4, vj, vd))
// VREPLGR2VR.{B,H,W,D} vd, rj - going by the mnemonic, replicates general register rj
// into every byte/halfword/word/doubleword element of vd (TODO confirm against the LSX manual).
#define VREPLGR2VR_B(vd, rj) EMIT(type_2R(0b0111001010011111000000, rj, vd))
#define VREPLGR2VR_H(vd, rj) EMIT(type_2R(0b0111001010011111000001, rj, vd))
#define VREPLGR2VR_W(vd, rj) EMIT(type_2R(0b0111001010011111000010, rj, vd))
#define VREPLGR2VR_D(vd, rj) EMIT(type_2R(0b0111001010011111000011, rj, vd))

////////////////////////////////////////////////////////////////////////////////
// (undocumented) LBT extension instructions
Expand Down

0 comments on commit 1456897

Please sign in to comment.