Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[LA64_DYNAREC] Added more opcodes and fixed CVTTPD2DQ #2211

Merged
merged 1 commit into from
Dec 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions src/dynarec/la64/dynarec_la64_00.c
Original file line number Diff line number Diff line change
Expand Up @@ -1912,6 +1912,23 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
case 0xD2: // TODO: Jump if CL is 0
nextop = F8;
switch ((nextop >> 3) & 7) {
case 4:
case 6:
if (opcode == 0xD0) {
INST_NAME("SHL Eb, 1");
GETEB(x1, 0);
MOV32w(x2, 1);
} else {
INST_NAME("SHL Eb, CL");
GETEB(x1, 0);
ANDI(x2, xRCX, 0x1F);
BEQ_NEXT(x2, xZR);
}
SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined
if (box64_dynarec_safeflags > 1) MAYSETFLAGS();
emit_shl8(dyn, ninst, x1, x2, x5, x4, x6);
EBBACK();
break;
case 5:
if (opcode == 0xD0) {
INST_NAME("SHR Eb, 1");
Expand All @@ -1924,8 +1941,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
BEQ_NEXT(x2, xZR);
}
SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined
if (box64_dynarec_safeflags > 1)
MAYSETFLAGS();
if (box64_dynarec_safeflags > 1) MAYSETFLAGS();
emit_shr8(dyn, ninst, x1, x2, x5, x4, x6);
EBBACK();
break;
Expand Down
32 changes: 32 additions & 0 deletions src/dynarec/la64/dynarec_la64_0f.c
Original file line number Diff line number Diff line change
Expand Up @@ -968,6 +968,38 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
ZEROUP(gd);
}
break;
case 0xB3:
INST_NAME("BTR Ed, Gd");
SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION);
SET_DFNONE();
nextop = F8;
GETGD;
if (MODREG) {
ed = TO_NAT((nextop & 7) + (rex.b << 3));
wback = 0;
} else {
SMREAD();
addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
SRAIxw(x1, gd, 5 + rex.w);
ADDSL(x3, wback, x1, 2 + rex.w, x1);
LDxw(x1, x3, fixedaddress);
ed = x1;
wback = x3;
}
ANDI(x2, gd, rex.w ? 0x3f : 0x1f);
SRL_D(x4, ed, x2);
BSTRINS_D(xFlags, x4, 0, 0);
ADDI_D(x4, xZR, 1);
ANDI(x2, gd, rex.w ? 0x3f : 0x1f);
SLL_D(x4, x4, x2);
ANDN(ed, ed, x4);
if (wback) {
SDxw(ed, wback, fixedaddress);
SMWRITE();
} else if (!rex.w) {
ZEROUP(ed);
}
break;
case 0xB6:
INST_NAME("MOVZX Gd, Eb");
nextop = F8;
Expand Down
52 changes: 52 additions & 0 deletions src/dynarec/la64/dynarec_la64_660f.c
Original file line number Diff line number Diff line change
Expand Up @@ -1634,6 +1634,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
GETGX_empty(v0);
// TODO: fastround
VFTINTRZ_W_D(v0, v1, v1);
VINSGR2VR_D(v0, xZR, 1);
break;
case 0xE7:
INST_NAME("MOVNTDQ Ex, Gx");
Expand Down Expand Up @@ -1710,6 +1711,57 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
VXOR_V(q0, q0, q1);
}
break;
case 0xF1:
INST_NAME("PSLLW Gx, Ex");
nextop = F8;
GETGX(q0, 1);
GETEX(q1, 0, 0);
v0 = fpu_get_scratch(dyn);
v1 = fpu_get_scratch(dyn);
VREPLVE_H(v1, q1, xZR);
VPICKVE2GR_DU(x4, q1, 0);
SLTUI(x3, x4, 16);
SUB_D(x3, xZR, x3);
NOR(x3, x3, xZR);
VREPLGR2VR_D(v0, x3);
VSLL_H(q0, q0, v1);
VAND_V(v0, q0, v0);
VXOR_V(q0, q0, v0);
break;
case 0xF2:
INST_NAME("PSLLD Gx, Ex");
nextop = F8;
GETGX(q0, 1);
GETEX(q1, 0, 0);
v0 = fpu_get_scratch(dyn);
v1 = fpu_get_scratch(dyn);
VREPLVE_W(v1, q1, xZR);
VPICKVE2GR_DU(x4, q1, 0);
SLTUI(x3, x4, 32);
SUB_D(x3, xZR, x3);
NOR(x3, x3, xZR);
VREPLGR2VR_D(v0, x3);
VSLL_W(q0, q0, v1);
VAND_V(v0, q0, v0);
VXOR_V(q0, q0, v0);
break;
case 0xF3:
INST_NAME("PSLLQ Gx, Ex");
nextop = F8;
GETGX(q0, 1);
GETEX(q1, 0, 0);
v0 = fpu_get_scratch(dyn);
v1 = fpu_get_scratch(dyn);
VREPLVE_D(v1, q1, xZR);
VPICKVE2GR_DU(x4, q1, 0);
SLTUI(x3, x4, 64);
SUB_D(x3, xZR, x3);
NOR(x3, x3, xZR);
VREPLGR2VR_D(v0, x3);
VSLL_D(q0, q0, v1);
VAND_V(v0, q0, v0);
VXOR_V(q0, q0, v0);
break;
case 0xF4:
INST_NAME("PMULUDQ Gx,Ex");
nextop = F8;
Expand Down
70 changes: 67 additions & 3 deletions src/dynarec/la64/dynarec_la64_emit_shift.c
Original file line number Diff line number Diff line change
Expand Up @@ -336,13 +336,77 @@ void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
}
}

// emit SHR8 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch
void emit_shr8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
// emit SHL8 instruction: 8-bit shift left of s1 by the count in s2, result left in s1.
//   dyn, ninst : dynarec state and index of the instruction being translated
//   s1         : register holding the operand; receives the 8-bit (zero-extended) result
//   s2         : register holding the shift count; the OF code below only checks for s2 == 1
//   s3, s4, s5 : scratch registers (s5 carries the CF bit between the CF and OF steps)
// The visible 0xD0/0xD2 caller passes a count of 1 or CL & 0x1f and skips the call when the count is 0.
void emit_shl8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
{
// NOTE(review): j64 is not referenced by name in this function; it may only be needed by a macro — confirm before removing.
int64_t j64;
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR);

// With pending flags requested, record both operands and the d_shl8 tag in the emu state;
// otherwise, if any flag is needed, mark the deferred-flags state as "none".
IFX (X_PEND) {
ST_B(s1, xEmu, offsetof(x64emu_t, op1));
ST_B(s2, xEmu, offsetof(x64emu_t, op2));
SET_DF(s4, d_shl8);
} else IFXORNAT (X_ALL) {
SET_DFNONE();
}

// LBT path: X64_SLL_B is issued when any flag is needed (presumably it sets the x86 flags in
// hardware — confirm against the LBT notes), then the value itself is shifted and truncated to 8 bits.
if (la64_lbt) {
IFX (X_ALL) {
X64_SLL_B(s1, s2);
}
SLL_D(s1, s1, s2);
ANDI(s1, s1, 0xff);

IFX (X_PEND) {
ST_B(s1, xEmu, offsetof(x64emu_t, res));
}
return;
}

// Generic path: shift first, then derive each requested flag by hand.
SLL_D(s1, s1, s2);

// s2 is not 0 here and is 1..1f/3f
CLEAR_FLAGS(s3);
// Bit 8 of the shifted value is the last bit pushed out of the byte: that is CF.
// It is kept in s5 because the OF computation below needs it as well.
IFX (X_CF | X_OF) {
SRLI_D(s5, s1, 8);
ANDI(s5, s5, 1); // LSB == F_CF
IFX (X_CF) {
OR(xFlags, xFlags, s5);
}
}

// Move the byte to the top of the 64-bit register so that its bit 7 becomes the sign bit.
SLLI_D(s1, s1, 56);
IFX (X_SF) {
// Non-negative: branch offset 8 skips the single ORI that sets SF.
BGE(s1, xZR, 8);
ORI(xFlags, xFlags, 1 << F_SF);
}
// Shift back down: s1 now holds the result zero-extended from 8 bits.
SRLI_D(s1, s1, 56);

IFX (X_PEND) {
ST_B(s1, xEmu, offsetof(x64emu_t, res));
}
IFX (X_ZF) {
// Non-zero result: branch offset 8 skips the single ORI that sets ZF.
BNEZ(s1, 8);
ORI(xFlags, xFlags, 1 << F_ZF);
}
IFX (X_OF) {
// OF flag is affected only on 1-bit shifts
// s3 = s2 - 1; when it is non-zero the branch (offset 4 + 4 * 4) skips the four instructions below.
// Otherwise OF = (bit 7 of the result) XOR CF, moved to bit position F_OF.
ADDI_D(s3, s2, -1);
BNEZ(s3, 4 + 4 * 4);
SRLI_D(s3, s1, 7);
XOR(s3, s3, s5);
SLLI_D(s3, s3, F_OF);
OR(xFlags, xFlags, s3);
}
IFX (X_PF) {
// Parity is delegated to emit_pf on the 8-bit result, with s3 and s4 passed as its scratch registers.
emit_pf(dyn, ninst, s1, s3, s4);
}
}

// emit SHR8 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch
void emit_shr8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
{
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR);

IFX (X_PEND) {
ST_B(s2, xEmu, offsetof(x64emu_t, op2));
ST_B(s1, xEmu, offsetof(x64emu_t, op1));
Expand Down
2 changes: 2 additions & 0 deletions src/dynarec/la64/dynarec_la64_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -896,6 +896,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
#define emit_shl16c STEPNAME(emit_shl16c)
#define emit_shl32 STEPNAME(emit_shl32)
#define emit_shl32c STEPNAME(emit_shl32c)
#define emit_shl8 STEPNAME(emit_shl8)
#define emit_shr8 STEPNAME(emit_shr8)
#define emit_shr16 STEPNAME(emit_shr16)
#define emit_shr16c STEPNAME(emit_shr16c)
Expand Down Expand Up @@ -1004,6 +1005,7 @@ void emit_shl16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
void emit_shl16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5);
void emit_shl8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
void emit_shr8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
void emit_shr16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
void emit_shr16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
Expand Down
4 changes: 4 additions & 0 deletions src/dynarec/la64/la64_emitter.h
Original file line number Diff line number Diff line change
Expand Up @@ -1898,6 +1898,10 @@ LSX instruction starts with V, LASX instruction starts with XV.
#define VPICKVE2GR_DU(rd, vj, imm1) EMIT(type_2RI1(0b011100101111001111110, imm1, vj, rd))
#define VFRINT_D(vd, vj) EMIT(type_2R(0b0111001010011101001110, vj, vd))
#define VFRINTRRD_D(vd, vj, imm4) EMIT(type_2RI4(0b011100101001110101, imm4, vj, vd))
// LSX VREPLGR2VR.{B,H,W,D} vd, rj: emitted through type_2R; the four opcodes differ only in their low two bits (00/01/10/11 for B/H/W/D).
// NOTE(review): presumably replicates GPR rj into every element of vd at the given element width — confirm against the LSX manual.
#define VREPLGR2VR_B(vd, rj) EMIT(type_2R(0b0111001010011111000000, rj, vd))
#define VREPLGR2VR_H(vd, rj) EMIT(type_2R(0b0111001010011111000001, rj, vd))
#define VREPLGR2VR_W(vd, rj) EMIT(type_2R(0b0111001010011111000010, rj, vd))
#define VREPLGR2VR_D(vd, rj) EMIT(type_2R(0b0111001010011111000011, rj, vd))

////////////////////////////////////////////////////////////////////////////////
// (undocumented) LBT extension instructions
Expand Down
Loading