Skip to content

Commit

Permalink
[DYNAREC] More changes to flags optimisations
Browse files Browse the repository at this point in the history
  • Loading branch information
ptitSeb committed Nov 1, 2021
1 parent f3532cc commit 428d59a
Show file tree
Hide file tree
Showing 8 changed files with 82 additions and 31 deletions.
57 changes: 53 additions & 4 deletions src/dynarec/dynarec_arm.c
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,22 @@ dynarec_log(LOG_DEBUG, "Asked to Fill block %p with %p\n", block, (void*)addr);
// already protect the block and compute hash signature
protectDB(addr, end-addr); //end is 1byte after actual end
uint32_t hash = X31_hash_code((void*)addr, end-addr);
// Compute flag_need, without taking into account any barriers
uint32_t last_need = X_PEND;
for(int i = helper.size; i-- > 0;) {
last_need |= helper.insts[i].x86.use_flags;
if (last_need == (X_PEND | X_ALL)) {
last_need = X_ALL;
}
helper.insts[i].x86.need_flags = last_need;
if ((helper.insts[i].x86.set_flags) && !(helper.insts[i].x86.state_flags & SF_MAYSET)) {
if (last_need & X_PEND) {
last_need = (~helper.insts[i].x86.set_flags) & X_ALL;
} else {
last_need &= ~helper.insts[i].x86.set_flags;
}
}
}
// calculate barriers
for(int i=0; i<helper.size; ++i)
if(helper.insts[i].x86.jmp) {
Expand All @@ -311,7 +327,6 @@ dynarec_log(LOG_DEBUG, "Asked to Fill block %p with %p\n", block, (void*)addr);
}
// check for the optional barriers now
for(int i=helper.size-1; i>=0; --i) {
if(helper.insts[i].x86.barrier) helper.insts[i].x86.use_flags |= X_PEND;
if(helper.insts[i].x86.barrier==3)
if(helper.insts[i].x86.jmp_insts == -1) {
if(i==helper.size-1 || helper.insts[i+1].x86.barrier)
Expand All @@ -321,9 +336,40 @@ dynarec_log(LOG_DEBUG, "Asked to Fill block %p with %p\n", block, (void*)addr);
} else
helper.insts[i].x86.barrier=2;
}
// pass 1, flags
arm_pass1(&helper, addr);
uint32_t last_need = X_PEND;
// check to remove useless barriers, in case of a jump whose destination doesn't need flags
for(int i=helper.size-1; i>=0; --i) {
if(helper.insts[i].x86.jmp
&& helper.insts[i].x86.jmp_insts>=0
&& helper.insts[helper.insts[i].x86.jmp_insts].x86.barrier==1) {
int k = helper.insts[i].x86.jmp_insts;
//TODO: optimize FPU barrier too
if((!helper.insts[k].x86.need_flags)
||(helper.insts[k].x86.set_flags==X_ALL
&& helper.insts[k].x86.state_flags==SF_SET)
||(helper.insts[k].x86.state_flags==SF_SET_PENDING)) {
//if(box86_dynarec_dump) dynarec_log(LOG_NONE, "Removed barrier for inst %d\n", k);
helper.insts[k].x86.barrier = 4; // remove barrier (keep FPU barrier, and still reset state flag)
}
}
}
// reset need_flags and compute it again, now taking barriers into account (because barriers change use_flags)
for(int i = helper.size; i-- > 0;) {
if(helper.insts[i].x86.barrier==1)
// immediate barrier
helper.insts[i].x86.use_flags |= X_PEND;
else if(helper.insts[i].x86.jmp
&& helper.insts[i].x86.jmp_insts>=0
) {
if(helper.insts[helper.insts[i].x86.jmp_insts].x86.barrier==1)
// jumpto barrier
helper.insts[i].x86.use_flags |= X_PEND;
else
helper.insts[i].x86.use_flags |= helper.insts[helper.insts[i].x86.jmp_insts].x86.need_flags;
}
}
for(int i = helper.size; i-- > 0;)
helper.insts[i].x86.need_flags = 0;
last_need = X_PEND;
for(int i = helper.size; i-- > 0;) {
helper.insts[i].x86.need_flags = last_need;
if ((helper.insts[i].x86.set_flags) && !(helper.insts[i].x86.state_flags & SF_MAYSET)) {
Expand All @@ -338,6 +384,9 @@ dynarec_log(LOG_DEBUG, "Asked to Fill block %p with %p\n", block, (void*)addr);
last_need = X_ALL;
}
}

// pass 1, float optimisations
arm_pass1(&helper, addr);

// pass 2, instruction size
arm_pass2(&helper, addr);
Expand Down
10 changes: 5 additions & 5 deletions src/dynarec/dynarec_arm_00.c
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,7 @@ uintptr_t dynarec00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
READFLAGS(F|(dyn->insts[ninst].x86.barrier?0:X_PEND)); \
i8 = F8S; \
BARRIER(3); \
JUMP(addr+i8); \
JUMP(addr+i8, 1); \
GETFLAGS; \
if(dyn->insts) { \
if(dyn->insts[ninst].x86.jmp_insts==-1) { \
Expand Down Expand Up @@ -1993,9 +1993,9 @@ uintptr_t dynarec00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
case 0xDF:
addr = dynarecDF(dyn, addr, ip, ninst, ok, need_epilog);
break;
#define GO(NO, YES) \
BARRIER(2); \
JUMP(addr+i8);\
#define GO(NO, YES) \
BARRIER(2); \
JUMP(addr+i8, 1); \
if(dyn->insts) { \
if(dyn->insts[ninst].x86.jmp_insts==-1) { \
/* out of the block */ \
Expand Down Expand Up @@ -2150,7 +2150,7 @@ uintptr_t dynarec00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
INST_NAME("JMP Ib");
i32 = F8S;
}
JUMP(addr+i32);
JUMP(addr+i32, 0);
if(dyn->insts) {
PASS2IF(dyn->insts[ninst].x86.jmp_insts==-1, 1) {
// out of the block
Expand Down
8 changes: 4 additions & 4 deletions src/dynarec/dynarec_arm_0f.c
Original file line number Diff line number Diff line change
Expand Up @@ -1131,10 +1131,10 @@ uintptr_t dynarec0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,

#define GO(GETFLAGS, NO, YES, F) \
READFLAGS(F|(dyn->insts[ninst].x86.barrier?0:X_PEND)); \
i32_ = F32S; \
BARRIER(3); \
JUMP(addr+i32_);\
GETFLAGS; \
i32_ = F32S; \
BARRIER(3); \
JUMP(addr+i32_, 1); \
GETFLAGS; \
if(dyn->insts) { \
if(dyn->insts[ninst].x86.jmp_insts==-1) { \
/* out of the block */ \
Expand Down
6 changes: 3 additions & 3 deletions src/dynarec/dynarec_arm_67.c
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,9 @@ uintptr_t dynarec67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
}
break;

#define GO(NO, YES) \
BARRIER(2); \
JUMP(addr+i8);\
#define GO(NO, YES) \
BARRIER(2); \
JUMP(addr+i8, 1); \
if(dyn->insts) { \
if(dyn->insts[ninst].x86.jmp_insts==-1) { \
/* out of the block */ \
Expand Down
2 changes: 1 addition & 1 deletion src/dynarec/dynarec_arm_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@
((B==SF_SET_PENDING && !(dyn->insts[ninst].x86.need_flags&X_PEND)?SF_SET:B))
#endif
#ifndef JUMP
#define JUMP(A)
#define JUMP(A, C)
#endif
#ifndef BARRIER
#define BARRIER(A)
Expand Down
16 changes: 11 additions & 5 deletions src/dynarec/dynarec_arm_pass0.h
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@

#define INIT uintptr_t sav_addr=addr
#define FINI dyn->isize = addr-sav_addr
#define FINI \
dyn->isize = addr-sav_addr;\
dyn->insts[ninst].x86.addr = addr;\
if(ninst) dyn->insts[ninst-1].x86.size = dyn->insts[ninst].x86.addr - dyn->insts[ninst-1].x86.addr
#define MESSAGE(A, ...)
#define SETFLAGS(A, B)
#define READFLAGS(A)
#define READFLAGS(A) dyn->insts[ninst].x86.use_flags = A
#define SETFLAGS(A,B) {dyn->insts[ninst].x86.set_flags = A; dyn->insts[ninst].x86.state_flags = B;}
#define EMIT(A)
#define JUMP(A) add_next(dyn, (uintptr_t)A); dyn->insts[ninst].x86.jmp = A
#define JUMP(A, C) if((A)>addr) add_next(dyn, (uintptr_t)A); dyn->insts[ninst].x86.jmp = A; dyn->insts[ninst].x86.jmp_cond = C
#define BARRIER(A) dyn->insts[ninst].x86.barrier = A
#define BARRIER_NEXT(A) if(ninst<dyn->size) dyn->insts[ninst+1].x86.barrier = A
#define NEW_INST \
Expand All @@ -14,7 +17,10 @@
dyn->insts = (instruction_arm_t*)realloc(dyn->insts, sizeof(instruction_arm_t)*dyn->cap*2); \
memset(&dyn->insts[dyn->cap], 0, sizeof(instruction_arm_t)*dyn->cap); \
dyn->cap *= 2; \
}
} \
dyn->insts[ninst].x86.addr = ip; \
if(ninst) {dyn->insts[ninst-1].x86.size = dyn->insts[ninst].x86.addr - dyn->insts[ninst-1].x86.addr;}

#define INST_EPILOG
#define INST_NAME(name)
#define DEFAULT \
Expand Down
13 changes: 4 additions & 9 deletions src/dynarec/dynarec_arm_pass1.h
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
#define INIT
#define FINI \
dyn->insts[ninst].x86.addr = addr; \
if(ninst) dyn->insts[ninst-1].x86.size = dyn->insts[ninst].x86.addr - dyn->insts[ninst-1].x86.addr;
#define FINI
#define MESSAGE(A, ...)
#define EMIT(A)
#define READFLAGS(A) dyn->insts[ninst].x86.use_flags = A
#define SETFLAGS(A,B) {dyn->insts[ninst].x86.set_flags = A; dyn->insts[ninst].x86.state_flags = B;}
#define READFLAGS(A)
#define SETFLAGS(A,B)

#define NEW_INST \
dyn->insts[ninst].x86.addr = ip;\
dyn->n.combined1 = dyn->n.combined2 = 0;\
if(ninst) {dyn->insts[ninst-1].x86.size = dyn->insts[ninst].x86.addr - dyn->insts[ninst-1].x86.addr;}
#define NEW_INST dyn->n.combined1 = dyn->n.combined2 = 0

#define INST_EPILOG dyn->insts[ninst].n = dyn->n

Expand Down
1 change: 1 addition & 0 deletions src/dynarec/dynarec_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ typedef struct instruction_x86_s {
int32_t size; // size of the instruction
int barrier; // next instruction is a jump point, so no optim allowed
uintptr_t jmp; // offset to jump to, even if conditional (0 if not), no relative offset here
int jmp_cond; // 1 if conditional jump
int jmp_insts; // instruction to jump to (-1 if out of the block)
uint32_t use_flags; // 0 or combination of X_?F
uint32_t set_flags; // 0 or combination of X_?F
Expand Down

0 comments on commit 428d59a

Please sign in to comment.