From 428d59a1b0237f88fdc2da15ac3783d4e9b2ddb6 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Mon, 1 Nov 2021 17:52:49 +0100 Subject: [PATCH] [DYNAREC] More changes to flags optimisations --- src/dynarec/dynarec_arm.c | 57 +++++++++++++++++++++++++++++--- src/dynarec/dynarec_arm_00.c | 10 +++--- src/dynarec/dynarec_arm_0f.c | 8 ++--- src/dynarec/dynarec_arm_67.c | 6 ++-- src/dynarec/dynarec_arm_helper.h | 2 +- src/dynarec/dynarec_arm_pass0.h | 16 ++++++--- src/dynarec/dynarec_arm_pass1.h | 13 +++----- src/dynarec/dynarec_private.h | 1 + 8 files changed, 82 insertions(+), 31 deletions(-) diff --git a/src/dynarec/dynarec_arm.c b/src/dynarec/dynarec_arm.c index 270bbcdc76..c40fed34dd 100755 --- a/src/dynarec/dynarec_arm.c +++ b/src/dynarec/dynarec_arm.c @@ -291,6 +291,22 @@ dynarec_log(LOG_DEBUG, "Asked to Fill block %p with %p\n", block, (void*)addr); // already protect the block and compute hash signature protectDB(addr, end-addr); //end is 1byte after actual end uint32_t hash = X31_hash_code((void*)addr, end-addr); + // Compute flag_need, without taking into account any barriers + uint32_t last_need = X_PEND; + for(int i = helper.size; i-- > 0;) { + last_need |= helper.insts[i].x86.use_flags; + if (last_need == (X_PEND | X_ALL)) { + last_need = X_ALL; + } + helper.insts[i].x86.need_flags = last_need; + if ((helper.insts[i].x86.set_flags) && !(helper.insts[i].x86.state_flags & SF_MAYSET)) { + if (last_need & X_PEND) { + last_need = (~helper.insts[i].x86.set_flags) & X_ALL; + } else { + last_need &= ~helper.insts[i].x86.set_flags; + } + } + } // calculate barriers for(int i=0; i=0; --i) { - if(helper.insts[i].x86.barrier) helper.insts[i].x86.use_flags |= X_PEND; if(helper.insts[i].x86.barrier==3) if(helper.insts[i].x86.jmp_insts == -1) { if(i==helper.size-1 || helper.insts[i+1].x86.barrier) @@ -321,9 +336,40 @@ dynarec_log(LOG_DEBUG, "Asked to Fill block %p with %p\n", block, (void*)addr); } else helper.insts[i].x86.barrier=2; } - // pass 1, flags - arm_pass1(&helper, addr); - uint32_t last_need = X_PEND; + // check to remove useless barrier, in case of jump when destination doesn't needs flags + for(int i=helper.size-1; i>=0; --i) { + if(helper.insts[i].x86.jmp + && helper.insts[i].x86.jmp_insts>=0 + && helper.insts[helper.insts[i].x86.jmp_insts].x86.barrier==1) { + int k = helper.insts[i].x86.jmp_insts; + //TODO: optimize FPU barrier too + if((!helper.insts[k].x86.need_flags) + ||(helper.insts[k].x86.set_flags==X_ALL + && helper.insts[k].x86.state_flags==SF_SET) + ||(helper.insts[k].x86.state_flags==SF_SET_PENDING)) { + //if(box86_dynarec_dump) dynarec_log(LOG_NONE, "Removed barrier for inst %d\n", k); + helper.insts[k].x86.barrier = 4; // remove barrier (keep FPU barrier, and still reset state flag) + } + } + } + // reset need_flags and compute again, now taking barrier into account (because barrier change use_flags) + for(int i = helper.size; i-- > 0;) { + if(helper.insts[i].x86.barrier==1) + // immediate barrier + helper.insts[i].x86.use_flags |= X_PEND; + else if(helper.insts[i].x86.jmp + && helper.insts[i].x86.jmp_insts>=0 + ) { + if(helper.insts[helper.insts[i].x86.jmp_insts].x86.barrier==1) + // jumpto barrier + helper.insts[i].x86.use_flags |= X_PEND; + else + helper.insts[i].x86.use_flags |= helper.insts[helper.insts[i].x86.jmp_insts].x86.need_flags; + } + } + for(int i = helper.size; i-- > 0;) + helper.insts[i].x86.need_flags = 0; + last_need = X_PEND; for(int i = helper.size; i-- > 0;) { helper.insts[i].x86.need_flags = last_need; if ((helper.insts[i].x86.set_flags) && !(helper.insts[i].x86.state_flags & SF_MAYSET)) { @@ -338,6 +384,9 @@ dynarec_log(LOG_DEBUG, "Asked to Fill block %p with %p\n", block, (void*)addr); last_need = X_ALL; } } + + // pass 1, float optimisations + arm_pass1(&helper, addr); // pass 2, instruction size arm_pass2(&helper, addr); diff --git a/src/dynarec/dynarec_arm_00.c b/src/dynarec/dynarec_arm_00.c index 40dac45cb6..6097220639 100755 --- a/src/dynarec/dynarec_arm_00.c +++ b/src/dynarec/dynarec_arm_00.c @@ -694,7 +694,7 @@ uintptr_t dynarec00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, READFLAGS(F|(dyn->insts[ninst].x86.barrier?0:X_PEND)); \ i8 = F8S; \ BARRIER(3); \ - JUMP(addr+i8); \ + JUMP(addr+i8, 1); \ GETFLAGS; \ if(dyn->insts) { \ if(dyn->insts[ninst].x86.jmp_insts==-1) { \ @@ -1993,9 +1993,9 @@ uintptr_t dynarec00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, case 0xDF: addr = dynarecDF(dyn, addr, ip, ninst, ok, need_epilog); break; - #define GO(NO, YES) \ - BARRIER(2); \ - JUMP(addr+i8);\ + #define GO(NO, YES) \ + BARRIER(2); \ + JUMP(addr+i8, 1); \ if(dyn->insts) { \ if(dyn->insts[ninst].x86.jmp_insts==-1) { \ /* out of the block */ \ @@ -2150,7 +2150,7 @@ uintptr_t dynarec00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("JMP Ib"); i32 = F8S; } - JUMP(addr+i32); + JUMP(addr+i32, 0); if(dyn->insts) { PASS2IF(dyn->insts[ninst].x86.jmp_insts==-1, 1) { // out of the block diff --git a/src/dynarec/dynarec_arm_0f.c b/src/dynarec/dynarec_arm_0f.c index cd647548b9..a5b1325d79 100755 --- a/src/dynarec/dynarec_arm_0f.c +++ b/src/dynarec/dynarec_arm_0f.c @@ -1131,10 +1131,10 @@ uintptr_t dynarec0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, #define GO(GETFLAGS, NO, YES, F) \ READFLAGS(F|(dyn->insts[ninst].x86.barrier?0:X_PEND)); \ - i32_ = F32S; \ - BARRIER(3); \ - JUMP(addr+i32_);\ - GETFLAGS; \ + i32_ = F32S; \ + BARRIER(3); \ + JUMP(addr+i32_, 1); \ + GETFLAGS; \ if(dyn->insts) { \ if(dyn->insts[ninst].x86.jmp_insts==-1) { \ /* out of the block */ \ diff --git a/src/dynarec/dynarec_arm_67.c b/src/dynarec/dynarec_arm_67.c index a40eb4205f..db0774fe6b 100755 --- a/src/dynarec/dynarec_arm_67.c +++ b/src/dynarec/dynarec_arm_67.c @@ -132,9 +132,9 @@ uintptr_t dynarec67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, } break; - #define GO(NO, YES) \ - BARRIER(2); \ - JUMP(addr+i8);\ + #define GO(NO, YES) \ + BARRIER(2); \ + JUMP(addr+i8, 1); \ if(dyn->insts) { \ if(dyn->insts[ninst].x86.jmp_insts==-1) { \ /* out of the block */ \ diff --git a/src/dynarec/dynarec_arm_helper.h b/src/dynarec/dynarec_arm_helper.h index b26f7a9b76..f1cbcc7ef4 100755 --- a/src/dynarec/dynarec_arm_helper.h +++ b/src/dynarec/dynarec_arm_helper.h @@ -312,7 +312,7 @@ ((B==SF_SET_PENDING && !(dyn->insts[ninst].x86.need_flags&X_PEND)?SF_SET:B)) #endif #ifndef JUMP -#define JUMP(A) +#define JUMP(A, C) #endif #ifndef BARRIER #define BARRIER(A) diff --git a/src/dynarec/dynarec_arm_pass0.h b/src/dynarec/dynarec_arm_pass0.h index a9ad0bf86d..fce81f1b03 100755 --- a/src/dynarec/dynarec_arm_pass0.h +++ b/src/dynarec/dynarec_arm_pass0.h @@ -1,11 +1,14 @@ #define INIT uintptr_t sav_addr=addr -#define FINI dyn->isize = addr-sav_addr +#define FINI \ + dyn->isize = addr-sav_addr;\ + dyn->insts[ninst].x86.addr = addr;\ + if(ninst) dyn->insts[ninst-1].x86.size = dyn->insts[ninst].x86.addr - dyn->insts[ninst-1].x86.addr #define MESSAGE(A, ...) -#define SETFLAGS(A, B) -#define READFLAGS(A) +#define READFLAGS(A) dyn->insts[ninst].x86.use_flags = A +#define SETFLAGS(A,B) {dyn->insts[ninst].x86.set_flags = A; dyn->insts[ninst].x86.state_flags = B;} #define EMIT(A) -#define JUMP(A) add_next(dyn, (uintptr_t)A); dyn->insts[ninst].x86.jmp = A +#define JUMP(A, C) if((A)>addr) add_next(dyn, (uintptr_t)A); dyn->insts[ninst].x86.jmp = A; dyn->insts[ninst].x86.jmp_cond = C #define BARRIER(A) dyn->insts[ninst].x86.barrier = A #define BARRIER_NEXT(A) if(ninstsize) dyn->insts[ninst+1].x86.barrier = A #define NEW_INST \ @@ -14,7 +17,10 @@ dyn->insts = (instruction_arm_t*)realloc(dyn->insts, sizeof(instruction_arm_t)*dyn->cap*2); \ memset(&dyn->insts[dyn->cap], 0, sizeof(instruction_arm_t)*dyn->cap); \ dyn->cap *= 2; \ - } + } \ + dyn->insts[ninst].x86.addr = ip; \ + if(ninst) {dyn->insts[ninst-1].x86.size = dyn->insts[ninst].x86.addr - dyn->insts[ninst-1].x86.addr;} + #define INST_EPILOG #define INST_NAME(name) #define DEFAULT \ diff --git a/src/dynarec/dynarec_arm_pass1.h b/src/dynarec/dynarec_arm_pass1.h index 3dbc0c69af..fbd02e5601 100755 --- a/src/dynarec/dynarec_arm_pass1.h +++ b/src/dynarec/dynarec_arm_pass1.h @@ -1,16 +1,11 @@ #define INIT -#define FINI \ - dyn->insts[ninst].x86.addr = addr; \ - if(ninst) dyn->insts[ninst-1].x86.size = dyn->insts[ninst].x86.addr - dyn->insts[ninst-1].x86.addr; +#define FINI #define MESSAGE(A, ...) #define EMIT(A) -#define READFLAGS(A) dyn->insts[ninst].x86.use_flags = A -#define SETFLAGS(A,B) {dyn->insts[ninst].x86.set_flags = A; dyn->insts[ninst].x86.state_flags = B;} +#define READFLAGS(A) +#define SETFLAGS(A,B) -#define NEW_INST \ - dyn->insts[ninst].x86.addr = ip;\ - dyn->n.combined1 = dyn->n.combined2 = 0;\ - if(ninst) {dyn->insts[ninst-1].x86.size = dyn->insts[ninst].x86.addr - dyn->insts[ninst-1].x86.addr;} +#define NEW_INST dyn->n.combined1 = dyn->n.combined2 = 0 #define INST_EPILOG dyn->insts[ninst].n = dyn->n diff --git a/src/dynarec/dynarec_private.h b/src/dynarec/dynarec_private.h index 42df4e3059..8c779ec1d9 100755 --- a/src/dynarec/dynarec_private.h +++ b/src/dynarec/dynarec_private.h @@ -24,6 +24,7 @@ typedef struct instruction_x86_s { int32_t size; // size of the instruction int barrier; // next instruction is a jump point, so no optim allowed uintptr_t jmp; // offset to jump to, even if conditionnal (0 if not), no relative offset here + int jmp_cond; // 1 of conditionnal jump int jmp_insts; // instuction to jump to (-1 if out of the block) uint32_t use_flags; // 0 or combination of X_?F uint32_t set_flags; // 0 or combination of X_?F