From 16121f24743d92789cf2aa06ca7b6b25a8a1107e Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Tue, 28 Nov 2017 13:07:38 +0000 Subject: [PATCH] Statically allocate JIT temp buffers If we only need a few ~64K element arrays then just allocate those up front and don't worry about dynamically growing things. This is simpler, requires less code, and has no obvious practical downside. --- src/lj_ir.c | 53 ++--------------------------------------------- src/lj_iropt.h | 5 +---- src/lj_jit.h | 2 -- src/lj_opt_loop.c | 8 ------- src/lj_snap.c | 27 ------------------------ src/lj_snap.h | 13 ------------ src/lj_state.c | 14 +++++++++++++ src/lj_trace.c | 3 --- 8 files changed, 17 insertions(+), 108 deletions(-) diff --git a/src/lj_ir.c b/src/lj_ir.c index 0e56de9c1a..ad44ea14a8 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c @@ -65,49 +65,6 @@ IRCALLDEF(IRCALLCI) /* -- IR emitter ---------------------------------------------------------- */ -/* Grow IR buffer at the top. */ -void lj_ir_growtop(jit_State *J) -{ - IRIns *baseir = J->irbuf + J->irbotlim; - MSize szins = J->irtoplim - J->irbotlim; - if (szins) { - baseir = (IRIns *)lj_mem_realloc(J->L, baseir, szins*sizeof(IRIns), - 2*szins*sizeof(IRIns)); - J->irtoplim = J->irbotlim + 2*szins; - } else { - baseir = (IRIns *)lj_mem_realloc(J->L, NULL, 0, LJ_MIN_IRSZ*sizeof(IRIns)); - J->irbotlim = REF_BASE - LJ_MIN_IRSZ/4; - J->irtoplim = J->irbotlim + LJ_MIN_IRSZ; - } - J->cur.ir = J->irbuf = baseir - J->irbotlim; -} - -/* Grow IR buffer at the bottom or shift it up. */ -static void lj_ir_growbot(jit_State *J) -{ - IRIns *baseir = J->irbuf + J->irbotlim; - MSize szins = J->irtoplim - J->irbotlim; - lua_assert(szins != 0); - lua_assert(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim); - if (J->cur.nins + (szins >> 1) < J->irtoplim) { - /* More than half of the buffer is free on top: shift up by a quarter. */ - MSize ofs = szins >> 2; - memmove(baseir + ofs, baseir, (J->cur.nins - J->irbotlim)*sizeof(IRIns)); - J->irbotlim -= ofs; - J->irtoplim -= ofs; - J->cur.ir = J->irbuf = baseir - J->irbotlim; - } else { - /* Double the buffer size, but split the growth amongst top/bottom. */ - IRIns *newbase = lj_mem_newt(J->L, 2*szins*sizeof(IRIns), IRIns); - MSize ofs = szins >= 256 ? 128 : (szins >> 1); /* Limit bottom growth. */ - memcpy(newbase + ofs, baseir, (J->cur.nins - J->irbotlim)*sizeof(IRIns)); - lj_mem_free(G(J->L), baseir, szins*sizeof(IRIns)); - J->irbotlim -= ofs; - J->irtoplim = J->irbotlim + 2*szins; - J->cur.ir = J->irbuf = newbase - J->irbotlim; - } -} - /* Emit IR without any optimizations. */ TRef lj_ir_emit(jit_State *J) { @@ -161,25 +118,19 @@ LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs) ** comparisons. The same constant must get the same reference. */ -/* Get ref of next IR constant and optionally grow IR. -** Note: this may invalidate all IRIns *! -*/ +/* Get ref of next IR constant. */ static LJ_AINLINE IRRef ir_nextk(jit_State *J) { IRRef ref = J->cur.nk; - if (LJ_UNLIKELY(ref <= J->irbotlim)) lj_ir_growbot(J); J->cur.nk = --ref; return ref; } -/* Get ref of next 64 bit IR constant and optionally grow IR. -** Note: this may invalidate all IRIns *! -*/ +/* Get ref of next 64 bit IR constant. */ static LJ_AINLINE IRRef ir_nextk64(jit_State *J) { IRRef ref = J->cur.nk - 2; lua_assert(J->state != LJ_TRACE_ASM); - if (LJ_UNLIKELY(ref < J->irbotlim)) lj_ir_growbot(J); J->cur.nk = ref; return ref; } diff --git a/src/lj_iropt.h b/src/lj_iropt.h index 214fb1a2c4..1305541607 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h @@ -24,13 +24,10 @@ static LJ_AINLINE void lj_ir_set_(jit_State *J, uint16_t ot, IRRef1 a, IRRef1 b) #define lj_ir_set(J, ot, a, b) \ lj_ir_set_(J, (uint16_t)(ot), (IRRef1)(a), (IRRef1)(b)) -/* Get ref of next IR instruction and optionally grow IR. -** Note: this may invalidate all IRIns*! -*/ +/* Get ref of next IR instruction. */ static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J) { IRRef ref = J->cur.nins; - if (LJ_UNLIKELY(ref >= J->irtoplim)) lj_ir_growtop(J); J->cur.nins = ref + 1; return ref; } diff --git a/src/lj_jit.h b/src/lj_jit.h index b3408e9bb2..75f5804b01 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -333,8 +333,6 @@ typedef struct jit_State { uint32_t k32[LJ_K32__MAX]; /* Ditto for 4 byte constants. */ IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ - IRRef irtoplim; /* Upper limit of instuction buffer (biased). */ - IRRef irbotlim; /* Lower limit of instuction buffer (biased). */ IRRef loopref; /* Last loop reference or ref of final LOOP (or 0). */ MSize sizesnap; /* Size of temp. snapshot buffer. */ diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c index 697089ab2e..6bfd541eff 100644 --- a/src/lj_opt_loop.c +++ b/src/lj_opt_loop.c @@ -283,15 +283,7 @@ static void loop_unroll(LoopState *lps) /* LOOP separates the pre-roll from the loop body. */ emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0); - /* Grow snapshot buffer and map for copy-substituted snapshots. - ** Need up to twice the number of snapshots minus #0 and loop snapshot. - ** Need up to twice the number of entries plus fallback substitutions - ** from the loop snapshot entries for each new snapshot. - ** Caveat: both calls may reallocate J->cur.snap and J->cur.snapmap! - */ onsnap = J->cur.nsnap; - lj_snap_grow_buf(J, 2*onsnap-2); - lj_snap_grow_map(J, J->cur.nsnapmap*2+(onsnap-2)*J->cur.snap[onsnap-1].nent); /* The loop snapshot is used for fallback substitutions. */ loopsnap = &J->cur.snap[onsnap-1]; diff --git a/src/lj_snap.c b/src/lj_snap.c index 4b28bb9004..0390853684 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -29,31 +29,6 @@ /* Emit raw IR without passing through optimizations. */ #define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J)) -/* -- Snapshot buffer allocation ------------------------------------------ */ - -/* Grow snapshot buffer. */ -void lj_snap_grow_buf_(jit_State *J, MSize need) -{ - MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; - if (need > maxsnap) - lj_trace_err(J, LJ_TRERR_SNAPOV); - lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); - J->cur.snap = J->snapbuf; -} - -/* Grow snapshot map buffer. */ -void lj_snap_grow_map_(jit_State *J, MSize need) -{ - if (need < 2*J->sizesnapmap) - need = 2*J->sizesnapmap; - else if (need < 64) - need = 64; - J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf, - J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry)); - J->cur.snapmap = J->snapmapbuf; - J->sizesnapmap = need; -} - /* -- Snapshot generation ------------------------------------------------- */ /* Add all modified slots to the snapshot. */ @@ -130,7 +105,6 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) MSize nent; SnapEntry *p; /* Conservative estimate. */ - lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1)); p = &J->cur.snapmap[nsnapmap]; nent = snapshot_slots(J, p, nslots); snap->nent = (uint8_t)nent; @@ -157,7 +131,6 @@ void lj_snap_add(jit_State *J) nsnapmap = J->cur.snap[--nsnap].mapofs; } else { nomerge: - lj_snap_grow_buf(J, nsnap+1); J->cur.nsnap = (uint16_t)(nsnap+1); } J->mergesnap = 0; diff --git a/src/lj_snap.h b/src/lj_snap.h index 509742ea74..11c8b669e2 100644 --- a/src/lj_snap.h +++ b/src/lj_snap.h @@ -15,18 +15,5 @@ LJ_FUNC void lj_snap_shrink(jit_State *J); LJ_FUNC IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir); LJ_FUNC void lj_snap_replay(jit_State *J, GCtrace *T); LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr); -LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need); -LJ_FUNC void lj_snap_grow_map_(jit_State *J, MSize need); - -static LJ_AINLINE void lj_snap_grow_buf(jit_State *J, MSize need) -{ - if (LJ_UNLIKELY(need > J->sizesnap)) lj_snap_grow_buf_(J, need); -} - -static LJ_AINLINE void lj_snap_grow_map(jit_State *J, MSize need) -{ - if (LJ_UNLIKELY(need > J->sizesnapmap)) lj_snap_grow_map_(J, need); -} - #endif diff --git a/src/lj_state.c b/src/lj_state.c index 600d56f493..f91f2c41dd 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -158,6 +158,7 @@ static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud) static void close_state(lua_State *L) { global_State *g = G(L); + jit_State *J = L2J(L); lj_func_closeuv(L, tvref(L->stack)); lj_gc_freeall(g); lua_assert(gcref(g->gc.root) == obj2gco(L)); @@ -167,6 +168,9 @@ static void close_state(lua_State *L) lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); lj_buf_free(g, &g->tmpbuf); lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); + lj_mem_free(g, J->snapmapbuf, J->sizesnapmap); + lj_mem_free(g, J->snapbuf, J->sizesnap); + lj_mem_free(g, J->irbuf-REF_BIAS, 65536*sizeof(IRIns)); lua_assert(g->gc.total == sizeof(GG_State)); #ifndef LUAJIT_USE_SYSMALLOC if (g->allocf == lj_alloc_f) @@ -181,6 +185,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State)); lua_State *L = &GG->L; global_State *g = &GG->g; + jit_State *J = &GG->J; if (GG == NULL || !checkptrGC(GG)) return NULL; memset(GG, 0, sizeof(GG_State)); L->gct = ~LJ_TTHREAD; @@ -206,6 +211,15 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) g->gc.total = sizeof(GG_State); g->gc.pause = LUAI_GCPAUSE; g->gc.stepmul = LUAI_GCMUL; + /* Statically allocate generous JIT scratch buffers. */ + J->sizesnap = sizeof(SnapShot)*65536; + J->sizesnapmap = sizeof(SnapEntry)*65536; + J->snapbuf = (SnapShot *)lj_mem_new(L, J->sizesnap); + J->snapmapbuf = (SnapEntry *)lj_mem_new(L, J->sizesnapmap); + IRIns *irbufmem = (IRIns *)lj_mem_new(L, sizeof(IRIns)*65536); + if (irbufmem == NULL || J->snapbuf == NULL || J->snapmapbuf == NULL) + return NULL; + J->irbuf = irbufmem + REF_BIAS; lj_dispatch_init((GG_State *)L); L->status = LUA_ERRERR+1; /* Avoid touching the stack upon memory error. */ if (lj_vm_cpcall(L, NULL, NULL, cpluaopen) != 0) { diff --git a/src/lj_trace.c b/src/lj_trace.c index 3bca11ceed..ade66b27d1 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -292,9 +292,6 @@ void lj_trace_freestate(global_State *g) } #endif lj_mcode_free(J); - lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry); - lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot); - lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns); lj_mem_freevec(g, J->trace, J->sizetrace, GCRef); }