From 6abbb8b4a296574c88be1b8e520f3a90b8a583d3 Mon Sep 17 00:00:00 2001 From: YAMAMOTO Takashi Date: Sat, 5 Aug 2023 13:17:23 +0900 Subject: [PATCH] TOYWASM_USE_LOCALS_FAST_PATH --- lib/cell.c | 66 +++++++++++++++++++++-------------------- lib/exec.c | 40 +++++++++++++++++-------- lib/exec_context.h | 22 ++++++++++---- lib/toywasm_config.c.in | 1 + lib/toywasm_config.h.in | 1 + 5 files changed, 79 insertions(+), 51 deletions(-) diff --git a/lib/cell.c b/lib/cell.c index 5f4c25ac..48dfea95 100644 --- a/lib/cell.c +++ b/lib/cell.c @@ -59,13 +59,7 @@ static uint32_t localcellidx_lookup(const struct localcellidx *lci, uint32_t idx, uint32_t *cszp) { - const uint16_t *p = &lci->cellidxes[idx]; - uint16_t cidx = *p; - if (cszp != NULL) { - uint16_t next_cidx = p[1]; - *cszp = next_cidx - cidx; - } - return cidx; + return cellidx_lookup(lci->cellidxes, idx, cszp); } #endif @@ -158,66 +152,74 @@ localtype_cellsize(const struct localtype *lt) return localtype_cellidx(lt, lt->nlocals, NULL); } -/* - * frame_locals_cellidx: calculate the index and size of a local - * for the given localidx - * - * as this is called on every `local.get`, it is one of - * the most performance critical code in the interpreter. 
- */ -uint32_t +#if defined(TOYWASM_USE_SMALL_CELLS) +static uint32_t frame_locals_cellidx_slow(struct exec_context *ctx, uint32_t localidx, uint32_t *cszp) { xassert(cszp != NULL); -#if defined(TOYWASM_USE_SMALL_CELLS) + struct local_info_slow *slow = &ctx->local_u.slow; uint32_t cidx; - uint32_t nparams = ctx->nparams; + uint32_t nparams = slow->paramtype->ntypes; if (localidx < nparams) { - cidx = resulttype_cellidx(ctx->paramtype, localidx, cszp); + cidx = resulttype_cellidx(slow->paramtype, localidx, cszp); } else { - assert(localidx < nparams + ctx->localtype->nlocals); - cidx = ctx->paramcsz; - cidx += localtype_cellidx(ctx->localtype, localidx - nparams, + assert(localidx < nparams + slow->localtype->nlocals); + cidx = resulttype_cellsize(slow->paramtype); + cidx += localtype_cellidx(slow->localtype, localidx - nparams, cszp); } return cidx; -#else - *cszp = 1; - return localidx; -#endif } -uint32_t +#if defined(TOYWASM_USE_LOCALS_FAST_PATH) +static uint32_t frame_locals_cellidx_fast(struct exec_context *ctx, uint32_t localidx, uint32_t *cszp) { xassert(cszp != NULL); - xassert(ctx->paramtype_cellidxes != NULL); - xassert(ctx->localtype_cellidxes != NULL); + const struct local_info_fast *fast = &ctx->local_u.fast; + xassert(fast->paramtype_cellidxes != NULL); + xassert(fast->localtype_cellidxes != NULL); uint32_t cidx; - uint32_t nparams = ctx->nparams; + uint32_t nparams = fast->nparams; if (localidx < nparams) { - cidx = cellidx_lookup(ctx->paramtype_cellidxes, localidx, + cidx = cellidx_lookup(fast->paramtype_cellidxes, localidx, cszp); } else { - assert(localidx < nparams + ctx->localtype->nlocals); - cidx = ctx->paramcsz; - cidx += cellidx_lookup(ctx->localtype_cellidxes, + /* fast info has no nlocals; can't bound-check localidx */ + cidx = fast->paramcsz; + cidx += cellidx_lookup(fast->localtype_cellidxes, localidx - nparams, cszp); } return cidx; } +#endif +#endif /* defined(TOYWASM_USE_SMALL_CELLS) */ +/* + * frame_locals_cellidx: calculate the index and size of a local + * for the given localidx + * + * as 
this is called on every `local.get`, it is one of + * the most performance critical code in the interpreter. + */ uint32_t frame_locals_cellidx(struct exec_context *ctx, uint32_t localidx, uint32_t *cszp) { xassert(cszp != NULL); +#if defined(TOYWASM_USE_SMALL_CELLS) +#if defined(TOYWASM_USE_LOCALS_FAST_PATH) if (__predict_true(ctx->fast)) { return frame_locals_cellidx_fast(ctx, localidx, cszp); } +#endif return frame_locals_cellidx_slow(ctx, localidx, cszp); +#else /* defined(TOYWASM_USE_SMALL_CELLS) */ + *cszp = 1; + return localidx; +#endif /* defined(TOYWASM_USE_SMALL_CELLS) */ } void diff --git a/lib/exec.c b/lib/exec.c index 19d4ce14..262acefa 100644 --- a/lib/exec.c +++ b/lib/exec.c @@ -256,29 +256,43 @@ set_current_frame(struct exec_context *ctx, const struct funcframe *frame, * * Note: such exprs do never have function calls. * (thus ei != NULL) + * + * Note: such exprs do never have parameters or locals. + * (thus no need to set ctx->fast or ctx->local_u) */ assert(ei != NULL); - ctx->paramtype = NULL; - ctx->localtype = NULL; - ctx->nparams = 0; - ctx->paramcsz = 0; - ctx->fast = false; ctx->ei = ei; } else { const struct module *m = inst->module; const struct functype *ft = module_functype(m, funcidx); const struct func *func = &m->funcs[funcidx - m->nimportedfuncs]; - ctx->paramtype = &ft->parameter; assert(frame->nresults == resulttype_cellsize(&ft->result)); - ctx->localtype = &func->localtype; assert(ei == NULL || ei == &func->e.ei); - ctx->nparams = ft->parameter.ntypes; - ctx->paramcsz = resulttype_cellsize(&ft->parameter); - ctx->fast = ctx->paramtype->cellidx.cellidxes != NULL && - ctx->localtype->cellidx.cellidxes != NULL; - ctx->paramtype_cellidxes = ctx->paramtype->cellidx.cellidxes; - ctx->localtype_cellidxes = ctx->localtype->cellidx.cellidxes; +#if defined(TOYWASM_USE_LOCALS_FAST_PATH) + const uint16_t *paramtype_cellidxes = + ft->parameter.cellidx.cellidxes; + const uint16_t *localtype_cellidxes = + func->localtype.cellidx.cellidxes; + /* 
+ * if we have both of indexes, use the fast path. + */ + ctx->fast = paramtype_cellidxes != NULL && + localtype_cellidxes != NULL; + if (ctx->fast) { + struct local_info_fast *fast = &ctx->local_u.fast; + fast->nparams = ft->parameter.ntypes; + fast->paramcsz = resulttype_cellsize(&ft->parameter); + fast->paramtype_cellidxes = paramtype_cellidxes; + fast->localtype_cellidxes = localtype_cellidxes; + } else { +#endif + struct local_info_slow *slow = &ctx->local_u.slow; + slow->paramtype = &ft->parameter; + slow->localtype = &func->localtype; +#if defined(TOYWASM_USE_LOCALS_FAST_PATH) + } +#endif ctx->ei = &func->e.ei; } #if defined(TOYWASM_USE_LOCALS_CACHE) diff --git a/lib/exec_context.h b/lib/exec_context.h index 7bcfb953..d77515d6 100644 --- a/lib/exec_context.h +++ b/lib/exec_context.h @@ -146,14 +146,24 @@ struct context; struct exec_context { /* Some cached info about the current frame. */ struct instance *instance; - const struct resulttype *paramtype; - const struct localtype *localtype; const struct expr_exec_info *ei; - uint32_t nparams; - uint32_t paramcsz; - const uint16_t *paramtype_cellidxes; - const uint16_t *localtype_cellidxes; +#if defined(TOYWASM_USE_LOCALS_FAST_PATH) bool fast; +#endif + union { +#if defined(TOYWASM_USE_LOCALS_FAST_PATH) + struct local_info_fast { + const uint16_t *paramtype_cellidxes; + const uint16_t *localtype_cellidxes; + uint32_t nparams; + uint32_t paramcsz; + } fast; +#endif + struct local_info_slow { + const struct resulttype *paramtype; + const struct localtype *localtype; + } slow; + } local_u; /* The instruction pointer */ const uint8_t *p; diff --git a/lib/toywasm_config.c.in b/lib/toywasm_config.c.in index 67c2efb8..3efb85ae 100644 --- a/lib/toywasm_config.c.in +++ b/lib/toywasm_config.c.in @@ -10,6 +10,7 @@ const char *const toywasm_config_string = "\tTOYWASM_USE_JUMP_BINARY_SEARCH = @TOYWASM_USE_JUMP_BINARY_SEARCH@\n" "\tTOYWASM_USE_JUMP_CACHE = @TOYWASM_USE_JUMP_CACHE@\n" "\tTOYWASM_JUMP_CACHE2_SIZE = 
@TOYWASM_JUMP_CACHE2_SIZE@\n" +"\tTOYWASM_USE_LOCALS_FAST_PATH = @TOYWASM_USE_LOCALS_FAST_PATH@\n" "\tTOYWASM_USE_LOCALS_CACHE = @TOYWASM_USE_LOCALS_CACHE@\n" "\tTOYWASM_USE_SEPARATE_LOCALS = @TOYWASM_USE_SEPARATE_LOCALS@\n" "\tTOYWASM_USE_SMALL_CELLS = @TOYWASM_USE_SMALL_CELLS@\n" diff --git a/lib/toywasm_config.h.in b/lib/toywasm_config.h.in index f61e2602..4a4d1a4c 100644 --- a/lib/toywasm_config.h.in +++ b/lib/toywasm_config.h.in @@ -12,6 +12,7 @@ #cmakedefine TOYWASM_USE_JUMP_BINARY_SEARCH #cmakedefine TOYWASM_USE_JUMP_CACHE #define TOYWASM_JUMP_CACHE2_SIZE @TOYWASM_JUMP_CACHE2_SIZE@ +#cmakedefine TOYWASM_USE_LOCALS_FAST_PATH #cmakedefine TOYWASM_USE_LOCALS_CACHE #cmakedefine TOYWASM_USE_SEPARATE_LOCALS #cmakedefine TOYWASM_USE_SMALL_CELLS