diff --git a/src/ifdef-defile b/src/ifdef-defile new file mode 100644 index 0000000000..c1e8faf734 --- /dev/null +++ b/src/ifdef-defile @@ -0,0 +1,125 @@ +// Config file for the 'unifdef' program to strip out unwanted source code. + +#define LJ_GC64 1 +#define LJ_HASFFI 1 +#define LJ_HASJIT 1 +#define LUAJIT_DISABLE_PROFILE 1 +#define LUAJIT_DISABLE_VMEVENT 1 +#define LUAJIT_ENABLE_GC64 1 +#define LUAJIT_ENABLE_JIT 1 +#undef LJ_HASPROFILE +#undef LUAJIT_DISABLE_FFI +#undef LUAJIT_DISABLE_JIT +#undef LUAJIT_NOFFI +#undef LUAJIT_USE_GDBJIT + +#define LJ_64 1 +#define LJ_ARCH_BITS 64 +#define LJ_ARCH_ENDIAN == LUAJIT_LE +#define LJ_FR2 1 +#define LJ_NUMMODE LJ_NUMMODE_SINGLE_DUAL +#define LJ_NUMMODE_DUAL 2 +#define LJ_NUMMODE_DUAL_SINGLE 3 +#define LJ_NUMMODE_SINGLE 0 +#define LJ_NUMMODE_SINGLE_DUAL 1 +#define LJ_TARGET_DLOPEN 1 +#define LJ_TARGET_GC64 1 +#define LJ_TARGET_LINUX 1 +#define LJ_TARGET_POSIX 1 +#define LJ_TARGET_X64 1 +#define LJ_TARGET_X86ORX64 1 +#define LUAJIT_ARCH_ARM 3 +#define LUAJIT_ARCH_ARM64 4 +#define LUAJIT_ARCH_MIPS 6 +#define LUAJIT_ARCH_MIPS32 6 +#define LUAJIT_ARCH_MIPS64 7 +#define LUAJIT_ARCH_PPC 5 +#define LUAJIT_ARCH_X64 2 +#define LUAJIT_ARCH_X86 1 +#define LUAJIT_ARCH_arm 3 +#define LUAJIT_ARCH_arm64 4 +#define LUAJIT_ARCH_mips 6 +#define LUAJIT_ARCH_mips32 6 +#define LUAJIT_ARCH_mips64 7 +#define LUAJIT_ARCH_ppc 5 +#define LUAJIT_ARCH_x64 2 +#define LUAJIT_ARCH_x86 1 +#define LUAJIT_LE 1 +#define LUAJIT_OS LUAJIT_OS_LINUX +#define LUAJIT_OS_LINUX 2 +#define LUAJIT_OS_WINDOWS 3 +#define LUAJIT_TARGET LUAJIT_ARCH_X64 +#define LUAJIT_TARGET_POSIX 1 +#define __linux__ +#undef JL_TARGET_PPC +#undef LJ_32 +#undef LJ_ABI_EABI +#undef LJ_ABI_SOFTFP +#undef LJ_ABI_WIN +#undef LJ_ALLOC_NTAVM +#undef LJ_ALLOC_VIRTUALALLOC +#undef LJ_ARCH_NOFFI +#undef LJ_ARCH_PPC +#undef LJ_BE +#undef LJ_NOUNWIND +#undef LJ_OS_NOJIT +#undef LJ_SOFTFP +#undef LJ_TARGET_ARM +#undef LJ_TARGET_ARM64 +#undef LJ_TARGET_CONSOLE +#undef LJ_TARGET_CYGWIN +#undef LJ_TARGET_IOS +#undef LJ_TARGET_MIPS +#undef LJ_TARGET_MIPS64 +#undef LJ_TARGET_PPC +#undef LJ_TARGET_PS3 +#undef LJ_TARGET_PS4 +#undef LJ_TARGET_PSVITA +#undef LJ_TARGET_WINDOWS +#undef LJ_TARGET_WINDOWS +#undef LJ_TARGET_X86 +#undef LJ_TARGET_XBOX360 +#undef LJ_TARGET_XBOX360 +#undef LUAJIT_ARCH_MIPS32 +#undef LUAJIT_ARCH_X86 +#undef LUAJIT_BE +#undef MINGW_SDK_INIT +#undef _ARCH_PPCSQ +#undef _ARCH_PWR4 +#undef _ARCH_PWR5 +#undef _ARCH_PWR5X +#undef _ARCH_PWR6 +#undef _ARCH_PWR7 +#undef _DURANGO +#undef _MIPSEL +#undef _MSC_VER +#undef _XBOX_VER +#undef __ANDROID__ +#undef __ARM_ARCH_6T2__ +#undef __ARM_ARCH_6T2__ +#undef __ARM_ARCH_6Z__ +#undef __ARM_ARCH_6__ +#undef __ARM_ARCH_7A__ +#undef __ARM_ARCH_7R__ +#undef __ARM_ARCH_7S__ +#undef __ARM_ARCH_7VE__ +#undef __ARM_ARCH_7__ +#undef __ARM_ARCH_8A__ +#undef __ARM_ARCH____ARM_ARCH_8__ +#undef __ARM_ARH_6J__ +#undef __ARM_PCS_VFP +#undef __CELLOS_LV2__ +#undef __FreeBSD__ +#undef __FreeBSD_kernel__ +#undef __MIPSEL +#undef __MIPSEL__ +#undef __ORBIS__ +#undef __OpenBSD__ +#undef __arm__ +#undef __i386__ +#undef __mips_soft_float +#undef __psp2__ +#undef __sun__ +#undef __symbian__ +#undef __symbian__ +#undef ljamalg_c diff --git a/src/lib_aux.c b/src/lib_aux.c index 4e137949ec..01df53fdae 100644 --- a/src/lib_aux.c +++ b/src/lib_aux.c @@ -22,9 +22,7 @@ #include "lj_trace.h" #include "lj_lib.h" -#if LJ_TARGET_POSIX #include -#endif /* -- I/O error handling -------------------------------------------------- */ @@ -49,7 +47,6 @@ LUALIB_API int luaL_fileresult(lua_State *L, int stat, const char *fname) LUALIB_API int luaL_execresult(lua_State *L, int stat) { if (stat != -1) { -#if LJ_TARGET_POSIX if (WIFSIGNALED(stat)) { stat = WTERMSIG(stat); setnilV(L->top++); @@ -63,13 +60,6 @@ LUALIB_API int luaL_execresult(lua_State *L, int stat) setnilV(L->top++); lua_pushliteral(L, "exit"); } -#else - if (stat == 0) - setboolV(L->top++, 1); - else - setnilV(L->top++); - lua_pushliteral(L, "exit"); -#endif setintV(L->top++, stat); return 3; } @@ -302,9 +292,6 @@ static int panic(lua_State *L) #ifdef LUAJIT_USE_SYSMALLOC -#if LJ_64 && !LJ_GC64 && !defined(LUAJIT_USE_VALGRIND) -#error "Must use builtin allocator for 64 bit target" -#endif static void *mem_alloc(void *ud, void *ptr, size_t osize, size_t nsize) { @@ -334,23 +321,11 @@ LUALIB_API lua_State *luaL_newstate(void) lua_State *L; void *ud = lj_alloc_create(); if (ud == NULL) return NULL; -#if LJ_64 && !LJ_GC64 - L = lj_state_newstate(lj_alloc_f, ud); -#else L = lua_newstate(lj_alloc_f, ud); -#endif if (L) G(L)->panic = panic; return L; } -#if LJ_64 && !LJ_GC64 -LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) -{ - UNUSED(f); UNUSED(ud); - fputs("Must use luaL_newstate() for 64 bit target\n", stderr); - return NULL; -} -#endif #endif diff --git a/src/lib_base.c b/src/lib_base.c index 44014f9b67..3281412c96 100644 --- a/src/lib_base.c +++ b/src/lib_base.c @@ -24,10 +24,8 @@ #include "lj_meta.h" #include "lj_state.h" #include "lj_frame.h" -#if LJ_HASFFI #include "lj_ctype.h" #include "lj_cconv.h" -#endif #include "lj_bc.h" #include "lj_ff.h" #include "lj_dispatch.h" @@ -81,7 +79,6 @@ LJLIB_ASM(next) return FFH_UNREACHABLE; } -#if LJ_52 || LJ_HASFFI static int ffh_pairs(lua_State *L, MMS mm) { TValue *o = lj_lib_checkany(L, 1); @@ -98,9 +95,6 @@ static int ffh_pairs(lua_State *L, MMS mm) return FFH_RES(3); } } -#else -#define ffh_pairs(L, mm) (lj_lib_checktab(L, 1), FFH_UNREACHABLE) -#endif LJLIB_PUSH(lastcl) LJLIB_ASM(pairs) LJLIB_REC(xpairs 0) @@ -265,7 +259,6 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) copyTV(L, L->base-1-LJ_FR2, o); return FFH_RES(1); } -#if LJ_HASFFI if (tviscdata(o)) { CTState *cts = ctype_cts(L); CType *ct = lj_ctype_rawref(cts, cdataV(o)->ctypeid); @@ -283,7 +276,6 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) return FFH_RES(1); } } -#endif } else { const char *p = strdata(lj_lib_checkstr(L, 1)); char *ep; @@ -608,10 +600,8 @@ LJLIB_NOREG LJLIB_ASM(coroutine_wrap_aux) /* Inline declarations. */ LJ_ASMF void lj_ff_coroutine_wrap_aux(void); -#if !(LJ_TARGET_MIPS && defined(ljamalg_c)) LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co); -#endif /* Error handler, called from assembler VM. */ void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co) diff --git a/src/lib_bit.c b/src/lib_bit.c index c979a44839..36f98acc7d 100644 --- a/src/lib_bit.c +++ b/src/lib_bit.c @@ -15,12 +15,10 @@ #include "lj_buf.h" #include "lj_strscan.h" #include "lj_strfmt.h" -#if LJ_HASFFI #include "lj_ctype.h" #include "lj_cdata.h" #include "lj_cconv.h" #include "lj_carith.h" -#endif #include "lj_ff.h" #include "lj_lib.h" @@ -28,7 +26,6 @@ #define LJLIB_MODULE_bit -#if LJ_HASFFI static int bit_result64(lua_State *L, CTypeID id, uint64_t x) { GCcdata *cd = lj_cdata_new_(L, id, 8); @@ -36,61 +33,30 @@ static int bit_result64(lua_State *L, CTypeID id, uint64_t x) setcdataV(L, L->base-1-LJ_FR2, cd); return FFH_RES(1); } -#else -static int32_t bit_checkbit(lua_State *L, int narg) -{ - TValue *o = L->base + narg-1; - if (!(o < L->top && lj_strscan_numberobj(o))) - lj_err_argt(L, narg, LUA_TNUMBER); - if (LJ_LIKELY(tvisint(o))) { - return intV(o); - } else { - int32_t i = lj_num2bit(numV(o)); - if (LJ_DUALNUM) setintV(o, i); - return i; - } -} -#endif LJLIB_ASM(bit_tobit) LJLIB_REC(bit_tobit) { -#if LJ_HASFFI CTypeID id = 0; setintV(L->base-1-LJ_FR2, (int32_t)lj_carith_check64(L, 1, &id)); return FFH_RES(1); -#else - lj_lib_checknumber(L, 1); - return FFH_RETRY; -#endif } LJLIB_ASM(bit_bnot) LJLIB_REC(bit_unary IR_BNOT) { -#if LJ_HASFFI CTypeID id = 0; uint64_t x = lj_carith_check64(L, 1, &id); return id ? bit_result64(L, id, ~x) : FFH_RETRY; -#else - lj_lib_checknumber(L, 1); - return FFH_RETRY; -#endif } LJLIB_ASM(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP) { -#if LJ_HASFFI CTypeID id = 0; uint64_t x = lj_carith_check64(L, 1, &id); return id ? bit_result64(L, id, lj_bswap64(x)) : FFH_RETRY; -#else - lj_lib_checknumber(L, 1); - return FFH_RETRY; -#endif } LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL) { -#if LJ_HASFFI CTypeID id = 0, id2 = 0; uint64_t x = lj_carith_check64(L, 1, &id); int32_t sh = (int32_t)lj_carith_check64(L, 2, &id2); @@ -100,11 +66,6 @@ LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL) } if (id2) setintV(L->base+1, sh); return FFH_RETRY; -#else - lj_lib_checknumber(L, 1); - bit_checkbit(L, 2); - return FFH_RETRY; -#endif } LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR) LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR) @@ -113,7 +74,6 @@ LJLIB_ASM_(bit_ror) LJLIB_REC(bit_shift IR_BROR) LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND) { -#if LJ_HASFFI CTypeID id = 0; TValue *o = L->base, *top = L->top; int i = 0; @@ -131,11 +91,6 @@ LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND) return bit_result64(L, id, y); } return FFH_RETRY; -#else - int i = 0; - do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top); - return FFH_RETRY; -#endif } LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR) LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR) @@ -144,24 +99,15 @@ LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR) LJLIB_CF(bit_tohex) LJLIB_REC(.) { -#if LJ_HASFFI CTypeID id = 0, id2 = 0; uint64_t b = lj_carith_check64(L, 1, &id); int32_t n = L->base+1>=L->top ? (id ? 16 : 8) : (int32_t)lj_carith_check64(L, 2, &id2); -#else - uint32_t b = (uint32_t)bit_checkbit(L, 1); - int32_t n = L->base+1>=L->top ? 8 : bit_checkbit(L, 2); -#endif SBuf *sb = lj_buf_tmp_(L); SFormat sf = (STRFMT_UINT|STRFMT_T_HEX); if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; } sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC); -#if LJ_HASFFI if (n < 16) b &= ((uint64_t)1 << 4*n)-1; -#else - if (n < 8) b &= (1u << 4*n)-1; -#endif sb = lj_strfmt_putfxint(sb, sf, b); setstrV(L, L->top-1, lj_buf_str(L, sb)); lj_gc_check(L); diff --git a/src/lib_ffi.c b/src/lib_ffi.c index 136e98e896..ea9cb05d43 100644 --- a/src/lib_ffi.c +++ b/src/lib_ffi.c @@ -14,7 +14,6 @@ #include "lj_obj.h" -#if LJ_HASFFI #include "lj_gc.h" #include "lj_err.h" @@ -728,29 +727,13 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.) GCstr *s = lj_lib_checkstr(L, 1); int b = 0; switch (s->hash) { -#if LJ_64 case H_(849858eb,ad35fd06): b = 1; break; /* 64bit */ -#else - case H_(662d3c79,d0e22477): b = 1; break; /* 32bit */ -#endif #if LJ_ARCH_HASFPU case H_(e33ee463,e33ee463): b = 1; break; /* fpu */ #endif -#if LJ_ABI_SOFTFP - case H_(61211a23,c2e8c81c): b = 1; break; /* softfp */ -#else case H_(539417a8,8ce0812f): b = 1; break; /* hardfp */ -#endif -#if LJ_ABI_EABI - case H_(2182df8f,f2ed1152): b = 1; break; /* eabi */ -#endif -#if LJ_ABI_WIN - case H_(4ab624a8,4ab624a8): b = 1; break; /* win */ -#endif case H_(3af93066,1f001464): b = 1; break; /* le/be */ -#if LJ_GC64 case H_(9e89d2c9,13c83c92): b = 1; break; /* gc64 */ -#endif default: break; } @@ -866,4 +849,3 @@ LUALIB_API int luaopen_ffi(lua_State *L) return 1; } -#endif diff --git a/src/lib_init.c b/src/lib_init.c index 2ed370e967..8fbb351c12 100644 --- a/src/lib_init.c +++ b/src/lib_init.c @@ -30,9 +30,7 @@ static const luaL_Reg lj_lib_load[] = { }; static const luaL_Reg lj_lib_preload[] = { -#if LJ_HASFFI { LUA_FFILIBNAME, luaopen_ffi }, -#endif { NULL, NULL } }; diff --git a/src/lib_io.c b/src/lib_io.c index 9763ed466f..75de25f773 100644 --- a/src/lib_io.c +++ b/src/lib_io.c @@ -97,14 +97,7 @@ static int io_file_close(lua_State *L, IOFileUD *iof) ok = (fclose(iof->fp) == 0); } else if ((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_PIPE) { int stat = -1; -#if LJ_TARGET_POSIX stat = pclose(iof->fp); -#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE - stat = _pclose(iof->fp); -#else - lua_assert(0); - return 0; -#endif #if LJ_52 iof->fp = NULL; return luaL_execresult(L, stat); @@ -325,26 +318,10 @@ LJLIB_CF(io_method_seek) else if (!tvisnil(o)) lj_err_argt(L, 3, LUA_TNUMBER); } -#if LJ_TARGET_POSIX res = fseeko(fp, ofs, opt); -#elif _MSC_VER >= 1400 - res = _fseeki64(fp, ofs, opt); -#elif defined(__MINGW32__) - res = fseeko64(fp, ofs, opt); -#else - res = fseek(fp, (long)ofs, opt); -#endif if (res) return luaL_fileresult(L, 0, NULL); -#if LJ_TARGET_POSIX ofs = ftello(fp); -#elif _MSC_VER >= 1400 - ofs = _ftelli64(fp); -#elif defined(__MINGW32__) - ofs = ftello64(fp); -#else - ofs = (int64_t)ftell(fp); -#endif setint64V(L->top-1, ofs); return 1; } @@ -406,32 +383,20 @@ LJLIB_CF(io_open) LJLIB_CF(io_popen) { -#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE) const char *fname = strdata(lj_lib_checkstr(L, 1)); GCstr *s = lj_lib_optstr(L, 2); const char *mode = s ? strdata(s) : "r"; IOFileUD *iof = io_file_new(L); iof->type = IOFILE_TYPE_PIPE; -#if LJ_TARGET_POSIX fflush(NULL); iof->fp = popen(fname, mode); -#else - iof->fp = _popen(fname, mode); -#endif return iof->fp != NULL ? 1 : luaL_fileresult(L, 0, fname); -#else - return luaL_error(L, LUA_QL("popen") " not supported"); -#endif } LJLIB_CF(io_tmpfile) { IOFileUD *iof = io_file_new(L); -#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PSVITA - iof->fp = NULL; errno = ENOSYS; -#else iof->fp = tmpfile(); -#endif return iof->fp != NULL ? 1 : luaL_fileresult(L, 0, NULL); } diff --git a/src/lib_jit.c b/src/lib_jit.c index 22ca0a1a24..3d3dd0cbbb 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -18,16 +18,12 @@ #include "lj_tab.h" #include "lj_state.h" #include "lj_bc.h" -#if LJ_HASFFI #include "lj_ctype.h" -#endif -#if LJ_HASJIT #include "lj_ir.h" #include "lj_jit.h" #include "lj_ircall.h" #include "lj_iropt.h" #include "lj_target.h" -#endif #include "lj_trace.h" #include "lj_dispatch.h" #include "lj_vm.h" @@ -77,17 +73,14 @@ LJLIB_CF(jit_off) LJLIB_CF(jit_flush) { -#if LJ_HASJIT if (L->base < L->top && tvisnumber(L->base)) { int traceno = lj_lib_checkint(L, 1); luaJIT_setmode(L, traceno, LUAJIT_MODE_FLUSH|LUAJIT_MODE_TRACE); return 0; } -#endif return setjitmode(L, LUAJIT_MODE_FLUSH); } -#if LJ_HASJIT /* Push a string for every flag bit that is set. */ static void flagbits_to_strings(lua_State *L, uint32_t flags, uint32_t base, const char *str) @@ -96,48 +89,20 @@ static void flagbits_to_strings(lua_State *L, uint32_t flags, uint32_t base, if (flags & base) setstrV(L, L->top++, lj_str_new(L, str+1, *(uint8_t *)str)); } -#endif LJLIB_CF(jit_status) { -#if LJ_HASJIT jit_State *J = L2J(L); L->top = L->base; setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0); flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING); flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING); return (int)(L->top - L->base); -#else - setboolV(L->top++, 0); - return 1; -#endif } LJLIB_CF(jit_attach) { -#ifdef LUAJIT_DISABLE_VMEVENT luaL_error(L, "vmevent API disabled"); -#else - GCfunc *fn = lj_lib_checkfunc(L, 1); - GCstr *s = lj_lib_optstr(L, 2); - luaL_findtable(L, LUA_REGISTRYINDEX, LJ_VMEVENTS_REGKEY, LJ_VMEVENTS_HSIZE); - if (s) { /* Attach to given event. */ - const uint8_t *p = (const uint8_t *)strdata(s); - uint32_t h = s->len; - while (*p) h = h ^ (lj_rol(h, 6) + *p++); - lua_pushvalue(L, 1); - lua_rawseti(L, -2, VMEVENT_HASHIDX(h)); - G(L)->vmevmask = VMEVENT_NOCACHE; /* Invalidate cache. */ - } else { /* Detach if no event given. */ - setnilV(L->top++); - while (lua_next(L, -2)) { - L->top--; - if (tvisfunc(L->top) && funcV(L->top) == fn) { - setnilV(lj_tab_set(L, tabV(L->top-2), L->top-1)); - } - } - } -#endif return 0; } @@ -270,7 +235,6 @@ LJLIB_CF(jit_util_funcuvname) /* -- Reflection API for traces ------------------------------------------- */ -#if LJ_HASJIT /* Check trace argument. Must not throw for non-existent trace numbers. */ static GCtrace *jit_checktrace(lua_State *L) @@ -338,13 +302,11 @@ LJLIB_CF(jit_util_tracek) slot = ir->op2; ir = &T->ir[ir->op1]; } -#if LJ_HASFFI if (ir->o == IR_KINT64 && !ctype_ctsG(G(L))) { ptrdiff_t oldtop = savestack(L, L->top); luaopen_ffi(L); /* Load FFI library on-demand. */ L->top = restorestack(L, oldtop); } -#endif lj_ir_kvalue(L, L->top-2, ir); setintV(L->top-1, (int32_t)irt_type(ir->t)); if (slot == -1) @@ -425,7 +387,6 @@ LJLIB_CF(jit_util_ircalladdr) return 0; } -#endif #include "lj_libdef.h" @@ -437,7 +398,6 @@ static int luaopen_jit_util(lua_State *L) /* -- jit.opt module ------------------------------------------------------ */ -#if LJ_HASJIT #define LJLIB_MODULE_jit_opt @@ -530,109 +490,12 @@ LJLIB_CF(jit_opt_start) #include "lj_libdef.h" -#endif /* -- jit.profile module -------------------------------------------------- */ -#if LJ_HASPROFILE - -#define LJLIB_MODULE_jit_profile - -/* Not loaded by default, use: local profile = require("jit.profile") */ - -static const char KEY_PROFILE_THREAD = 't'; -static const char KEY_PROFILE_FUNC = 'f'; - -static void jit_profile_callback(lua_State *L2, lua_State *L, int samples, - int vmstate) -{ - TValue key; - cTValue *tv; - setlightudV(&key, (void *)&KEY_PROFILE_FUNC); - tv = lj_tab_get(L, tabV(registry(L)), &key); - if (tvisfunc(tv)) { - char vmst = (char)vmstate; - int status; - setfuncV(L2, L2->top++, funcV(tv)); - setthreadV(L2, L2->top++, L); - setintV(L2->top++, samples); - setstrV(L2, L2->top++, lj_str_new(L2, &vmst, 1)); - status = lua_pcall(L2, 3, 0, 0); /* callback(thread, samples, vmstate) */ - if (status) { - if (G(L2)->panic) G(L2)->panic(L2); - exit(EXIT_FAILURE); - } - lj_trace_abort(G(L2)); - } -} - -/* profile.start(mode, cb) */ -LJLIB_CF(jit_profile_start) -{ - GCtab *registry = tabV(registry(L)); - GCstr *mode = lj_lib_optstr(L, 1); - GCfunc *func = lj_lib_checkfunc(L, 2); - lua_State *L2 = lua_newthread(L); /* Thread that runs profiler callback. */ - TValue key; - /* Anchor thread and function in registry. */ - setlightudV(&key, (void *)&KEY_PROFILE_THREAD); - setthreadV(L, lj_tab_set(L, registry, &key), L2); - setlightudV(&key, (void *)&KEY_PROFILE_FUNC); - setfuncV(L, lj_tab_set(L, registry, &key), func); - lj_gc_anybarriert(L, registry); - luaJIT_profile_start(L, mode ? strdata(mode) : "", - (luaJIT_profile_callback)jit_profile_callback, L2); - return 0; -} - -/* profile.stop() */ -LJLIB_CF(jit_profile_stop) -{ - GCtab *registry; - TValue key; - luaJIT_profile_stop(L); - registry = tabV(registry(L)); - setlightudV(&key, (void *)&KEY_PROFILE_THREAD); - setnilV(lj_tab_set(L, registry, &key)); - setlightudV(&key, (void *)&KEY_PROFILE_FUNC); - setnilV(lj_tab_set(L, registry, &key)); - lj_gc_anybarriert(L, registry); - return 0; -} - -/* dump = profile.dumpstack([thread,] fmt, depth) */ -LJLIB_CF(jit_profile_dumpstack) -{ - lua_State *L2 = L; - int arg = 0; - size_t len; - int depth; - GCstr *fmt; - const char *p; - if (L->top > L->base && tvisthread(L->base)) { - L2 = threadV(L->base); - arg = 1; - } - fmt = lj_lib_checkstr(L, arg+1); - depth = lj_lib_checkint(L, arg+2); - p = luaJIT_profile_dumpstack(L2, strdata(fmt), depth, &len); - lua_pushlstring(L, p, len); - return 1; -} - -#include "lj_libdef.h" - -static int luaopen_jit_profile(lua_State *L) -{ - LJ_LIB_REG(L, NULL, jit_profile); - return 1; -} - -#endif /* -- JIT compiler initialization ----------------------------------------- */ -#if LJ_HASJIT /* Default values for JIT parameters. */ static const int32_t jit_param_default[JIT_P__MAX+1] = { #define JIT_PARAMINIT(len, name, value) (value), @@ -640,25 +503,16 @@ JIT_PARAMDEF(JIT_PARAMINIT) #undef JIT_PARAMINIT 0 }; -#endif -#if LJ_TARGET_ARM && LJ_TARGET_LINUX -#include -#endif /* Arch-dependent CPU detection. */ static uint32_t jit_cpudetect(lua_State *L) { uint32_t flags = 0; -#if LJ_TARGET_X86ORX64 uint32_t vendor[4]; uint32_t features[4]; if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { -#if !LJ_HASJIT -#define JIT_F_SSE2 2 -#endif flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; -#if LJ_HASJIT flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; if (vendor[2] == 0x6c65746e) { /* Intel. */ @@ -674,67 +528,8 @@ static uint32_t jit_cpudetect(lua_State *L) lj_vm_cpuid(7, xfeatures); flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2; } -#endif } /* Check for required instruction set support on x86 (unnecessary on x64). */ -#if LJ_TARGET_X86 - if (!(flags & JIT_F_SSE2)) - luaL_error(L, "CPU with SSE2 required"); -#endif -#elif LJ_TARGET_ARM -#if LJ_HASJIT - int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */ -#if LJ_TARGET_LINUX - if (ver < 70) { /* Runtime ARM CPU detection. */ - struct utsname ut; - uname(&ut); - if (strncmp(ut.machine, "armv", 4) == 0) { - if (ut.machine[4] >= '7') - ver = 70; - else if (ut.machine[4] == '6') - ver = 60; - } - } -#endif - flags |= ver >= 70 ? JIT_F_ARMV7 : - ver >= 61 ? JIT_F_ARMV6T2_ : - ver >= 60 ? JIT_F_ARMV6_ : 0; - flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; -#endif -#elif LJ_TARGET_ARM64 - /* No optional CPU features to detect (for now). */ -#elif LJ_TARGET_PPC -#if LJ_HASJIT -#if LJ_ARCH_SQRT - flags |= JIT_F_SQRT; -#endif -#if LJ_ARCH_ROUND - flags |= JIT_F_ROUND; -#endif -#endif -#elif LJ_TARGET_MIPS -#if LJ_HASJIT - /* Compile-time MIPS CPU detection. */ -#if LJ_ARCH_VERSION >= 20 - flags |= JIT_F_MIPSXXR2; -#endif - /* Runtime MIPS CPU detection. */ -#if defined(__GNUC__) - if (!(flags & JIT_F_MIPSXXR2)) { - int x; -#ifdef __mips16 - x = 0; /* Runtime detection is difficult. Ensure optimal -march flags. */ -#else - /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */ - __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2"); -#endif - if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */ - } -#endif -#endif -#else -#error "Missing CPU detection for this architecture" -#endif UNUSED(L); return flags; } @@ -743,14 +538,10 @@ static uint32_t jit_cpudetect(lua_State *L) static void jit_init(lua_State *L) { uint32_t flags = jit_cpudetect(L); -#if LJ_HASJIT jit_State *J = L2J(L); J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT; memcpy(J->param, jit_param_default, sizeof(J->param)); lj_dispatch_update(G(L)); -#else - UNUSED(flags); -#endif } LUALIB_API int luaopen_jit(lua_State *L) @@ -761,16 +552,10 @@ LUALIB_API int luaopen_jit(lua_State *L) lua_pushinteger(L, LUAJIT_VERSION_NUM); lua_pushliteral(L, LUAJIT_VERSION); LJ_LIB_REG(L, LUA_JITLIBNAME, jit); -#if LJ_HASPROFILE - lj_lib_prereg(L, LUA_JITLIBNAME ".profile", luaopen_jit_profile, - tabref(L->env)); -#endif #ifndef LUAJIT_DISABLE_JITUTIL lj_lib_prereg(L, LUA_JITLIBNAME ".util", luaopen_jit_util, tabref(L->env)); #endif -#if LJ_HASJIT LJ_LIB_REG(L, "jit.opt", jit_opt); -#endif L->top -= 2; return 1; } diff --git a/src/lib_math.c b/src/lib_math.c index 7bb03880bd..6f079fce8f 100644 --- a/src/lib_math.c +++ b/src/lib_math.c @@ -78,11 +78,7 @@ LJLIB_ASM_(math_fmod) LJLIB_ASM(math_ldexp) LJLIB_REC(.) { lj_lib_checknum(L, 1); -#if LJ_DUALNUM && !LJ_TARGET_X86ORX64 - lj_lib_checkint(L, 2); -#else lj_lib_checknum(L, 2); -#endif return FFH_RETRY; } diff --git a/src/lib_os.c b/src/lib_os.c index 9e78d49ac3..43c57bff77 100644 --- a/src/lib_os.c +++ b/src/lib_os.c @@ -23,15 +23,9 @@ #include "lj_str.h" #include "lj_lib.h" -#if LJ_TARGET_POSIX #include -#else -#include -#endif -#if !LJ_TARGET_PSVITA #include -#endif /* ------------------------------------------------------------------------ */ @@ -76,11 +70,6 @@ LJLIB_CF(os_rename) LJLIB_CF(os_tmpname) { -#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PSVITA - lj_err_caller(L, LJ_ERR_OSUNIQF); - return 0; -#else -#if LJ_TARGET_POSIX char buf[15+1]; int fp; strcpy(buf, "/tmp/lua_XXXXXX"); @@ -89,23 +78,13 @@ LJLIB_CF(os_tmpname) close(fp); else lj_err_caller(L, LJ_ERR_OSUNIQF); -#else - char buf[L_tmpnam]; - if (tmpnam(buf) == NULL) - lj_err_caller(L, LJ_ERR_OSUNIQF); -#endif lua_pushstring(L, buf); return 1; -#endif } LJLIB_CF(os_getenv) { -#if LJ_TARGET_CONSOLE - lua_pushnil(L); -#else lua_pushstring(L, getenv(luaL_checkstring(L, 1))); /* if NULL push nil */ -#endif return 1; } @@ -173,22 +152,12 @@ LJLIB_CF(os_date) const char *s = luaL_optstring(L, 1, "%c"); time_t t = luaL_opt(L, (time_t)luaL_checknumber, 2, time(NULL)); struct tm *stm; -#if LJ_TARGET_POSIX struct tm rtm; -#endif if (*s == '!') { /* UTC? */ s++; /* Skip '!' */ -#if LJ_TARGET_POSIX stm = gmtime_r(&t, &rtm); -#else - stm = gmtime(&t); -#endif } else { -#if LJ_TARGET_POSIX stm = localtime_r(&t, &rtm); -#else - stm = localtime(&t); -#endif } if (stm == NULL) { /* Invalid date? */ setnilV(L->top++); @@ -262,9 +231,6 @@ LJLIB_CF(os_difftime) LJLIB_CF(os_setlocale) { -#if LJ_TARGET_PSVITA - lua_pushliteral(L, "C"); -#else GCstr *s = lj_lib_optstr(L, 1); const char *str = s ? strdata(s) : NULL; int opt = lj_lib_checkopt(L, 2, 6, @@ -276,7 +242,6 @@ LJLIB_CF(os_setlocale) else if (opt == 4) opt = LC_MONETARY; else if (opt == 6) opt = LC_ALL; lua_pushstring(L, setlocale(opt, str)); -#endif return 1; } diff --git a/src/lib_package.c b/src/lib_package.c index b7655c6b27..a4d6f90ce2 100644 --- a/src/lib_package.c +++ b/src/lib_package.c @@ -32,7 +32,6 @@ #define SYMPREFIX_CF "luaopen_%s" #define SYMPREFIX_BC "luaJIT_BC_%s" -#if LJ_TARGET_DLOPEN #include @@ -65,120 +64,6 @@ static const char *ll_bcsym(void *lib, const char *sym) return (const char *)dlsym(lib, sym); } -#elif LJ_TARGET_WINDOWS - -#define WIN32_LEAN_AND_MEAN -#include - -#ifndef GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS -#define GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS 4 -#define GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT 2 -BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*); -#endif - -#undef setprogdir - -static void setprogdir(lua_State *L) -{ - char buff[MAX_PATH + 1]; - char *lb; - DWORD nsize = sizeof(buff); - DWORD n = GetModuleFileNameA(NULL, buff, nsize); - if (n == 0 || n == nsize || (lb = strrchr(buff, '\\')) == NULL) { - luaL_error(L, "unable to get ModuleFileName"); - } else { - *lb = '\0'; - luaL_gsub(L, lua_tostring(L, -1), LUA_EXECDIR, buff); - lua_remove(L, -2); /* remove original string */ - } -} - -static void pusherror(lua_State *L) -{ - DWORD error = GetLastError(); -#if LJ_TARGET_XBOXONE - wchar_t wbuffer[128]; - char buffer[128*2]; - if (FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM, - NULL, error, 0, wbuffer, sizeof(wbuffer)/sizeof(wchar_t), NULL) && - WideCharToMultiByte(CP_ACP, 0, wbuffer, 128, buffer, 128*2, NULL, NULL)) -#else - char buffer[128]; - if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM, - NULL, error, 0, buffer, sizeof(buffer), NULL)) -#endif - lua_pushstring(L, buffer); - else - lua_pushfstring(L, "system error %d\n", error); -} - -static void ll_unloadlib(void *lib) -{ - FreeLibrary((HINSTANCE)lib); -} - -static void *ll_load(lua_State *L, const char *path, int gl) -{ - HINSTANCE lib = LoadLibraryExA(path, NULL, 0); - if (lib == NULL) pusherror(L); - UNUSED(gl); - return lib; -} - -static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym) -{ - lua_CFunction f = (lua_CFunction)GetProcAddress((HINSTANCE)lib, sym); - if (f == NULL) pusherror(L); - return f; -} - -static const char *ll_bcsym(void *lib, const char *sym) -{ - if (lib) { - return (const char *)GetProcAddress((HINSTANCE)lib, sym); - } else { - HINSTANCE h = GetModuleHandleA(NULL); - const char *p = (const char *)GetProcAddress(h, sym); - if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, - (const char *)ll_bcsym, &h)) - p = (const char *)GetProcAddress(h, sym); - return p; - } -} - -#else - -#undef PACKAGE_LIB_FAIL -#define PACKAGE_LIB_FAIL "absent" - -#define DLMSG "dynamic libraries not enabled; no support for target OS" - -static void ll_unloadlib(void *lib) -{ - UNUSED(lib); -} - -static void *ll_load(lua_State *L, const char *path, int gl) -{ - UNUSED(path); UNUSED(gl); - lua_pushliteral(L, DLMSG); - return NULL; -} - -static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym) -{ - UNUSED(lib); UNUSED(sym); - lua_pushliteral(L, DLMSG); - return NULL; -} - -static const char *ll_bcsym(void *lib, const char *sym) -{ - UNUSED(lib); UNUSED(sym); - return NULL; -} - -#endif /* ------------------------------------------------------------------------ */ @@ -534,12 +419,7 @@ static int lj_cf_package_seeall(lua_State *L) static void setpath(lua_State *L, const char *fieldname, const char *envname, const char *def, int noenv) { -#if LJ_TARGET_CONSOLE - const char *path = NULL; - UNUSED(envname); -#else const char *path = getenv(envname); -#endif if (path == NULL || noenv) { lua_pushstring(L, def); } else { diff --git a/src/lj_alloc.c b/src/lj_alloc.c index 95d15d046a..f32b11d0ad 100644 --- a/src/lj_alloc.c +++ b/src/lj_alloc.c @@ -74,18 +74,6 @@ /* Determine system-specific block allocation method. */ -#if LJ_TARGET_WINDOWS - -#define WIN32_LEAN_AND_MEAN -#include - -#define LJ_ALLOC_VIRTUALALLOC 1 - -#if LJ_64 && !LJ_GC64 -#define LJ_ALLOC_NTAVM 1 -#endif - -#else #include /* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */ @@ -93,119 +81,18 @@ #define LJ_ALLOC_MMAP 1 -#if LJ_64 #define LJ_ALLOC_MMAP_PROBE 1 -#if LJ_GC64 #define LJ_ALLOC_MBITS 47 /* 128 TB in LJ_GC64 mode. */ -#elif LJ_TARGET_X64 && LJ_HASJIT -/* Due to limitations in the x64 compiler backend. */ -#define LJ_ALLOC_MBITS 31 /* 2 GB on x64 with !LJ_GC64. */ -#else -#define LJ_ALLOC_MBITS 32 /* 4 GB on other archs with !LJ_GC64. */ -#endif -#endif -#if LJ_64 && !LJ_GC64 && defined(MAP_32BIT) -#define LJ_ALLOC_MMAP32 1 -#endif -#if LJ_TARGET_LINUX #define LJ_ALLOC_MREMAP 1 -#endif - -#endif - - -#if LJ_ALLOC_VIRTUALALLOC - -#if LJ_ALLOC_NTAVM -/* Undocumented, but hey, that's what we all love so much about Windows. */ -typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits, - size_t *size, ULONG alloctype, ULONG prot); -static PNTAVM ntavm; - -/* Number of top bits of the lower 32 bits of an address that must be zero. -** Apparently 0 gives us full 64 bit addresses and 1 gives us the lower 2GB. -*/ -#define NTAVM_ZEROBITS 1 - -static void init_mmap(void) -{ - ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"), - "NtAllocateVirtualMemory"); -} -#define INIT_MMAP() init_mmap() - -/* Win64 32 bit MMAP via NtAllocateVirtualMemory. */ -static void *CALL_MMAP(size_t size) -{ - DWORD olderr = GetLastError(); - void *ptr = NULL; - long st = ntavm(INVALID_HANDLE_VALUE, &ptr, NTAVM_ZEROBITS, &size, - MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); - SetLastError(olderr); - return st == 0 ? ptr : MFAIL; -} - -/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ -static void *DIRECT_MMAP(size_t size) -{ - DWORD olderr = GetLastError(); - void *ptr = NULL; - long st = ntavm(INVALID_HANDLE_VALUE, &ptr, NTAVM_ZEROBITS, &size, - MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, PAGE_READWRITE); - SetLastError(olderr); - return st == 0 ? ptr : MFAIL; -} - -#else - -/* Win32 MMAP via VirtualAlloc */ -static void *CALL_MMAP(size_t size) -{ - DWORD olderr = GetLastError(); - void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); - SetLastError(olderr); - return ptr ? ptr : MFAIL; -} -/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ -static void *DIRECT_MMAP(size_t size) -{ - DWORD olderr = GetLastError(); - void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, - PAGE_READWRITE); - SetLastError(olderr); - return ptr ? ptr : MFAIL; -} -#endif -/* This function supports releasing coalesed segments */ -static int CALL_MUNMAP(void *ptr, size_t size) -{ - DWORD olderr = GetLastError(); - MEMORY_BASIC_INFORMATION minfo; - char *cptr = (char *)ptr; - while (size) { - if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0) - return -1; - if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr || - minfo.State != MEM_COMMIT || minfo.RegionSize > size) - return -1; - if (VirtualFree(cptr, 0, MEM_RELEASE) == 0) - return -1; - cptr += minfo.RegionSize; - size -= minfo.RegionSize; - } - SetLastError(olderr); - return 0; -} - -#elif LJ_ALLOC_MMAP +#if LJ_ALLOC_MMAP #define MMAP_PROT (PROT_READ|PROT_WRITE) #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) @@ -299,11 +186,7 @@ static void *mmap_probe(size_t size) #if LJ_ALLOC_MMAP32 -#if defined(__sun__) -#define LJ_ALLOC_MMAP32_START ((uintptr_t)0x1000) -#else #define LJ_ALLOC_MMAP32_START ((uintptr_t)0) -#endif static void *mmap_map32(size_t size) { @@ -343,19 +226,6 @@ static void *CALL_MMAP(size_t size) } #endif -#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 - -#include - -static void init_mmap(void) -{ - struct rlimit rlim; - rlim.rlim_cur = rlim.rlim_max = 0x10000; - setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail later. */ -} -#define INIT_MMAP() init_mmap() - -#endif static int CALL_MUNMAP(void *ptr, size_t size) { @@ -378,12 +248,8 @@ static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags) #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv)) #define CALL_MREMAP_NOMOVE 0 #define CALL_MREMAP_MAYMOVE 1 -#if LJ_64 && !LJ_GC64 -#define CALL_MREMAP_MV CALL_MREMAP_NOMOVE -#else #define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE #endif -#endif #endif @@ -569,11 +435,7 @@ typedef struct malloc_state *mstate; (((S) + (DEFAULT_GRANULARITY - SIZE_T_ONE))\ & ~(DEFAULT_GRANULARITY - SIZE_T_ONE)) -#if LJ_TARGET_WINDOWS -#define mmap_align(S) granularity_align(S) -#else #define mmap_align(S) page_align(S) -#endif /* True if segment S holds address A */ #define segment_holds(S, A)\ diff --git a/src/lj_api.c b/src/lj_api.c index d1be3abf57..c35c89ef7a 100644 --- a/src/lj_api.c +++ b/src/lj_api.c @@ -189,19 +189,11 @@ LUA_API int lua_type(lua_State *L, int idx) cTValue *o = index2adr(L, idx); if (tvisnumber(o)) { return LUA_TNUMBER; -#if LJ_64 && !LJ_GC64 - } else if (tvislightud(o)) { - return LUA_TLIGHTUSERDATA; -#endif } else if (o == niltv(L)) { return LUA_TNONE; } else { /* Magic internal/external tag conversion. ORDER LJ_T */ uint32_t t = ~itype(o); -#if LJ_64 int tt = (int)((U64x(75a06,98042110) >> 4*t) & 15u); -#else - int tt = (int)(((t < 8 ? 0x98042110u : 0x75a06u) >> 4*(t&7)) & 15u); -#endif lua_assert(tt != LUA_TNIL || tvisnil(o)); return tt; } @@ -269,10 +261,6 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2) return 0; } else if (tvispri(o1)) { return o1 != niltv(L) && o2 != niltv(L); -#if LJ_64 && !LJ_GC64 - } else if (tvislightud(o1)) { - return o1->u64 == o2->u64; -#endif } else if (gcrefeq(o1->gcr, o2->gcr)) { return 1; } else if (!tvistabud(o1)) { @@ -365,11 +353,7 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx) return (lua_Integer)intV(&tmp); n = numV(&tmp); } -#if LJ_64 return (lua_Integer)n; -#else - return lj_num2int(n); -#endif } LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx) @@ -388,11 +372,7 @@ LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx) return (lua_Integer)intV(&tmp); n = numV(&tmp); } -#if LJ_64 return (lua_Integer)n; -#else - return lj_num2int(n); -#endif } LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def) @@ -413,11 +393,7 @@ LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def) return (lua_Integer)intV(&tmp); n = numV(&tmp); } -#if LJ_64 return (lua_Integer)n; -#else - return lj_num2int(n); -#endif } LUA_API int lua_toboolean(lua_State *L, int idx) @@ -1017,7 +993,6 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n) /* -- Calls --------------------------------------------------------------- */ -#if LJ_FR2 static TValue *api_call_base(lua_State *L, int nargs) { TValue *o = L->top, *base = o - nargs; @@ -1026,9 +1001,6 @@ static TValue *api_call_base(lua_State *L, int nargs) setnilV(o); return o+1; } -#else -#define api_call_base(L, nargs) (L->top - (nargs)) -#endif LUA_API void lua_call(lua_State *L, int nargs, int nresults) { @@ -1123,13 +1095,7 @@ LUA_API int lua_yield(lua_State *L, int nresults) setframe_gc(top, obj2gco(L), LJ_TTHREAD); setframe_ftsz(top, ((char *)(top+1)-(char *)L->base)+FRAME_CONT); L->top = L->base = top+1; -#if LJ_TARGET_X64 lj_err_throw(L, LUA_YIELD); -#else - L->cframe = NULL; - L->status = LUA_YIELD; - lj_vm_unwind_c(cf, LUA_YIELD); -#endif } } lj_err_msg(L, LJ_ERR_CYIELD); diff --git a/src/lj_arch.h b/src/lj_arch.h index 9bf6f481b4..56572f917e 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -39,66 +39,11 @@ #define LUAJIT_OS_POSIX 5 /* Select native target if no target defined. */ -#ifndef LUAJIT_TARGET - -#if defined(__i386) || defined(__i386__) || defined(_M_IX86) -#define LUAJIT_TARGET LUAJIT_ARCH_X86 -#elif defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) -#define LUAJIT_TARGET LUAJIT_ARCH_X64 -#elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM) -#define LUAJIT_TARGET LUAJIT_ARCH_ARM -#elif defined(__aarch64__) -#define LUAJIT_TARGET LUAJIT_ARCH_ARM64 -#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) -#define LUAJIT_TARGET LUAJIT_ARCH_PPC -#elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64) -#define LUAJIT_TARGET LUAJIT_ARCH_MIPS64 -#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) -#define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 -#else -#error "No support for this architecture (yet)" -#endif - -#endif /* Select native OS if no target OS defined. */ -#ifndef LUAJIT_OS - -#if defined(_WIN32) && !defined(_XBOX_VER) -#define LUAJIT_OS LUAJIT_OS_WINDOWS -#elif defined(__linux__) -#define LUAJIT_OS LUAJIT_OS_LINUX -#elif defined(__MACH__) && defined(__APPLE__) -#define LUAJIT_OS LUAJIT_OS_OSX -#elif (defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \ - defined(__NetBSD__) || defined(__OpenBSD__) || \ - defined(__DragonFly__)) && !defined(__ORBIS__) -#define LUAJIT_OS LUAJIT_OS_BSD -#elif (defined(__sun__) && defined(__svr4__)) -#define LUAJIT_OS LUAJIT_OS_POSIX -#elif defined(__CYGWIN__) -#define LJ_TARGET_CYGWIN 1 -#define LUAJIT_OS LUAJIT_OS_POSIX -#else -#define LUAJIT_OS LUAJIT_OS_OTHER -#endif - -#endif /* Set target OS properties. */ -#if LUAJIT_OS == LUAJIT_OS_WINDOWS -#define LJ_OS_NAME "Windows" -#elif LUAJIT_OS == LUAJIT_OS_LINUX #define LJ_OS_NAME "Linux" -#elif LUAJIT_OS == LUAJIT_OS_OSX -#define LJ_OS_NAME "OSX" -#elif LUAJIT_OS == LUAJIT_OS_BSD -#define LJ_OS_NAME "BSD" -#elif LUAJIT_OS == LUAJIT_OS_POSIX -#define LJ_OS_NAME "POSIX" -#else -#define LJ_OS_NAME "Other" -#endif #define LJ_TARGET_WINDOWS (LUAJIT_OS == LUAJIT_OS_WINDOWS) #define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX) @@ -107,33 +52,10 @@ #define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS) #define LJ_TARGET_DLOPEN LJ_TARGET_POSIX -#ifdef __CELLOS_LV2__ -#define LJ_TARGET_PS3 1 -#define LJ_TARGET_CONSOLE 1 -#endif -#ifdef __ORBIS__ -#define LJ_TARGET_PS4 1 -#define LJ_TARGET_CONSOLE 1 -#undef NULL -#define NULL ((void*)0) -#endif -#ifdef __psp2__ -#define LJ_TARGET_PSVITA 1 -#define LJ_TARGET_CONSOLE 1 -#endif -#if _XBOX_VER >= 200 -#define LJ_TARGET_XBOX360 1 -#define LJ_TARGET_CONSOLE 1 -#endif -#ifdef _DURANGO -#define LJ_TARGET_XBOXONE 1 -#define LJ_TARGET_CONSOLE 1 -#define LJ_TARGET_GC64 1 -#endif #define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */ #define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */ @@ -141,34 +63,11 @@ #define LJ_NUMMODE_DUAL_SINGLE 3 /* Default to dual-number mode. */ /* Set target architecture properties. */ -#if LUAJIT_TARGET == LUAJIT_ARCH_X86 - -#define LJ_ARCH_NAME "x86" -#define LJ_ARCH_BITS 32 -#define LJ_ARCH_ENDIAN LUAJIT_LE -#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN -#define LJ_ABI_WIN 1 -#else -#define LJ_ABI_WIN 0 -#endif -#define LJ_TARGET_X86 1 -#define LJ_TARGET_X86ORX64 1 -#define LJ_TARGET_EHRETREG 0 -#define LJ_TARGET_MASKSHIFT 1 -#define LJ_TARGET_MASKROT 1 -#define LJ_TARGET_UNALIGNED 1 -#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL - -#elif LUAJIT_TARGET == LUAJIT_ARCH_X64 #define LJ_ARCH_NAME "x64" #define LJ_ARCH_BITS 64 #define LJ_ARCH_ENDIAN LUAJIT_LE -#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN -#define LJ_ABI_WIN 1 -#else #define LJ_ABI_WIN 0 -#endif #define LJ_TARGET_X64 1 #define LJ_TARGET_X86ORX64 1 #define LJ_TARGET_EHRETREG 0 @@ -177,185 +76,8 @@ #define LJ_TARGET_MASKROT 1 #define LJ_TARGET_UNALIGNED 1 #define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL -#ifdef LUAJIT_ENABLE_GC64 #define LJ_TARGET_GC64 1 -#endif - -#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM -#define LJ_ARCH_NAME "arm" -#define LJ_ARCH_BITS 32 -#define LJ_ARCH_ENDIAN LUAJIT_LE -#if !defined(LJ_ARCH_HASFPU) && __SOFTFP__ -#define LJ_ARCH_HASFPU 0 -#endif -#if !defined(LJ_ABI_SOFTFP) && !__ARM_PCS_VFP -#define LJ_ABI_SOFTFP 1 -#endif -#define LJ_ABI_EABI 1 -#define LJ_TARGET_ARM 1 -#define LJ_TARGET_EHRETREG 0 -#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ -#define LJ_TARGET_MASKSHIFT 0 -#define LJ_TARGET_MASKROT 1 -#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ -#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL - -#if __ARM_ARCH____ARM_ARCH_8__ || __ARM_ARCH_8A__ -#define LJ_ARCH_VERSION 80 -#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ -#define LJ_ARCH_VERSION 70 -#elif __ARM_ARCH_6T2__ -#define LJ_ARCH_VERSION 61 -#elif __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__ -#define LJ_ARCH_VERSION 60 -#else -#define LJ_ARCH_VERSION 50 -#endif - -#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64 - -#define LJ_ARCH_NAME "arm64" -#define LJ_ARCH_BITS 64 -#define LJ_ARCH_ENDIAN LUAJIT_LE -#define LJ_TARGET_ARM64 1 -#define LJ_TARGET_EHRETREG 0 -#define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */ -#define LJ_TARGET_MASKSHIFT 1 -#define LJ_TARGET_MASKROT 1 -#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ -#define LJ_TARGET_GC64 1 -#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL - -#define LJ_ARCH_VERSION 80 - -#elif LUAJIT_TARGET == LUAJIT_ARCH_PPC - -#ifndef LJ_ARCH_ENDIAN -#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ -#define LJ_ARCH_ENDIAN LUAJIT_LE -#else -#define LJ_ARCH_ENDIAN LUAJIT_BE -#endif -#endif - -#if _LP64 -#define LJ_ARCH_BITS 64 -#if LJ_ARCH_ENDIAN == LUAJIT_LE -#define LJ_ARCH_NAME "ppc64le" -#else -#define LJ_ARCH_NAME "ppc64" -#endif -#else -#define LJ_ARCH_BITS 32 -#define LJ_ARCH_NAME "ppc" -#endif - -#define LJ_TARGET_PPC 1 -#define LJ_TARGET_EHRETREG 3 -#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ -#define LJ_TARGET_MASKSHIFT 0 -#define LJ_TARGET_MASKROT 1 -#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */ -#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE - -#if LJ_TARGET_CONSOLE -#define LJ_ARCH_PPC32ON64 1 -#define LJ_ARCH_NOFFI 1 -#elif LJ_ARCH_BITS == 64 -#define LJ_ARCH_PPC64 1 -#define LJ_TARGET_GC64 1 -#define LJ_ARCH_NOJIT 1 /* NYI */ -#endif - -#if _ARCH_PWR7 -#define LJ_ARCH_VERSION 70 -#elif _ARCH_PWR6 -#define LJ_ARCH_VERSION 60 -#elif _ARCH_PWR5X -#define LJ_ARCH_VERSION 51 -#elif _ARCH_PWR5 -#define LJ_ARCH_VERSION 50 -#elif _ARCH_PWR4 -#define LJ_ARCH_VERSION 40 -#else -#define LJ_ARCH_VERSION 0 -#endif -#if _ARCH_PPCSQ -#define LJ_ARCH_SQRT 1 -#endif -#if _ARCH_PWR5X -#define LJ_ARCH_ROUND 1 -#endif -#if __PPU__ -#define LJ_ARCH_CELL 1 -#endif -#if LJ_TARGET_XBOX360 -#define LJ_ARCH_XENON 1 -#endif - -#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64 - -#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) -#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 -#define LJ_ARCH_NAME "mipsel" -#else -#define LJ_ARCH_NAME "mips64el" -#endif -#define LJ_ARCH_ENDIAN LUAJIT_LE -#else -#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 -#define LJ_ARCH_NAME "mips" -#else -#define LJ_ARCH_NAME "mips64" -#endif -#define LJ_ARCH_ENDIAN LUAJIT_BE -#endif - -#if !defined(LJ_ARCH_HASFPU) -#ifdef __mips_soft_float -#define LJ_ARCH_HASFPU 0 -#else -#define LJ_ARCH_HASFPU 1 -#endif -#endif - -#if !defined(LJ_ABI_SOFTFP) -#ifdef __mips_soft_float -#define LJ_ABI_SOFTFP 1 -#else -#define LJ_ABI_SOFTFP 0 -#endif -#endif - -#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 -#define LJ_ARCH_BITS 32 -#define LJ_TARGET_MIPS32 1 -#else -#if LJ_ABI_SOFTFP || !LJ_ARCH_HASFPU -#define LJ_ARCH_NOJIT 1 /* NYI */ -#endif -#define LJ_ARCH_BITS 64 -#define LJ_TARGET_MIPS64 1 -#define LJ_TARGET_GC64 1 -#endif -#define LJ_TARGET_MIPS 1 -#define LJ_TARGET_EHRETREG 4 -#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */ -#define LJ_TARGET_MASKSHIFT 1 -#define LJ_TARGET_MASKROT 1 -#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ -#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL - -#if _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2 -#define LJ_ARCH_VERSION 20 -#else -#define LJ_ARCH_VERSION 10 -#endif - -#else -#error "No target architecture defined" -#endif #ifndef LJ_PAGESIZE #define LJ_PAGESIZE 4096 @@ -363,80 +85,16 @@ /* Check for minimum required compiler versions. */ #if defined(__GNUC__) -#if LJ_TARGET_X86 -#if (__GNUC__ < 3) || ((__GNUC__ == 3) && __GNUC_MINOR__ < 4) -#error "Need at least GCC 3.4 or newer" -#endif -#elif LJ_TARGET_X64 #if __GNUC__ < 4 #error "Need at least GCC 4.0 or newer" #endif -#elif LJ_TARGET_ARM -#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2) -#error "Need at least GCC 4.2 or newer" -#endif -#elif LJ_TARGET_ARM64 -#if __clang__ -#if (__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5) -#error "Need at least Clang 3.5 or newer" -#endif -#else -#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8) -#error "Need at least GCC 4.8 or newer" -#endif -#endif -#elif !LJ_TARGET_PS3 -#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3) -#error "Need at least GCC 4.3 or newer" -#endif -#endif #endif /* Check target-specific constraints. */ #ifndef _BUILDVM_H -#if LJ_TARGET_X64 #if __USING_SJLJ_EXCEPTIONS__ #error "Need a C compiler with native exception handling on x64" #endif -#elif LJ_TARGET_ARM -#if defined(__ARMEB__) -#error "No support for big-endian ARM" -#endif -#if __ARM_ARCH_6M__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__ -#error "No support for Cortex-M CPUs" -#endif -#if !(__ARM_EABI__ || LJ_TARGET_IOS) -#error "Only ARM EABI or iOS 3.0+ ABI is supported" -#endif -#elif LJ_TARGET_ARM64 -#if defined(__AARCH64EB__) -#error "No support for big-endian ARM64" -#endif -#if defined(_ILP32) -#error "No support for ILP32 model on ARM64" -#endif -#elif LJ_TARGET_PPC -#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) -#error "No support for PowerPC CPUs without double-precision FPU" -#endif -#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE -#error "No support for little-endian PPC32" -#endif -#if LJ_ARCH_PPC64 -#error "No support for PowerPC 64 bit mode (yet)" -#endif -#ifdef __NO_FPRS__ -#error "No support for PPC/e500 anymore (use LuaJIT 2.0)" -#endif -#elif LJ_TARGET_MIPS32 -#if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32)) -#error "Only o32 ABI supported for MIPS32" -#endif -#elif LJ_TARGET_MIPS64 -#if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64)) -#error "Only n64 ABI supported for MIPS64" -#endif -#endif #endif /* Enable or disable the dual-number mode for the VM. */ @@ -452,27 +110,12 @@ #define LJ_DUALNUM 0 #endif -#if LJ_TARGET_IOS || LJ_TARGET_CONSOLE -/* Runtime code generation is restricted on iOS. Complain to Apple, not me. */ -/* Ditto for the consoles. Complain to Sony or MS, not me. */ -#ifndef LUAJIT_ENABLE_JIT -#define LJ_OS_NOJIT 1 -#endif -#endif /* 64 bit GC references. */ -#if LJ_TARGET_GC64 #define LJ_GC64 1 -#else -#define LJ_GC64 0 -#endif /* 2-slot frame info. */ -#if LJ_GC64 #define LJ_FR2 1 -#else -#define LJ_FR2 0 -#endif /* Disable or enable the JIT compiler. */ #if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) @@ -482,33 +125,14 @@ #endif /* Disable or enable the FFI extension. */ -#if defined(LUAJIT_DISABLE_FFI) || defined(LJ_ARCH_NOFFI) -#define LJ_HASFFI 0 -#else #define LJ_HASFFI 1 -#endif -#if defined(LUAJIT_DISABLE_PROFILE) #define LJ_HASPROFILE 0 -#elif LJ_TARGET_POSIX -#define LJ_HASPROFILE 1 -#define LJ_PROFILE_SIGPROF 1 -#elif LJ_TARGET_PS3 -#define LJ_HASPROFILE 1 -#define LJ_PROFILE_PTHREAD 1 -#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOX360 -#define LJ_HASPROFILE 1 -#define LJ_PROFILE_WTHREAD 1 -#else -#define LJ_HASPROFILE 0 -#endif #ifndef LJ_ARCH_HASFPU #define LJ_ARCH_HASFPU 1 #endif -#ifndef LJ_ABI_SOFTFP #define LJ_ABI_SOFTFP 0 -#endif #define LJ_SOFTFP (!LJ_ARCH_HASFPU) #if LJ_ARCH_ENDIAN == LUAJIT_BE @@ -523,28 +147,14 @@ #define LJ_ENDIAN_LOHI(lo, hi) lo hi #endif -#if LJ_ARCH_BITS == 32 -#define LJ_32 1 -#define LJ_64 0 -#else #define LJ_32 0 #define LJ_64 1 -#endif #ifndef LJ_TARGET_UNALIGNED #define LJ_TARGET_UNALIGNED 0 #endif /* Various workarounds for embedded operating systems or weak C runtimes. */ -#if defined(__ANDROID__) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS -#define LUAJIT_NO_LOG2 -#endif -#if defined(__symbian__) || LJ_TARGET_WINDOWS -#define LUAJIT_NO_EXP2 -#endif -#if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0) -#define LJ_NO_SYSTEM 1 -#endif #if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__ /* NYI: no support for compact unwind specification, yet. */ diff --git a/src/lj_asm.c b/src/lj_asm.c index 7c09dd9f50..51d4e6f51e 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -8,15 +8,12 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_gc.h" #include "lj_str.h" #include "lj_tab.h" #include "lj_frame.h" -#if LJ_HASFFI #include "lj_ctype.h" -#endif #include "lj_ir.h" #include "lj_jit.h" #include "lj_ircall.h" @@ -49,9 +46,7 @@ typedef struct ASMState { IRIns *ir; /* Copy of pointer to IR instructions/constants. */ jit_State *J; /* JIT compiler state. */ -#if LJ_TARGET_X86ORX64 x86ModRM mrm; /* Fused x86 address operand. */ -#endif RegSet freeset; /* Set of free registers. */ RegSet modset; /* Set of registers modified inside the loop. */ @@ -167,19 +162,7 @@ IRFLDEF(FLOFS) /* -- Target-specific instruction emitter --------------------------------- */ -#if LJ_TARGET_X86ORX64 #include "lj_emit_x86.h" -#elif LJ_TARGET_ARM -#include "lj_emit_arm.h" -#elif LJ_TARGET_ARM64 -#include "lj_emit_arm64.h" -#elif LJ_TARGET_PPC -#include "lj_emit_ppc.h" -#elif LJ_TARGET_MIPS -#include "lj_emit_mips.h" -#else -#error "Missing instruction emitter for target CPU" -#endif /* Generic load/store of register from/to stack slot. */ #define emit_spload(as, ir, r, ofs) \ @@ -324,11 +307,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref) lua_assert(!rset_test(as->freeset, r)); ra_free(as, r); ra_modified(as, r); -#if LJ_64 emit_loadu64(as, r, ra_krefk(as, ref)); -#else - emit_loadi(as, r, ra_krefk(as, ref)); -#endif return r; } ir = IR(ref); @@ -338,27 +317,21 @@ static Reg ra_rematk(ASMState *as, IRRef ref) ra_modified(as, r); ir->r = RID_INIT; /* Do not keep any hint. */ RA_DBGX((as, "remat $i $r", ir, r)); -#if !LJ_SOFTFP if (ir->o == IR_KNUM) { emit_loadk64(as, r, ir); } else -#endif if (emit_canremat(REF_BASE) && ir->o == IR_BASE) { ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ emit_getgl(as, r, jit_base); } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */ emit_getgl(as, r, cur_L); -#if LJ_64 } else if (ir->o == IR_KINT64) { emit_loadu64(as, r, ir_kint64(ir)->u64); -#if LJ_GC64 } else if (ir->o == IR_KGC) { emit_loadu64(as, r, (uintptr_t)ir_kgc(ir)); } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { emit_loadu64(as, r, (uintptr_t)ir_kptr(ir)); -#endif -#endif } else { lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); @@ -482,7 +455,6 @@ static void ra_evictset(ASMState *as, RegSet drop) { RegSet work; as->modset |= drop; -#if !LJ_SOFTFP work = (drop & ~as->freeset) & RSET_FPR; while (work) { Reg r = rset_pickbot(work); @@ -490,7 +462,6 @@ static void ra_evictset(ASMState *as, RegSet drop) rset_clear(work, r); checkmclim(as); } -#endif work = (drop & ~as->freeset); while (work) { Reg r = rset_pickbot(work); @@ -504,7 +475,6 @@ static void ra_evictset(ASMState *as, RegSet drop) static void ra_evictk(ASMState *as) { RegSet work; -#if !LJ_SOFTFP work = ~as->freeset & RSET_FPR; while (work) { Reg r = rset_pickbot(work); @@ -515,7 +485,6 @@ static void ra_evictk(ASMState *as) } rset_clear(work, r); } -#endif work = ~as->freeset & RSET_GPR; while (work) { Reg r = rset_pickbot(work); @@ -539,7 +508,6 @@ static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow) IRRef ref; r = rset_pickbot(work); ref = regcost_ref(as->cost[r]); -#if LJ_64 if (ref < ASMREF_L) { if (ra_iskref(ref)) { if (k == ra_krefk(as, ref)) @@ -547,23 +515,14 @@ static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow) } else { IRIns *ir = IR(ref); if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) || -#if LJ_GC64 (ir->o == IR_KINT && k == ir->i) || (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) || ((ir->o == IR_KPTR || ir->o == IR_KKPTR) && k == (intptr_t)ir_kptr(ir)) -#else - (ir->o != IR_KINT64 && k == ir->i) -#endif ) return r; } } -#else - if (ref < ASMREF_L && - k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i)) - return r; -#endif rset_clear(work, r); } pick = as->freeset & allow; @@ -718,7 +677,6 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r) } } -#if LJ_TARGET_X86ORX64 /* Propagate dest register to left reference. Emit moves as needed. ** This is a required fixup step for all 2-operand machine instructions. */ @@ -734,16 +692,12 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref) emit_loadk64(as, dest, ir); return; } -#if LJ_64 } else if (ir->o == IR_KINT64) { emit_loadk64(as, dest, ir); return; -#if LJ_GC64 } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) { emit_loadk64(as, dest, ir); return; -#endif -#endif } else if (ir->o != IR_KPRI) { lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); @@ -767,81 +721,7 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref) } } } -#else -/* Similar to ra_left, except we override any hints. */ -static void ra_leftov(ASMState *as, Reg dest, IRRef lref) -{ - IRIns *ir = IR(lref); - Reg left = ir->r; - if (ra_noreg(left)) { - ra_sethint(ir->r, dest); /* Propagate register hint. */ - left = ra_allocref(as, lref, - (LJ_SOFTFP || dest < RID_MAX_GPR) ? RSET_GPR : RSET_FPR); - } - ra_noweak(as, left); - if (dest != left) { - /* Use register renaming if dest is the PHI reg. */ - if (irt_isphi(ir->t) && as->phireg[dest] == lref) { - ra_modified(as, left); - ra_rename(as, left, dest); - } else { - emit_movrr(as, ir, dest, left); - } - } -} -#endif -#if !LJ_64 -/* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). */ -static void ra_destpair(ASMState *as, IRIns *ir) -{ - Reg destlo = ir->r, desthi = (ir+1)->r; - /* First spill unrelated refs blocking the destination registers. */ - if (!rset_test(as->freeset, RID_RETLO) && - destlo != RID_RETLO && desthi != RID_RETLO) - ra_restore(as, regcost_ref(as->cost[RID_RETLO])); - if (!rset_test(as->freeset, RID_RETHI) && - destlo != RID_RETHI && desthi != RID_RETHI) - ra_restore(as, regcost_ref(as->cost[RID_RETHI])); - /* Next free the destination registers (if any). */ - if (ra_hasreg(destlo)) { - ra_free(as, destlo); - ra_modified(as, destlo); - } else { - destlo = RID_RETLO; - } - if (ra_hasreg(desthi)) { - ra_free(as, desthi); - ra_modified(as, desthi); - } else { - desthi = RID_RETHI; - } - /* Check for conflicts and shuffle the registers as needed. */ - if (destlo == RID_RETHI) { - if (desthi == RID_RETLO) { -#if LJ_TARGET_X86 - *--as->mcp = XI_XCHGa + RID_RETHI; -#else - emit_movrr(as, ir, RID_RETHI, RID_TMP); - emit_movrr(as, ir, RID_RETLO, RID_RETHI); - emit_movrr(as, ir, RID_TMP, RID_RETLO); -#endif - } else { - emit_movrr(as, ir, RID_RETHI, RID_RETLO); - if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI); - } - } else if (desthi == RID_RETLO) { - emit_movrr(as, ir, RID_RETLO, RID_RETHI); - if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO); - } else { - if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI); - if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO); - } - /* Restore spill slots (if any). */ - if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI); - if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO); -} -#endif /* -- Snapshot handling --------- ----------------------------------------- */ @@ -879,13 +759,11 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref) if (!irref_isk(ref) && (!(ra_used(ir) || ir->r == RID_SUNK))) { if (ir->r == RID_SINK) { ir->r = RID_SUNK; -#if LJ_HASFFI if (ir->o == IR_CNEWI) { /* Allocate CNEWI value. */ asm_snap_alloc1(as, ir->op2); if (LJ_32 && (ir+1)->o == IR_HIOP) asm_snap_alloc1(as, (ir+1)->op2); } else -#endif { /* Allocate stored values for TNEW, TDUP and CNEW. */ IRIns *irs; lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW); @@ -1103,11 +981,7 @@ static void asm_bufhdr(ASMState *as, IRIns *ir) emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p)); emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b)); } -#if LJ_TARGET_X86ORX64 ra_left(as, sb, ir->op1); -#else - ra_leftov(as, sb, ir->op1); -#endif } static void asm_bufput(ASMState *as, IRIns *ir) @@ -1191,34 +1065,6 @@ static void asm_tostr(ASMState *as, IRIns *ir) asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1); } -#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86 -static void asm_conv64(ASMState *as, IRIns *ir) -{ - IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); - IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); - IRCallID id; - IRRef args[2]; - lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP); - args[LJ_BE] = (ir-1)->op1; - args[LJ_LE] = ir->op1; - if (st == IRT_NUM || st == IRT_FLOAT) { - id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); - ir--; - } else { - id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); - } - { -#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP - CCallInfo cim = lj_ir_callinfo[id], *ci = &cim; - cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */ -#else - const CCallInfo *ci = &lj_ir_callinfo[id]; -#endif - asm_setupresult(as, ir, ci); - asm_gencall(as, ci, args); - } -} -#endif /* -- Memory references --------------------------------------------------- */ @@ -1239,11 +1085,7 @@ static void asm_newref(ASMState *as, IRIns *ir) static void asm_lref(ASMState *as, IRIns *ir) { Reg r = ra_dest(as, ir, RSET_GPR); -#if LJ_TARGET_X86ORX64 ra_left(as, r, ASMREF_L); -#else - ra_leftov(as, r, ASMREF_L); -#endif } /* -- Calls --------------------------------------------------------------- */ @@ -1273,16 +1115,11 @@ static uint32_t asm_callx_flags(ASMState *as, IRIns *ir) nargs++; while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); } } -#if LJ_HASFFI if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */ CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i; CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id); nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0); -#if LJ_TARGET_X86 - nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT); -#endif } -#endif return (nargs | (ir->t.irt << CCI_OTSHIFT)); } @@ -1305,7 +1142,6 @@ static void asm_call(ASMState *as, IRIns *ir) asm_gencall(as, ci, args); } -#if !LJ_SOFTFP static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref) { const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; @@ -1329,7 +1165,6 @@ static int asm_fpjoin_pow(ASMState *as, IRIns *ir) } return 0; } -#endif /* -- PHI and loop handling ----------------------------------------------- */ @@ -1411,7 +1246,6 @@ static void asm_phi_shuffle(ASMState *as) } /* Restore/remat invariants whose registers are modified inside the loop. */ -#if !LJ_SOFTFP work = as->modset & ~(as->freeset | as->phiset) & RSET_FPR; while (work) { Reg r = rset_pickbot(work); @@ -1419,7 +1253,6 @@ static void asm_phi_shuffle(ASMState *as) rset_clear(work, r); checkmclim(as); } -#endif work = as->modset & ~(as->freeset | as->phiset); while (work) { Reg r = rset_pickbot(work); @@ -1453,15 +1286,11 @@ static void asm_phi_copyspill(ASMState *as) if (ra_hasspill(ir->s) && ra_hasspill(IR(ir->op1)->s)) need |= irt_isfp(ir->t) ? 2 : 1; /* Unsynced spill slot? */ if ((need & 1)) { /* Copy integer spill slots. */ -#if !LJ_TARGET_X86ORX64 - Reg r = RID_TMP; -#else Reg r = RID_RET; if ((as->freeset & RSET_GPR)) r = rset_pickbot((as->freeset & RSET_GPR)); else emit_spload(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP); -#endif for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--) { if (ra_hasspill(ir->s)) { IRIns *irl = IR(ir->op1); @@ -1472,18 +1301,11 @@ static void asm_phi_copyspill(ASMState *as) } } } -#if LJ_TARGET_X86ORX64 if (!rset_test(as->freeset, r)) emit_spstore(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP); -#endif } -#if !LJ_SOFTFP if ((need & 2)) { /* Copy FP spill slots. */ -#if LJ_TARGET_X86 - Reg r = RID_XMM0; -#else Reg r = RID_FPRET; -#endif if ((as->freeset & RSET_FPR)) r = rset_pickbot((as->freeset & RSET_FPR)); if (!rset_test(as->freeset, r)) @@ -1501,7 +1323,6 @@ static void asm_phi_copyspill(ASMState *as) if (!rset_test(as->freeset, r)) emit_spstore(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP); } -#endif } /* Emit renames for left PHIs which are only spilled outside the loop. */ @@ -1587,19 +1408,7 @@ static void asm_loop(ASMState *as) /* -- Target-specific assembler ------------------------------------------- */ -#if LJ_TARGET_X86ORX64 #include "lj_asm_x86.h" -#elif LJ_TARGET_ARM -#include "lj_asm_arm.h" -#elif LJ_TARGET_ARM64 -#include "lj_asm_arm64.h" -#elif LJ_TARGET_PPC -#include "lj_asm_ppc.h" -#elif LJ_TARGET_MIPS -#include "lj_asm_mips.h" -#else -#error "Missing assembler for target CPU" -#endif /* -- Instruction dispatch ------------------------------------------------ */ @@ -1652,12 +1461,6 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_MUL: asm_mul(as, ir); break; case IR_MOD: asm_mod(as, ir); break; case IR_NEG: asm_neg(as, ir); break; -#if LJ_SOFTFP - case IR_DIV: case IR_POW: case IR_ABS: - case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: - lua_assert(0); /* Unused for LJ_SOFTFP. */ - break; -#else case IR_DIV: asm_div(as, ir); break; case IR_POW: asm_pow(as, ir); break; case IR_ABS: asm_abs(as, ir); break; @@ -1665,7 +1468,6 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_LDEXP: asm_ldexp(as, ir); break; case IR_FPMATH: asm_fpmath(as, ir); break; case IR_TOBIT: asm_tobit(as, ir); break; -#endif case IR_MIN: asm_min(as, ir); break; case IR_MAX: asm_max(as, ir); break; @@ -1844,11 +1646,6 @@ static void asm_head_side(ASMState *as) emit_setvmstate(as, (int32_t)as->T->traceno); emit_spsub(as, spdelta); -#if !LJ_TARGET_X86ORX64 - /* Restore BASE register from parent spill slot. */ - if (ra_hasspill(irp->s)) - emit_spload(as, IR(REF_BASE), IR(REF_BASE)->r, sps_scale(irp->s)); -#endif /* Restore target registers from parent spill slots. */ if (pass3) { @@ -1956,12 +1753,7 @@ static void asm_tail_link(ASMState *as) if (bc_isret(bc_op(*retpc))) pc = retpc; } -#if LJ_GC64 emit_loadu64(as, RID_LPC, u64ptr(pc)); -#else - ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); - ra_allockreg(as, i32ptr(pc), RID_LPC); -#endif mres = (int32_t)(snap->nslots - baseslot - LJ_FR2); switch (bc_op(*pc)) { case BC_CALLM: case BC_CALLMT: @@ -2000,9 +1792,6 @@ static void asm_setup_regsp(ASMState *as) IRRef nins = T->nins; IRIns *ir, *lastir; int inloop; -#if LJ_TARGET_ARM - uint32_t rload = 0xa6402a64; -#endif ra_setup(as); @@ -2010,12 +1799,7 @@ static void asm_setup_regsp(ASMState *as) for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) { ir->prev = REGSP_INIT; if (irt_is64(ir->t) && ir->o != IR_KNULL) { -#if LJ_GC64 ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */ -#else - /* Make life easier for backends by putting address of constant in i. */ - ir->i = (int32_t)(intptr_t)(ir+1); -#endif ir++; } } @@ -2064,17 +1848,6 @@ static void asm_setup_regsp(ASMState *as) case IR_LOOP: inloop = 1; break; -#if LJ_TARGET_ARM - case IR_SLOAD: - if (!((ir->op2 & IRSLOAD_TYPECHECK) || (ir+1)->o == IR_HIOP)) - break; - /* fallthrough */ - case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: - if (!LJ_SOFTFP && irt_isnum(ir->t)) break; - ir->prev = (uint16_t)REGSP_HINT((rload & 15)); - rload = lj_ror(rload, 4); - continue; -#endif case IR_CALLXS: { CCallInfo ci; ci.flags = asm_callx_flags(as, ir); @@ -2091,55 +1864,11 @@ static void asm_setup_regsp(ASMState *as) (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH; continue; } -#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) - case IR_HIOP: - switch ((ir-1)->o) { -#if LJ_SOFTFP && LJ_TARGET_ARM - case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: - if (ra_hashint((ir-1)->r)) { - ir->prev = (ir-1)->prev + 1; - continue; - } - break; -#endif -#if !LJ_SOFTFP && LJ_NEED_FP64 - case IR_CONV: - if (irt_isfp((ir-1)->t)) { - ir->prev = REGSP_HINT(RID_FPRET); - continue; - } - /* fallthrough */ -#endif - case IR_CALLN: case IR_CALLXS: -#if LJ_SOFTFP - case IR_MIN: case IR_MAX: -#endif - (ir-1)->prev = REGSP_HINT(RID_RETLO); - ir->prev = REGSP_HINT(RID_RETHI); - continue; - default: - break; - } - break; -#endif -#if LJ_SOFTFP - case IR_MIN: case IR_MAX: - if ((ir+1)->o != IR_HIOP) break; - /* fallthrough */ -#endif /* C calls evict all scratch regs and return results in RID_RET. */ case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT: if (REGARG_NUMGPR < 3 && as->evenspill < 3) as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ -#if LJ_TARGET_X86 && LJ_HASFFI - if (0) { - case IR_CNEW: - if (ir->op2 != REF_NIL && as->evenspill < 4) - as->evenspill = 4; /* lj_cdata_newv needs 4 args. */ - } -#else case IR_CNEW: -#endif case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR: case IR_BUFSTR: ir->prev = REGSP_HINT(RID_RET); @@ -2150,26 +1879,13 @@ static void asm_setup_regsp(ASMState *as) if (inloop) as->modset = RSET_SCRATCH; break; -#if !LJ_SOFTFP case IR_ATAN2: -#if LJ_TARGET_X86 - if (as->evenspill < 4) /* Leave room to call atan2(). */ - as->evenspill = 4; -#endif -#if !LJ_TARGET_X86ORX64 - case IR_LDEXP: -#endif -#endif case IR_POW: if (!LJ_SOFTFP && irt_isnum(ir->t)) { if (inloop) as->modset |= RSET_SCRATCH; -#if LJ_TARGET_X86 - break; -#else ir->prev = REGSP_HINT(RID_FPRET); continue; -#endif } /* fallthrough for integer POW */ case IR_DIV: case IR_MOD: @@ -2181,7 +1897,6 @@ static void asm_setup_regsp(ASMState *as) } break; case IR_FPMATH: -#if LJ_TARGET_X86ORX64 if (ir->op2 <= IRFPM_TRUNC) { if (!(as->flags & JIT_F_SSE4_1)) { ir->prev = REGSP_HINT(RID_XMM0); @@ -2194,16 +1909,10 @@ static void asm_setup_regsp(ASMState *as) if (as->evenspill < 4) /* Leave room to call pow(). */ as->evenspill = 4; } -#endif if (inloop) as->modset |= RSET_SCRATCH; -#if LJ_TARGET_X86 - break; -#else ir->prev = REGSP_HINT(RID_FPRET); continue; -#endif -#if LJ_TARGET_X86ORX64 /* Non-constant shift counts need to be in RID_ECX on x86/x64. */ case IR_BSHL: case IR_BSHR: case IR_BSAR: if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. */ @@ -2215,13 +1924,10 @@ static void asm_setup_regsp(ASMState *as) rset_set(as->modset, RID_ECX); } break; -#endif /* Do not propagate hints across type conversions or loads. */ case IR_TOBIT: case IR_XLOAD: -#if !LJ_TARGET_ARM case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: -#endif break; case IR_CONV: if (irt_isfp(ir->t) || (ir->op2 & IRCONV_SRCMASK) == IRT_NUM || @@ -2398,4 +2104,3 @@ void lj_asm_trace(jit_State *J, GCtrace *T) #undef IR -#endif diff --git a/src/lj_asm.h b/src/lj_asm.h index 2819481b6d..ddaa6bf600 100644 --- a/src/lj_asm.h +++ b/src/lj_asm.h @@ -8,10 +8,8 @@ #include "lj_jit.h" -#if LJ_HASJIT LJ_FUNC void lj_asm_trace(jit_State *J, GCtrace *T); LJ_FUNC void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target); -#endif #endif diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h deleted file mode 100644 index 37bfa40f2f..0000000000 --- a/src/lj_asm_arm.h +++ /dev/null @@ -1,2210 +0,0 @@ -/* -** ARM IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -*/ - -/* -- Register allocator extensions --------------------------------------- */ - -/* Allocate a register with a hint. */ -static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) -{ - Reg r = IR(ref)->r; - if (ra_noreg(r)) { - if (!ra_hashint(r) && !iscrossref(as, ref)) - ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ - r = ra_allocref(as, ref, allow); - } - ra_noweak(as, r); - return r; -} - -/* Allocate a scratch register pair. */ -static Reg ra_scratchpair(ASMState *as, RegSet allow) -{ - RegSet pick1 = as->freeset & allow; - RegSet pick2 = pick1 & (pick1 >> 1) & RSET_GPREVEN; - Reg r; - if (pick2) { - r = rset_picktop(pick2); - } else { - RegSet pick = pick1 & (allow >> 1) & RSET_GPREVEN; - if (pick) { - r = rset_picktop(pick); - ra_restore(as, regcost_ref(as->cost[r+1])); - } else { - pick = pick1 & (allow << 1) & RSET_GPRODD; - if (pick) { - r = ra_restore(as, regcost_ref(as->cost[rset_picktop(pick)-1])); - } else { - r = ra_evict(as, allow & (allow >> 1) & RSET_GPREVEN); - ra_restore(as, regcost_ref(as->cost[r+1])); - } - } - } - lua_assert(rset_test(RSET_GPREVEN, r)); - ra_modified(as, r); - ra_modified(as, r+1); - RA_DBGX((as, "scratchpair $r $r", r, r+1)); - return r; -} - -#if !LJ_SOFTFP -/* Allocate two source registers for three-operand instructions. */ -static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) -{ - IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); - Reg left = irl->r, right = irr->r; - if (ra_hasreg(left)) { - ra_noweak(as, left); - if (ra_noreg(right)) - right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); - else - ra_noweak(as, right); - } else if (ra_hasreg(right)) { - ra_noweak(as, right); - left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); - } else if (ra_hashint(right)) { - right = ra_allocref(as, ir->op2, allow); - left = ra_alloc1(as, ir->op1, rset_exclude(allow, right)); - } else { - left = ra_allocref(as, ir->op1, allow); - right = ra_alloc1(as, ir->op2, rset_exclude(allow, left)); - } - return left | (right << 8); -} -#endif - -/* -- Guard handling ------------------------------------------------------ */ - -/* Generate an exit stub group at the bottom of the reserved MCode memory. */ -static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) -{ - MCode *mxp = as->mcbot; - int i; - if (mxp + 4*4+4*EXITSTUBS_PER_GROUP >= as->mctop) - asm_mclimit(as); - /* str lr, [sp]; bl ->vm_exit_handler; .long DISPATCH_address, group. */ - *mxp++ = ARMI_STR|ARMI_LS_P|ARMI_LS_U|ARMF_D(RID_LR)|ARMF_N(RID_SP); - *mxp = ARMI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)-2)&0x00ffffffu); - mxp++; - *mxp++ = (MCode)i32ptr(J2GG(as->J)->dispatch); /* DISPATCH address */ - *mxp++ = group*EXITSTUBS_PER_GROUP; - for (i = 0; i < EXITSTUBS_PER_GROUP; i++) - *mxp++ = ARMI_B|((-6-i)&0x00ffffffu); - lj_mcode_sync(as->mcbot, mxp); - lj_mcode_commitbot(as->J, mxp); - as->mcbot = mxp; - as->mclim = as->mcbot + MCLIM_REDZONE; - return mxp - EXITSTUBS_PER_GROUP; -} - -/* Setup all needed exit stubs. */ -static void asm_exitstub_setup(ASMState *as, ExitNo nexits) -{ - ExitNo i; - if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) - lj_trace_err(as->J, LJ_TRERR_SNAPOV); - for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++) - if (as->J->exitstubgroup[i] == NULL) - as->J->exitstubgroup[i] = asm_exitstub_gen(as, i); -} - -/* Emit conditional branch to exit for guard. */ -static void asm_guardcc(ASMState *as, ARMCC cc) -{ - MCode *target = exitstub_addr(as->J, as->snapno); - MCode *p = as->mcp; - if (LJ_UNLIKELY(p == as->invmcp)) { - as->loopinv = 1; - *p = ARMI_BL | ((target-p-2) & 0x00ffffffu); - emit_branch(as, ARMF_CC(ARMI_B, cc^1), p+1); - return; - } - emit_branch(as, ARMF_CC(ARMI_BL, cc), target); -} - -/* -- Operand fusion ------------------------------------------------------ */ - -/* Limit linear search to this distance. Avoids O(n^2) behavior. */ -#define CONFLICT_SEARCH_LIM 31 - -/* Check if there's no conflicting instruction between curins and ref. */ -static int noconflict(ASMState *as, IRRef ref, IROp conflict) -{ - IRIns *ir = as->ir; - IRRef i = as->curins; - if (i > ref + CONFLICT_SEARCH_LIM) - return 0; /* Give up, ref is too far away. */ - while (--i > ref) - if (ir[i].o == conflict) - return 0; /* Conflict found. */ - return 1; /* Ok, no conflict. */ -} - -/* Fuse the array base of colocated arrays. */ -static int32_t asm_fuseabase(ASMState *as, IRRef ref) -{ - IRIns *ir = IR(ref); - if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && - !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) - return (int32_t)sizeof(GCtab); - return 0; -} - -/* Fuse array/hash/upvalue reference into register+offset operand. */ -static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, - int lim) -{ - IRIns *ir = IR(ref); - if (ra_noreg(ir->r)) { - if (ir->o == IR_AREF) { - if (mayfuse(as, ref)) { - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? tab : ir->op1; - ofs += 8*IR(ir->op2)->i; - if (ofs > -lim && ofs < lim) { - *ofsp = ofs; - return ra_alloc1(as, refa, allow); - } - } - } - } else if (ir->o == IR_HREFK) { - if (mayfuse(as, ref)) { - int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); - if (ofs < lim) { - *ofsp = ofs; - return ra_alloc1(as, ir->op1, allow); - } - } - } else if (ir->o == IR_UREFC) { - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv); - *ofsp = (ofs & 255); /* Mask out less bits to allow LDRD. */ - return ra_allock(as, (ofs & ~255), allow); - } - } - } - *ofsp = 0; - return ra_alloc1(as, ref, allow); -} - -/* Fuse m operand into arithmetic/logic instructions. */ -static uint32_t asm_fuseopm(ASMState *as, ARMIns ai, IRRef ref, RegSet allow) -{ - IRIns *ir = IR(ref); - if (ra_hasreg(ir->r)) { - ra_noweak(as, ir->r); - return ARMF_M(ir->r); - } else if (irref_isk(ref)) { - uint32_t k = emit_isk12(ai, ir->i); - if (k) - return k; - } else if (mayfuse(as, ref)) { - if (ir->o >= IR_BSHL && ir->o <= IR_BROR) { - Reg m = ra_alloc1(as, ir->op1, allow); - ARMShift sh = ir->o == IR_BSHL ? ARMSH_LSL : - ir->o == IR_BSHR ? ARMSH_LSR : - ir->o == IR_BSAR ? ARMSH_ASR : ARMSH_ROR; - if (irref_isk(ir->op2)) { - return m | ARMF_SH(sh, (IR(ir->op2)->i & 31)); - } else { - Reg s = ra_alloc1(as, ir->op2, rset_exclude(allow, m)); - return m | ARMF_RSH(sh, s); - } - } else if (ir->o == IR_ADD && ir->op1 == ir->op2) { - Reg m = ra_alloc1(as, ir->op1, allow); - return m | ARMF_SH(ARMSH_LSL, 1); - } - } - return ra_allocref(as, ref, allow); -} - -/* Fuse shifts into loads/stores. Only bother with BSHL 2 => lsl #2. */ -static IRRef asm_fuselsl2(ASMState *as, IRRef ref) -{ - IRIns *ir = IR(ref); - if (ra_noreg(ir->r) && mayfuse(as, ref) && ir->o == IR_BSHL && - irref_isk(ir->op2) && IR(ir->op2)->i == 2) - return ir->op1; - return 0; /* No fusion. */ -} - -/* Fuse XLOAD/XSTORE reference into load/store operand. */ -static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref, - RegSet allow, int32_t ofs) -{ - IRIns *ir = IR(ref); - Reg base; - if (ra_noreg(ir->r) && canfuse(as, ir)) { - int32_t lim = (!LJ_SOFTFP && (ai & 0x08000000)) ? 1024 : - (ai & 0x04000000) ? 4096 : 256; - if (ir->o == IR_ADD) { - int32_t ofs2; - if (irref_isk(ir->op2) && - (ofs2 = ofs + IR(ir->op2)->i) > -lim && ofs2 < lim && - (!(!LJ_SOFTFP && (ai & 0x08000000)) || !(ofs2 & 3))) { - ofs = ofs2; - ref = ir->op1; - } else if (ofs == 0 && !(!LJ_SOFTFP && (ai & 0x08000000))) { - IRRef lref = ir->op1, rref = ir->op2; - Reg rn, rm; - if ((ai & 0x04000000)) { - IRRef sref = asm_fuselsl2(as, rref); - if (sref) { - rref = sref; - ai |= ARMF_SH(ARMSH_LSL, 2); - } else if ((sref = asm_fuselsl2(as, lref)) != 0) { - lref = rref; - rref = sref; - ai |= ARMF_SH(ARMSH_LSL, 2); - } - } - rn = ra_alloc1(as, lref, allow); - rm = ra_alloc1(as, rref, rset_exclude(allow, rn)); - if ((ai & 0x04000000)) ai |= ARMI_LS_R; - emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm); - return; - } - } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) { - lua_assert(ofs == 0); - ofs = (int32_t)sizeof(GCstr); - if (irref_isk(ir->op2)) { - ofs += IR(ir->op2)->i; - ref = ir->op1; - } else if (irref_isk(ir->op1)) { - ofs += IR(ir->op1)->i; - ref = ir->op2; - } else { - /* NYI: Fuse ADD with constant. */ - Reg rn = ra_alloc1(as, ir->op1, allow); - uint32_t m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn)); - if ((ai & 0x04000000)) - emit_lso(as, ai, rd, rd, ofs); - else - emit_lsox(as, ai, rd, rd, ofs); - emit_dn(as, ARMI_ADD^m, rd, rn); - return; - } - if (ofs <= -lim || ofs >= lim) { - Reg rn = ra_alloc1(as, ref, allow); - Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn)); - if ((ai & 0x04000000)) ai |= ARMI_LS_R; - emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm); - return; - } - } - } - base = ra_alloc1(as, ref, allow); -#if !LJ_SOFTFP - if ((ai & 0x08000000)) - emit_vlso(as, ai, rd, base, ofs); - else -#endif - if ((ai & 0x04000000)) - emit_lso(as, ai, rd, base, ofs); - else - emit_lsox(as, ai, rd, base, ofs); -} - -#if !LJ_SOFTFP -/* Fuse to multiply-add/sub instruction. */ -static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air) -{ - IRRef lref = ir->op1, rref = ir->op2; - IRIns *irm; - if (lref != rref && - ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && - ra_noreg(irm->r)) || - (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && - (rref = lref, ai = air, ra_noreg(irm->r))))) { - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); - Reg right, left = ra_alloc2(as, irm, - rset_exclude(rset_exclude(RSET_FPR, dest), add)); - right = (left >> 8); left &= 255; - emit_dnm(as, ai, (dest & 15), (left & 15), (right & 15)); - if (dest != add) emit_dm(as, ARMI_VMOV_D, (dest & 15), (add & 15)); - return 1; - } - return 0; -} -#endif - -/* -- Calls --------------------------------------------------------------- */ - -/* Generate a call to a C function. */ -static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) -{ - uint32_t n, nargs = CCI_XNARGS(ci); - int32_t ofs = 0; -#if LJ_SOFTFP - Reg gpr = REGARG_FIRSTGPR; -#else - Reg gpr, fpr = REGARG_FIRSTFPR, fprodd = 0; -#endif - if ((void *)ci->func) - emit_call(as, (void *)ci->func); -#if !LJ_SOFTFP - for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) - as->cost[gpr] = REGCOST(~0u, ASMREF_L); - gpr = REGARG_FIRSTGPR; -#endif - for (n = 0; n < nargs; n++) { /* Setup args. */ - IRRef ref = args[n]; - IRIns *ir = IR(ref); -#if !LJ_SOFTFP - if (ref && irt_isfp(ir->t)) { - RegSet of = as->freeset; - Reg src; - if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) { - if (irt_isnum(ir->t)) { - if (fpr <= REGARG_LASTFPR) { - ra_leftov(as, fpr, ref); - fpr++; - continue; - } - } else if (fprodd) { /* Ick. */ - src = ra_alloc1(as, ref, RSET_FPR); - emit_dm(as, ARMI_VMOV_S, (fprodd & 15), (src & 15) | 0x00400000); - fprodd = 0; - continue; - } else if (fpr <= REGARG_LASTFPR) { - ra_leftov(as, fpr, ref); - fprodd = fpr++; - continue; - } - /* Workaround to protect argument GPRs from being used for remat. */ - as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1); - src = ra_alloc1(as, ref, RSET_FPR); /* May alloc GPR to remat FPR. */ - as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); - fprodd = 0; - goto stackfp; - } - /* Workaround to protect argument GPRs from being used for remat. */ - as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1); - src = ra_alloc1(as, ref, RSET_FPR); /* May alloc GPR to remat FPR. */ - as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); - if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u; - if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ - if (irt_isnum(ir->t)) { - lua_assert(rset_test(as->freeset, gpr+1)); /* Ditto. */ - emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15)); - gpr += 2; - } else { - emit_dn(as, ARMI_VMOV_R_S, gpr, (src & 15)); - gpr++; - } - } else { - stackfp: - if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; - emit_spstore(as, ir, src, ofs); - ofs += irt_isnum(ir->t) ? 8 : 4; - } - } else -#endif - { - if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ - if (ref) ra_leftov(as, gpr, ref); - gpr++; - } else { - if (ref) { - Reg r = ra_alloc1(as, ref, RSET_GPR); - emit_spstore(as, ir, r, ofs); - } - ofs += 4; - } - } - } -} - -/* Setup result reg/sp for call. Evict scratch regs. */ -static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - RegSet drop = RSET_SCRATCH; - int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - if (hiop && ra_hasreg((ir+1)->r)) - rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ - ra_evictset(as, drop); /* Evictions must be performed first. */ - if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); - if (!LJ_SOFTFP && irt_isfp(ir->t)) { - if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) { - Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); - if (irt_isnum(ir->t)) - emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, dest); - else - emit_dn(as, ARMI_VMOV_S_R, RID_RET, dest); - } else { - ra_destreg(as, ir, RID_FPRET); - } - } else if (hiop) { - ra_destpair(as, ir); - } else { - ra_destreg(as, ir, RID_RET); - } - } - UNUSED(ci); -} - -static void asm_callx(ASMState *as, IRIns *ir) -{ - IRRef args[CCI_NARGS_MAX*2]; - CCallInfo ci; - IRRef func; - IRIns *irf; - ci.flags = asm_callx_flags(as, ir); - asm_collectargs(as, ir, &ci, args); - asm_setupresult(as, ir, &ci); - func = ir->op2; irf = IR(func); - if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } - if (irref_isk(func)) { /* Call to constant address. */ - ci.func = (ASMFunction)(void *)(irf->i); - } else { /* Need a non-argument register for indirect calls. */ - Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_R4, RID_R12+1)); - emit_m(as, ARMI_BLXr, freg); - ci.func = (ASMFunction)(void *)0; - } - asm_gencall(as, &ci, args); -} - -/* -- Returns ------------------------------------------------------------- */ - -/* Return to lower frame. Guard that it goes to the right spot. */ -static void asm_retf(ASMState *as, IRIns *ir) -{ - Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); - void *pc = ir_kptr(IR(ir->op2)); - int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); - as->topslot -= (BCReg)delta; - if ((int32_t)as->topslot < 0) as->topslot = 0; - irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ - /* Need to force a spill on REF_BASE now to update the stack slot. */ - emit_lso(as, ARMI_STR, base, RID_SP, ra_spill(as, IR(REF_BASE))); - emit_setgl(as, base, jit_base); - emit_addptr(as, base, -8*delta); - asm_guardcc(as, CC_NE); - emit_nm(as, ARMI_CMP, RID_TMP, - ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base))); - emit_lso(as, ARMI_LDR, RID_TMP, base, -4); -} - -/* -- Type conversions ---------------------------------------------------- */ - -#if !LJ_SOFTFP -static void asm_tointg(ASMState *as, IRIns *ir, Reg left) -{ - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - Reg dest = ra_dest(as, ir, RSET_GPR); - asm_guardcc(as, CC_NE); - emit_d(as, ARMI_VMRS, 0); - emit_dm(as, ARMI_VCMP_D, (tmp & 15), (left & 15)); - emit_dm(as, ARMI_VCVT_F64_S32, (tmp & 15), (tmp & 15)); - emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); - emit_dm(as, ARMI_VCVT_S32_F64, (tmp & 15), (left & 15)); -} - -static void asm_tobit(ASMState *as, IRIns *ir) -{ - RegSet allow = RSET_FPR; - Reg left = ra_alloc1(as, ir->op1, allow); - Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); - Reg tmp = ra_scratch(as, rset_clear(allow, right)); - Reg dest = ra_dest(as, ir, RSET_GPR); - emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); - emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15)); -} -#endif - -static void asm_conv(ASMState *as, IRIns *ir) -{ - IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); -#if !LJ_SOFTFP - int stfp = (st == IRT_NUM || st == IRT_FLOAT); -#endif - IRRef lref = ir->op1; - /* 64 bit integer conversions are handled by SPLIT. */ - lua_assert(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64)); -#if LJ_SOFTFP - /* FP conversions are handled by SPLIT. */ - lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); - /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ -#else - lua_assert(irt_type(ir->t) != st); - if (irt_isfp(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - if (stfp) { /* FP to FP conversion. */ - emit_dm(as, st == IRT_NUM ? ARMI_VCVT_F32_F64 : ARMI_VCVT_F64_F32, - (dest & 15), (ra_alloc1(as, lref, RSET_FPR) & 15)); - } else { /* Integer to FP conversion. */ - Reg left = ra_alloc1(as, lref, RSET_GPR); - ARMIns ai = irt_isfloat(ir->t) ? - (st == IRT_INT ? ARMI_VCVT_F32_S32 : ARMI_VCVT_F32_U32) : - (st == IRT_INT ? ARMI_VCVT_F64_S32 : ARMI_VCVT_F64_U32); - emit_dm(as, ai, (dest & 15), (dest & 15)); - emit_dn(as, ARMI_VMOV_S_R, left, (dest & 15)); - } - } else if (stfp) { /* FP to integer conversion. */ - if (irt_isguard(ir->t)) { - /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); - asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); - } else { - Reg left = ra_alloc1(as, lref, RSET_FPR); - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - Reg dest = ra_dest(as, ir, RSET_GPR); - ARMIns ai; - emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); - ai = irt_isint(ir->t) ? - (st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32) : - (st == IRT_NUM ? ARMI_VCVT_U32_F64 : ARMI_VCVT_U32_F32); - emit_dm(as, ai, (tmp & 15), (left & 15)); - } - } else -#endif - { - Reg dest = ra_dest(as, ir, RSET_GPR); - if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ - Reg left = ra_alloc1(as, lref, RSET_GPR); - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); - if ((as->flags & JIT_F_ARMV6)) { - ARMIns ai = st == IRT_I8 ? ARMI_SXTB : - st == IRT_U8 ? ARMI_UXTB : - st == IRT_I16 ? ARMI_SXTH : ARMI_UXTH; - emit_dm(as, ai, dest, left); - } else if (st == IRT_U8) { - emit_dn(as, ARMI_AND|ARMI_K12|255, dest, left); - } else { - uint32_t shift = st == IRT_I8 ? 24 : 16; - ARMShift sh = st == IRT_U16 ? ARMSH_LSR : ARMSH_ASR; - emit_dm(as, ARMI_MOV|ARMF_SH(sh, shift), dest, RID_TMP); - emit_dm(as, ARMI_MOV|ARMF_SH(ARMSH_LSL, shift), RID_TMP, left); - } - } else { /* Handle 32/32 bit no-op (cast). */ - ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ - } - } -} - -static void asm_strto(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; - IRRef args[2]; - Reg rlo = 0, rhi = 0, tmp; - int destused = ra_used(ir); - int32_t ofs = 0; - ra_evictset(as, RSET_SCRATCH); -#if LJ_SOFTFP - if (destused) { - if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && - (ir->s & 1) == 0 && ir->s + 1 == (ir+1)->s) { - int i; - for (i = 0; i < 2; i++) { - Reg r = (ir+i)->r; - if (ra_hasreg(r)) { - ra_free(as, r); - ra_modified(as, r); - emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); - } - } - ofs = sps_scale(ir->s); - destused = 0; - } else { - rhi = ra_dest(as, ir+1, RSET_GPR); - rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); - } - } - asm_guardcc(as, CC_EQ); - if (destused) { - emit_lso(as, ARMI_LDR, rhi, RID_SP, 4); - emit_lso(as, ARMI_LDR, rlo, RID_SP, 0); - } -#else - UNUSED(rhi); - if (destused) { - if (ra_hasspill(ir->s)) { - ofs = sps_scale(ir->s); - destused = 0; - if (ra_hasreg(ir->r)) { - ra_free(as, ir->r); - ra_modified(as, ir->r); - emit_spload(as, ir, ir->r, ofs); - } - } else { - rlo = ra_dest(as, ir, RSET_FPR); - } - } - asm_guardcc(as, CC_EQ); - if (destused) - emit_vlso(as, ARMI_VLDR_D, rlo, RID_SP, 0); -#endif - emit_n(as, ARMI_CMP|ARMI_K12|0, RID_RET); /* Test return status. */ - args[0] = ir->op1; /* GCstr *str */ - args[1] = ASMREF_TMP1; /* TValue *n */ - asm_gencall(as, ci, args); - tmp = ra_releasetmp(as, ASMREF_TMP1); - if (ofs == 0) - emit_dm(as, ARMI_MOV, tmp, RID_SP); - else - emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR); -} - -/* -- Memory references --------------------------------------------------- */ - -/* Get pointer to TValue. */ -static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) -{ - IRIns *ir = IR(ref); - if (irt_isnum(ir->t)) { - if (irref_isk(ref)) { - /* Use the number constant itself as a TValue. */ - ra_allockreg(as, i32ptr(ir_knum(ir)), dest); - } else { -#if LJ_SOFTFP - lua_assert(0); -#else - /* Otherwise force a spill and use the spill slot. */ - emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR); -#endif - } - } else { - /* Otherwise use [sp] and [sp+4] to hold the TValue. */ - RegSet allow = rset_exclude(RSET_GPR, dest); - Reg type; - emit_dm(as, ARMI_MOV, dest, RID_SP); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, allow); - emit_lso(as, ARMI_STR, src, RID_SP, 0); - } - if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) - type = ra_alloc1(as, ref+1, allow); - else - type = ra_allock(as, irt_toitype(ir->t), allow); - emit_lso(as, ARMI_STR, type, RID_SP, 4); - } -} - -static void asm_aref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx, base; - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? tab : ir->op1; - uint32_t k = emit_isk12(ARMI_ADD, ofs + 8*IR(ir->op2)->i); - if (k) { - base = ra_alloc1(as, refa, RSET_GPR); - emit_dn(as, ARMI_ADD^k, dest, base); - return; - } - } - base = ra_alloc1(as, ir->op1, RSET_GPR); - idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); - emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, base, idx); -} - -/* Inlined hash lookup. Specialized for key type and for const keys. -** The equivalent C code is: -** Node *n = hashkey(t, key); -** do { -** if (lj_obj_equal(&n->key, key)) return &n->val; -** } while ((n = nextnode(n))); -** return niltv(L); -*/ -static void asm_href(ASMState *as, IRIns *ir, IROp merge) -{ - RegSet allow = RSET_GPR; - int destused = ra_used(ir); - Reg dest = ra_dest(as, ir, allow); - Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); - Reg key = 0, keyhi = 0, keynumhi = RID_NONE, tmp = RID_TMP; - IRRef refkey = ir->op2; - IRIns *irkey = IR(refkey); - IRType1 kt = irkey->t; - int32_t k = 0, khi = emit_isk12(ARMI_CMP, irt_toitype(kt)); - uint32_t khash; - MCLabel l_end, l_loop; - rset_clear(allow, tab); - if (!irref_isk(refkey) || irt_isstr(kt)) { -#if LJ_SOFTFP - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - if (irkey[1].o == IR_HIOP) { - if (ra_hasreg((irkey+1)->r)) { - keynumhi = (irkey+1)->r; - keyhi = RID_TMP; - ra_noweak(as, keynumhi); - } else { - keyhi = keynumhi = ra_allocref(as, refkey+1, allow); - } - rset_clear(allow, keynumhi); - khi = 0; - } -#else - if (irt_isnum(kt)) { - key = ra_scratch(as, allow); - rset_clear(allow, key); - keyhi = keynumhi = ra_scratch(as, allow); - rset_clear(allow, keyhi); - khi = 0; - } else { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - } -#endif - } else if (irt_isnum(kt)) { - int32_t val = (int32_t)ir_knum(irkey)->u32.lo; - k = emit_isk12(ARMI_CMP, val); - if (!k) { - key = ra_allock(as, val, allow); - rset_clear(allow, key); - } - val = (int32_t)ir_knum(irkey)->u32.hi; - khi = emit_isk12(ARMI_CMP, val); - if (!khi) { - keyhi = ra_allock(as, val, allow); - rset_clear(allow, keyhi); - } - } else if (!irt_ispri(kt)) { - k = emit_isk12(ARMI_CMP, irkey->i); - if (!k) { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - } - } - if (!irt_ispri(kt)) - tmp = ra_scratchpair(as, allow); - - /* Key not found in chain: jump to exit (if merged) or load niltv. */ - l_end = emit_label(as); - as->invmcp = NULL; - if (merge == IR_NE) - asm_guardcc(as, CC_AL); - else if (destused) - emit_loada(as, dest, niltvg(J2G(as->J))); - - /* Follow hash chain until the end. */ - l_loop = --as->mcp; - emit_n(as, ARMI_CMP|ARMI_K12|0, dest); - emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(Node, next)); - - /* Type and value comparison. */ - if (merge == IR_EQ) - asm_guardcc(as, CC_EQ); - else - emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end); - if (!irt_ispri(kt)) { - emit_nm(as, ARMF_CC(ARMI_CMP, CC_EQ)^k, tmp, key); - emit_nm(as, ARMI_CMP^khi, tmp+1, keyhi); - emit_lsox(as, ARMI_LDRD, tmp, dest, (int32_t)offsetof(Node, key)); - } else { - emit_n(as, ARMI_CMP^khi, tmp); - emit_lso(as, ARMI_LDR, tmp, dest, (int32_t)offsetof(Node, key.it)); - } - *l_loop = ARMF_CC(ARMI_B, CC_NE) | ((as->mcp-l_loop-2) & 0x00ffffffu); - - /* Load main position relative to tab->node into dest. */ - khash = irref_isk(refkey) ? ir_khash(irkey) : 1; - if (khash == 0) { - emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); - } else { - emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp); - emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp); - if (irt_isstr(kt)) { /* Fetch of str->hash is cheaper than ra_allock. */ - emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP); - emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); - emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, hash)); - emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); - } else if (irref_isk(refkey)) { - emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash, - rset_exclude(rset_exclude(RSET_GPR, tab), dest)); - emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); - emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); - } else { /* Must match with hash*() in lj_tab.c. */ - if (ra_hasreg(keynumhi)) { /* Canonicalize +-0.0 to 0.0. */ - if (keyhi == RID_TMP) - emit_dm(as, ARMF_CC(ARMI_MOV, CC_NE), keyhi, keynumhi); - emit_d(as, ARMF_CC(ARMI_MOV, CC_EQ)|ARMI_K12|0, keyhi); - } - emit_dnm(as, ARMI_AND, tmp, tmp, RID_TMP); - emit_dnm(as, ARMI_SUB|ARMF_SH(ARMSH_ROR, 32-HASH_ROT3), tmp, tmp, tmp+1); - emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); - emit_dnm(as, ARMI_EOR|ARMF_SH(ARMSH_ROR, 32-((HASH_ROT2+HASH_ROT1)&31)), - tmp, tmp+1, tmp); - emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); - emit_dnm(as, ARMI_SUB|ARMF_SH(ARMSH_ROR, 32-HASH_ROT1), tmp+1, tmp+1, tmp); - if (ra_hasreg(keynumhi)) { - emit_dnm(as, ARMI_EOR, tmp+1, tmp, key); - emit_dnm(as, ARMI_ORR|ARMI_S, RID_TMP, tmp, key); /* Test for +-0.0. */ - emit_dnm(as, ARMI_ADD, tmp, keynumhi, keynumhi); -#if !LJ_SOFTFP - emit_dnm(as, ARMI_VMOV_RR_D, key, keynumhi, - (ra_alloc1(as, refkey, RSET_FPR) & 15)); -#endif - } else { - emit_dnm(as, ARMI_EOR, tmp+1, tmp, key); - emit_opk(as, ARMI_ADD, tmp, key, (int32_t)HASH_BIAS, - rset_exclude(rset_exclude(RSET_GPR, tab), key)); - } - } - } -} - -static void asm_hrefk(ASMState *as, IRIns *ir) -{ - IRIns *kslot = IR(ir->op2); - IRIns *irkey = IR(kslot->op1); - int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); - int32_t kofs = ofs + (int32_t)offsetof(Node, key); - Reg dest = (ra_used(ir) || ofs > 4095) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; - Reg node = ra_alloc1(as, ir->op1, RSET_GPR); - Reg key = RID_NONE, type = RID_TMP, idx = node; - RegSet allow = rset_exclude(RSET_GPR, node); - lua_assert(ofs % sizeof(Node) == 0); - if (ofs > 4095) { - idx = dest; - rset_clear(allow, dest); - kofs = (int32_t)offsetof(Node, key); - } else if (ra_hasreg(dest)) { - emit_opk(as, ARMI_ADD, dest, node, ofs, allow); - } - asm_guardcc(as, CC_NE); - if (!irt_ispri(irkey->t)) { - RegSet even = (as->freeset & allow); - even = even & (even >> 1) & RSET_GPREVEN; - if (even) { - key = ra_scratch(as, even); - if (rset_test(as->freeset, key+1)) { - type = key+1; - ra_modified(as, type); - } - } else { - key = ra_scratch(as, allow); - } - rset_clear(allow, key); - } - rset_clear(allow, type); - if (irt_isnum(irkey->t)) { - emit_opk(as, ARMF_CC(ARMI_CMP, CC_EQ), 0, type, - (int32_t)ir_knum(irkey)->u32.hi, allow); - emit_opk(as, ARMI_CMP, 0, key, - (int32_t)ir_knum(irkey)->u32.lo, allow); - } else { - if (ra_hasreg(key)) - emit_opk(as, ARMF_CC(ARMI_CMP, CC_EQ), 0, key, irkey->i, allow); - emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype(irkey->t), type); - } - emit_lso(as, ARMI_LDR, type, idx, kofs+4); - if (ra_hasreg(key)) emit_lso(as, ARMI_LDR, key, idx, kofs); - if (ofs > 4095) - emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR); -} - -static void asm_uref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; - emit_lsptr(as, ARMI_LDR, dest, v); - } else { - Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->o == IR_UREFC) { - asm_guardcc(as, CC_NE); - emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP); - emit_opk(as, ARMI_ADD, dest, uv, - (int32_t)offsetof(GCupval, tv), RSET_GPR); - emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); - } else { - emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v)); - } - emit_lso(as, ARMI_LDR, uv, func, - (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); - } -} - -static void asm_fref(ASMState *as, IRIns *ir) -{ - UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); -} - -static void asm_strref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - IRRef ref = ir->op2, refk = ir->op1; - Reg r; - if (irref_isk(ref)) { - IRRef tmp = refk; refk = ref; ref = tmp; - } else if (!irref_isk(refk)) { - uint32_t k, m = ARMI_K12|sizeof(GCstr); - Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); - IRIns *irr = IR(ir->op2); - if (ra_hasreg(irr->r)) { - ra_noweak(as, irr->r); - right = irr->r; - } else if (mayfuse(as, irr->op2) && - irr->o == IR_ADD && irref_isk(irr->op2) && - (k = emit_isk12(ARMI_ADD, - (int32_t)sizeof(GCstr) + IR(irr->op2)->i))) { - m = k; - right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left)); - } else { - right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left)); - } - emit_dn(as, ARMI_ADD^m, dest, dest); - emit_dnm(as, ARMI_ADD, dest, left, right); - return; - } - r = ra_alloc1(as, ref, RSET_GPR); - emit_opk(as, ARMI_ADD, dest, r, - sizeof(GCstr) + IR(refk)->i, rset_exclude(RSET_GPR, r)); -} - -/* -- Loads and stores ---------------------------------------------------- */ - -static ARMIns asm_fxloadins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: return ARMI_LDRSB; - case IRT_U8: return ARMI_LDRB; - case IRT_I16: return ARMI_LDRSH; - case IRT_U16: return ARMI_LDRH; - case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VLDR_D; - case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; - default: return ARMI_LDR; - } -} - -static ARMIns asm_fxstoreins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: case IRT_U8: return ARMI_STRB; - case IRT_I16: case IRT_U16: return ARMI_STRH; - case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VSTR_D; - case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; - default: return ARMI_STR; - } -} - -static void asm_fload(ASMState *as, IRIns *ir) -{ - if (ir->op1 == REF_NIL) { - lua_assert(!ra_used(ir)); /* We can end up here if DCE is turned off. */ - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); - ARMIns ai = asm_fxloadins(ir); - int32_t ofs; - if (ir->op2 == IRFL_TAB_ARRAY) { - ofs = asm_fuseabase(as, ir->op1); - if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ - emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); - return; - } - } - ofs = field_ofs[ir->op2]; - if ((ai & 0x04000000)) - emit_lso(as, ai, dest, idx, ofs); - else - emit_lsox(as, ai, dest, idx, ofs); - } -} - -static void asm_fstore(ASMState *as, IRIns *ir) -{ - if (ir->r != RID_SINK) { - Reg src = ra_alloc1(as, ir->op2, RSET_GPR); - IRIns *irf = IR(ir->op1); - Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); - int32_t ofs = field_ofs[irf->op2]; - ARMIns ai = asm_fxstoreins(ir); - if ((ai & 0x04000000)) - emit_lso(as, ai, src, idx, ofs); - else - emit_lsox(as, ai, src, idx, ofs); - } -} - -static void asm_xload(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, - (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); -} - -static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) -{ - if (ir->r != RID_SINK) { - Reg src = ra_alloc1(as, ir->op2, - (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, - rset_exclude(RSET_GPR, src), ofs); - } -} - -#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) - -static void asm_ahuvload(ASMState *as, IRIns *ir) -{ - int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); - IRType t = hiop ? IRT_NUM : irt_type(ir->t); - Reg dest = RID_NONE, type = RID_NONE, idx; - RegSet allow = RSET_GPR; - int32_t ofs = 0; - if (hiop && ra_used(ir+1)) { - type = ra_dest(as, ir+1, allow); - rset_clear(allow, type); - } - if (ra_used(ir)) { - lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); - dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); - rset_clear(allow, dest); - } - idx = asm_fuseahuref(as, ir->op1, &ofs, allow, - (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096); - if (!hiop || type == RID_NONE) { - rset_clear(allow, idx); - if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && - rset_test((as->freeset & allow), dest+1)) { - type = dest+1; - ra_modified(as, type); - } else { - type = RID_TMP; - } - } - asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE); - emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type); - if (ra_hasreg(dest)) { -#if !LJ_SOFTFP - if (t == IRT_NUM) - emit_vlso(as, ARMI_VLDR_D, dest, idx, ofs); - else -#endif - emit_lso(as, ARMI_LDR, dest, idx, ofs); - } - emit_lso(as, ARMI_LDR, type, idx, ofs+4); -} - -static void asm_ahustore(ASMState *as, IRIns *ir) -{ - if (ir->r != RID_SINK) { - RegSet allow = RSET_GPR; - Reg idx, src = RID_NONE, type = RID_NONE; - int32_t ofs = 0; -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - src = ra_alloc1(as, ir->op2, RSET_FPR); - idx = asm_fuseahuref(as, ir->op1, &ofs, allow, 1024); - emit_vlso(as, ARMI_VSTR_D, src, idx, ofs); - } else -#endif - { - int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); - if (!irt_ispri(ir->t)) { - src = ra_alloc1(as, ir->op2, allow); - rset_clear(allow, src); - } - if (hiop) - type = ra_alloc1(as, (ir+1)->op2, allow); - else - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type), 4096); - if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs); - emit_lso(as, ARMI_STR, type, idx, ofs+4); - } - } -} - -static void asm_sload(ASMState *as, IRIns *ir) -{ - int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); - int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); - IRType t = hiop ? IRT_NUM : irt_type(ir->t); - Reg dest = RID_NONE, type = RID_NONE, base; - RegSet allow = RSET_GPR; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); -#if LJ_SOFTFP - lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ - if (hiop && ra_used(ir+1)) { - type = ra_dest(as, ir+1, allow); - rset_clear(allow, type); - } -#else - if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(ir->t) && t == IRT_INT) { - dest = ra_scratch(as, RSET_FPR); - asm_tointg(as, ir, dest); - t = IRT_NUM; /* Continue with a regular number type check. */ - } else -#endif - if (ra_used(ir)) { - Reg tmp = RID_NONE; - if ((ir->op2 & IRSLOAD_CONVERT)) - tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR); - lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); - dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); - rset_clear(allow, dest); - base = ra_alloc1(as, REF_BASE, allow); - if ((ir->op2 & IRSLOAD_CONVERT)) { - if (t == IRT_INT) { - emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); - emit_dm(as, ARMI_VCVT_S32_F64, (tmp & 15), (tmp & 15)); - t = IRT_NUM; /* Check for original type. */ - } else { - emit_dm(as, ARMI_VCVT_F64_S32, (dest & 15), (dest & 15)); - emit_dn(as, ARMI_VMOV_S_R, tmp, (dest & 15)); - t = IRT_INT; /* Check for original type. */ - } - dest = tmp; - } - goto dotypecheck; - } - base = ra_alloc1(as, REF_BASE, allow); -dotypecheck: - rset_clear(allow, base); - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - if (ra_noreg(type)) { - if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && - rset_test((as->freeset & allow), dest+1)) { - type = dest+1; - ra_modified(as, type); - } else { - type = RID_TMP; - } - } - asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE); - emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type); - } - if (ra_hasreg(dest)) { -#if !LJ_SOFTFP - if (t == IRT_NUM) { - if (ofs < 1024) { - emit_vlso(as, ARMI_VLDR_D, dest, base, ofs); - } else { - if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs+4); - emit_vlso(as, ARMI_VLDR_D, dest, RID_TMP, 0); - emit_opk(as, ARMI_ADD, RID_TMP, base, ofs, allow); - return; - } - } else -#endif - emit_lso(as, ARMI_LDR, dest, base, ofs); - } - if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs+4); -} - -/* -- Allocations --------------------------------------------------------- */ - -#if LJ_HASFFI -static void asm_cnew(ASMState *as, IRIns *ir) -{ - CTState *cts = ctype_ctsG(J2G(as->J)); - CTypeID id = (CTypeID)IR(ir->op1)->i; - CTSize sz; - CTInfo info = lj_ctype_info(cts, id, &sz); - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; - IRRef args[4]; - RegSet allow = (RSET_GPR & ~RSET_SCRATCH); - RegSet drop = RSET_SCRATCH; - lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); - - as->gcsteps++; - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - ra_evictset(as, drop); - if (ra_used(ir)) - ra_destreg(as, ir, RID_RET); /* GCcdata * */ - - /* Initialize immutable cdata object. */ - if (ir->o == IR_CNEWI) { - int32_t ofs = sizeof(GCcdata); - lua_assert(sz == 4 || sz == 8); - if (sz == 8) { - ofs += 4; ir++; - lua_assert(ir->o == IR_HIOP); - } - for (;;) { - Reg r = ra_alloc1(as, ir->op2, allow); - emit_lso(as, ARMI_STR, r, RID_RET, ofs); - rset_clear(allow, r); - if (ofs == sizeof(GCcdata)) break; - ofs -= 4; ir--; - } - } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ - ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ir->op1; /* CTypeID id */ - args[2] = ir->op2; /* CTSize sz */ - args[3] = ASMREF_TMP1; /* CTSize align */ - asm_gencall(as, ci, args); - emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); - return; - } - - /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ - { - uint32_t k = emit_isk12(ARMI_MOV, id); - Reg r = k ? RID_R1 : ra_allock(as, id, allow); - emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); - emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); - emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP); - if (k) emit_d(as, ARMI_MOV^k, RID_R1); - } - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ASMREF_TMP1; /* MSize size */ - asm_gencall(as, ci, args); - ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), - ra_releasetmp(as, ASMREF_TMP1)); -} -#else -#define asm_cnew(as, ir) ((void)0) -#endif - -/* -- Write barriers ------------------------------------------------------ */ - -static void asm_tbar(ASMState *as, IRIns *ir) -{ - Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); - Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab)); - Reg gr = ra_allock(as, i32ptr(J2G(as->J)), - rset_exclude(rset_exclude(RSET_GPR, tab), link)); - Reg mark = RID_TMP; - MCLabel l_end = emit_label(as); - emit_lso(as, ARMI_STR, link, tab, (int32_t)offsetof(GCtab, gclist)); - emit_lso(as, ARMI_STRB, mark, tab, (int32_t)offsetof(GCtab, marked)); - emit_lso(as, ARMI_STR, tab, gr, - (int32_t)offsetof(global_State, gc.grayagain)); - emit_dn(as, ARMI_BIC|ARMI_K12|LJ_GC_BLACK, mark, mark); - emit_lso(as, ARMI_LDR, link, gr, - (int32_t)offsetof(global_State, gc.grayagain)); - emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end); - emit_n(as, ARMI_TST|ARMI_K12|LJ_GC_BLACK, mark); - emit_lso(as, ARMI_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked)); -} - -static void asm_obar(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; - IRRef args[2]; - MCLabel l_end; - Reg obj, val, tmp; - /* No need for other object barriers (yet). */ - lua_assert(IR(ir->op1)->o == IR_UREFC); - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ir->op1; /* TValue *tv */ - asm_gencall(as, ci, args); - if ((l_end[-1] >> 28) == CC_AL) - l_end[-1] = ARMF_CC(l_end[-1], CC_NE); - else - emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end); - ra_allockreg(as, i32ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1)); - obj = IR(ir->op1)->r; - tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); - emit_n(as, ARMF_CC(ARMI_TST, CC_NE)|ARMI_K12|LJ_GC_BLACK, tmp); - emit_n(as, ARMI_TST|ARMI_K12|LJ_GC_WHITES, RID_TMP); - val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); - emit_lso(as, ARMI_LDRB, tmp, obj, - (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); - emit_lso(as, ARMI_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked)); -} - -/* -- Arithmetic and logic operations ------------------------------------- */ - -#if !LJ_SOFTFP -static void asm_fparith(ASMState *as, IRIns *ir, ARMIns ai) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - emit_dnm(as, ai, (dest & 15), (left & 15), (right & 15)); -} - -static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); - emit_dm(as, ai, (dest & 15), (left & 15)); -} - -static void asm_callround(ASMState *as, IRIns *ir, int id) -{ - /* The modified regs must match with the *.dasc implementation. */ - RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)| - RID2RSET(RID_R3)|RID2RSET(RID_R12); - RegSet of; - Reg dest, src; - ra_evictset(as, drop); - dest = ra_dest(as, ir, RSET_FPR); - emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15)); - emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf : - id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf : - (void *)lj_vm_trunc_sf); - /* Workaround to protect argument GPRs from being used for remat. */ - of = as->freeset; - as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1); - as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L); - src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */ - as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1)); - emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15)); -} - -static void asm_fpmath(ASMState *as, IRIns *ir) -{ - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - return; - if (ir->op2 <= IRFPM_TRUNC) - asm_callround(as, ir, ir->op2); - else if (ir->op2 == IRFPM_SQRT) - asm_fpunary(as, ir, ARMI_VSQRT_D); - else - asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); -} -#endif - -static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) -{ - IRIns *ir; - if (irref_isk(rref)) - return 0; /* Don't swap constants to the left. */ - if (irref_isk(lref)) - return 1; /* But swap constants to the right. */ - ir = IR(rref); - if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) || - (ir->o == IR_ADD && ir->op1 == ir->op2)) - return 0; /* Don't swap fusable operands to the left. */ - ir = IR(lref); - if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) || - (ir->o == IR_ADD && ir->op1 == ir->op2)) - return 1; /* But swap fusable operands to the right. */ - return 0; /* Otherwise don't swap. */ -} - -static void asm_intop(ASMState *as, IRIns *ir, ARMIns ai) -{ - IRRef lref = ir->op1, rref = ir->op2; - Reg left, dest = ra_dest(as, ir, RSET_GPR); - uint32_t m; - if (asm_swapops(as, lref, rref)) { - IRRef tmp = lref; lref = rref; rref = tmp; - if ((ai & ~ARMI_S) == ARMI_SUB || (ai & ~ARMI_S) == ARMI_SBC) - ai ^= (ARMI_SUB^ARMI_RSB); - } - left = ra_hintalloc(as, lref, dest, RSET_GPR); - m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); - if (irt_isguard(ir->t)) { /* For IR_ADDOV etc. */ - asm_guardcc(as, CC_VS); - ai |= ARMI_S; - } - emit_dn(as, ai^m, dest, left); -} - -static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai) -{ - if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */ - as->flagmcp = NULL; - as->mcp++; - ai |= ARMI_S; - } - asm_intop(as, ir, ai); -} - -static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - emit_dn(as, ai|ARMI_K12|0, dest, left); -} - -/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */ -static void asm_intmul(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest)); - Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - Reg tmp = RID_NONE; - /* ARMv5 restriction: dest != left and dest_hi != left. */ - if (dest == left && left != right) { left = right; right = dest; } - if (irt_isguard(ir->t)) { /* IR_MULOV */ - if (!(as->flags & JIT_F_ARMV6) && dest == left) - tmp = left = ra_scratch(as, rset_exclude(RSET_GPR, left)); - asm_guardcc(as, CC_NE); - emit_nm(as, ARMI_TEQ|ARMF_SH(ARMSH_ASR, 31), RID_TMP, dest); - emit_dnm(as, ARMI_SMULL|ARMF_S(right), dest, RID_TMP, left); - } else { - if (!(as->flags & JIT_F_ARMV6) && dest == left) tmp = left = RID_TMP; - emit_nm(as, ARMI_MUL|ARMF_S(right), dest, left); - } - /* Only need this for the dest == left == right case. */ - if (ra_hasreg(tmp)) emit_dm(as, ARMI_MOV, tmp, right); -} - -static void asm_add(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - if (!asm_fusemadd(as, ir, ARMI_VMLA_D, ARMI_VMLA_D)) - asm_fparith(as, ir, ARMI_VADD_D); - return; - } -#endif - asm_intop_s(as, ir, ARMI_ADD); -} - -static void asm_sub(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - if (!asm_fusemadd(as, ir, ARMI_VNMLS_D, ARMI_VMLS_D)) - asm_fparith(as, ir, ARMI_VSUB_D); - return; - } -#endif - asm_intop_s(as, ir, ARMI_SUB); -} - -static void asm_mul(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - asm_fparith(as, ir, ARMI_VMUL_D); - return; - } -#endif - asm_intmul(as, ir); -} - -#define asm_addov(as, ir) asm_add(as, ir) -#define asm_subov(as, ir) asm_sub(as, ir) -#define asm_mulov(as, ir) asm_mul(as, ir) - -#if !LJ_SOFTFP -#define asm_div(as, ir) asm_fparith(as, ir, ARMI_VDIV_D) -#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) -#define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D) -#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) -#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) -#endif - -#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) - -static void asm_neg(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - asm_fpunary(as, ir, ARMI_VNEG_D); - return; - } -#endif - asm_intneg(as, ir, ARMI_RSB); -} - -static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai) -{ - if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */ - uint32_t cc = (as->mcp[1] >> 28); - as->flagmcp = NULL; - if (cc <= CC_NE) { - as->mcp++; - ai |= ARMI_S; - } else if (cc == CC_GE) { - *++as->mcp ^= ((CC_GE^CC_PL) << 28); - ai |= ARMI_S; - } else if (cc == CC_LT) { - *++as->mcp ^= ((CC_LT^CC_MI) << 28); - ai |= ARMI_S; - } /* else: other conds don't work with bit ops. */ - } - if (ir->op2 == 0) { - Reg dest = ra_dest(as, ir, RSET_GPR); - uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); - emit_d(as, ai^m, dest); - } else { - /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */ - asm_intop(as, ir, ai); - } -} - -#define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN) - -static void asm_bswap(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - if ((as->flags & JIT_F_ARMV6)) { - emit_dm(as, ARMI_REV, dest, left); - } else { - Reg tmp2 = dest; - if (tmp2 == left) - tmp2 = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, dest), left)); - emit_dnm(as, ARMI_EOR|ARMF_SH(ARMSH_LSR, 8), dest, tmp2, RID_TMP); - emit_dm(as, ARMI_MOV|ARMF_SH(ARMSH_ROR, 8), tmp2, left); - emit_dn(as, ARMI_BIC|ARMI_K12|256*8|255, RID_TMP, RID_TMP); - emit_dnm(as, ARMI_EOR|ARMF_SH(ARMSH_ROR, 16), RID_TMP, left, left); - } -} - -#define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND) -#define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR) -#define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR) - -static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) -{ - if (irref_isk(ir->op2)) { /* Constant shifts. */ - /* NYI: Turn SHL+SHR or BAND+SHR into uxtb, uxth or ubfx. */ - /* NYI: Turn SHL+ASR into sxtb, sxth or sbfx. */ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - int32_t shift = (IR(ir->op2)->i & 31); - emit_dm(as, ARMI_MOV|ARMF_SH(sh, shift), dest, left); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_dm(as, ARMI_MOV|ARMF_RSH(sh, right), dest, left); - } -} - -#define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL) -#define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR) -#define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR) -#define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR) -#define asm_brol(as, ir) lua_assert(0) - -static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) -{ - uint32_t kcmp = 0, kmov = 0; - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - Reg right = 0; - if (irref_isk(ir->op2)) { - kcmp = emit_isk12(ARMI_CMP, IR(ir->op2)->i); - if (kcmp) kmov = emit_isk12(ARMI_MOV, IR(ir->op2)->i); - } - if (!kmov) { - kcmp = 0; - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - } - if (kmov || dest != right) { - emit_dm(as, ARMF_CC(ARMI_MOV, cc)^kmov, dest, right); - cc ^= 1; /* Must use opposite conditions for paired moves. */ - } else { - cc ^= (CC_LT^CC_GT); /* Otherwise may swap CC_LT <-> CC_GT. */ - } - if (dest != left) emit_dm(as, ARMF_CC(ARMI_MOV, cc), dest, left); - emit_nm(as, ARMI_CMP^kcmp, left, right); -} - -#if LJ_SOFTFP -static void asm_sfpmin_max(ASMState *as, IRIns *ir, int cc) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; - RegSet drop = RSET_SCRATCH; - Reg r; - IRRef args[4]; - args[0] = ir->op1; args[1] = (ir+1)->op1; - args[2] = ir->op2; args[3] = (ir+1)->op2; - /* __aeabi_cdcmple preserves r0-r3. */ - if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); - if (ra_hasreg((ir+1)->r)) rset_clear(drop, (ir+1)->r); - if (!rset_test(as->freeset, RID_R2) && - regcost_ref(as->cost[RID_R2]) == args[2]) rset_clear(drop, RID_R2); - if (!rset_test(as->freeset, RID_R3) && - regcost_ref(as->cost[RID_R3]) == args[3]) rset_clear(drop, RID_R3); - ra_evictset(as, drop); - ra_destpair(as, ir); - emit_dm(as, ARMF_CC(ARMI_MOV, cc), RID_RETHI, RID_R3); - emit_dm(as, ARMF_CC(ARMI_MOV, cc), RID_RETLO, RID_R2); - emit_call(as, (void *)ci->func); - for (r = RID_R0; r <= RID_R3; r++) - ra_leftov(as, r, args[r-RID_R0]); -} -#else -static void asm_fpmin_max(ASMState *as, IRIns *ir, int cc) -{ - Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = ((left >> 8) & 15); left &= 15; - if (dest != left) emit_dm(as, ARMF_CC(ARMI_VMOV_D, cc^1), dest, left); - if (dest != right) emit_dm(as, ARMF_CC(ARMI_VMOV_D, cc), dest, right); - emit_d(as, ARMI_VMRS, 0); - emit_dm(as, ARMI_VCMP_D, left, right); -} -#endif - -static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc) -{ -#if LJ_SOFTFP - UNUSED(fcc); -#else - if (irt_isnum(ir->t)) - asm_fpmin_max(as, ir, fcc); - else -#endif - asm_intmin_max(as, ir, cc); -} - -#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_HI) -#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LO) - -/* -- Comparisons --------------------------------------------------------- */ - -/* Map of comparisons to flags. ORDER IR. */ -static const uint8_t asm_compmap[IR_ABC+1] = { - /* op FP swp int cc FP cc */ - /* LT */ CC_GE + (CC_HS << 4), - /* GE x */ CC_LT + (CC_HI << 4), - /* LE */ CC_GT + (CC_HI << 4), - /* GT x */ CC_LE + (CC_HS << 4), - /* ULT x */ CC_HS + (CC_LS << 4), - /* UGE */ CC_LO + (CC_LO << 4), - /* ULE x */ CC_HI + (CC_LO << 4), - /* UGT */ CC_LS + (CC_LS << 4), - /* EQ */ CC_NE + (CC_NE << 4), - /* NE */ CC_EQ + (CC_EQ << 4), - /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. */ -}; - -#if LJ_SOFTFP -/* FP comparisons. */ -static void asm_sfpcomp(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; - RegSet drop = RSET_SCRATCH; - Reg r; - IRRef args[4]; - int swp = (((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1) << 1); - args[swp^0] = ir->op1; args[swp^1] = (ir+1)->op1; - args[swp^2] = ir->op2; args[swp^3] = (ir+1)->op2; - /* __aeabi_cdcmple preserves r0-r3. This helps to reduce spills. */ - for (r = RID_R0; r <= RID_R3; r++) - if (!rset_test(as->freeset, r) && - regcost_ref(as->cost[r]) == args[r-RID_R0]) rset_clear(drop, r); - ra_evictset(as, drop); - asm_guardcc(as, (asm_compmap[ir->o] >> 4)); - emit_call(as, (void *)ci->func); - for (r = RID_R0; r <= RID_R3; r++) - ra_leftov(as, r, args[r-RID_R0]); -} -#else -/* FP comparisons. */ -static void asm_fpcomp(ASMState *as, IRIns *ir) -{ - Reg left, right; - ARMIns ai; - int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1); - if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) { - left = (ra_alloc1(as, ir->op1, RSET_FPR) & 15); - right = 0; - ai = ARMI_VCMPZ_D; - } else { - left = ra_alloc2(as, ir, RSET_FPR); - if (swp) { - right = (left & 15); left = ((left >> 8) & 15); - } else { - right = ((left >> 8) & 15); left &= 15; - } - ai = ARMI_VCMP_D; - } - asm_guardcc(as, (asm_compmap[ir->o] >> 4)); - emit_d(as, ARMI_VMRS, 0); - emit_dm(as, ai, left, right); -} -#endif - -/* Integer comparisons. */ -static void asm_intcomp(ASMState *as, IRIns *ir) -{ - ARMCC cc = (asm_compmap[ir->o] & 15); - IRRef lref = ir->op1, rref = ir->op2; - Reg left; - uint32_t m; - int cmpprev0 = 0; - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); - if (asm_swapops(as, lref, rref)) { - Reg tmp = lref; lref = rref; rref = tmp; - if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ - else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */ - } - if (irref_isk(rref) && IR(rref)->i == 0) { - IRIns *irl = IR(lref); - cmpprev0 = (irl+1 == ir); - /* Combine comp(BAND(left, right), 0) into tst left, right. */ - if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) { - IRRef blref = irl->op1, brref = irl->op2; - uint32_t m2 = 0; - Reg bleft; - if (asm_swapops(as, blref, brref)) { - Reg tmp = blref; blref = brref; brref = tmp; - } - if (irref_isk(brref)) { - m2 = emit_isk12(ARMI_AND, IR(brref)->i); - if ((m2 & (ARMI_AND^ARMI_BIC))) - goto notst; /* Not beneficial if we miss a constant operand. */ - } - if (cc == CC_GE) cc = CC_PL; - else if (cc == CC_LT) cc = CC_MI; - else if (cc > CC_NE) goto notst; /* Other conds don't work with tst. */ - bleft = ra_alloc1(as, blref, RSET_GPR); - if (!m2) m2 = asm_fuseopm(as, 0, brref, rset_exclude(RSET_GPR, bleft)); - asm_guardcc(as, cc); - emit_n(as, ARMI_TST^m2, bleft); - return; - } - } -notst: - left = ra_alloc1(as, lref, RSET_GPR); - m = asm_fuseopm(as, ARMI_CMP, rref, rset_exclude(RSET_GPR, left)); - asm_guardcc(as, cc); - emit_n(as, ARMI_CMP^m, left); - /* Signed comparison with zero and referencing previous ins? */ - if (cmpprev0 && (cc <= CC_NE || cc >= CC_GE)) - as->flagmcp = as->mcp; /* Allow elimination of the compare. */ -} - -static void asm_comp(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) - asm_fpcomp(as, ir); - else -#endif - asm_intcomp(as, ir); -} - -#define asm_equal(as, ir) asm_comp(as, ir) - -#if LJ_HASFFI -/* 64 bit integer comparisons. */ -static void asm_int64comp(ASMState *as, IRIns *ir) -{ - int signedcomp = (ir->o <= IR_GT); - ARMCC cclo, cchi; - Reg leftlo, lefthi; - uint32_t mlo, mhi; - RegSet allow = RSET_GPR, oldfree; - - /* Always use unsigned comparison for loword. */ - cclo = asm_compmap[ir->o + (signedcomp ? 4 : 0)] & 15; - leftlo = ra_alloc1(as, ir->op1, allow); - oldfree = as->freeset; - mlo = asm_fuseopm(as, ARMI_CMP, ir->op2, rset_clear(allow, leftlo)); - allow &= ~(oldfree & ~as->freeset); /* Update for allocs of asm_fuseopm. */ - - /* Use signed or unsigned comparison for hiword. */ - cchi = asm_compmap[ir->o] & 15; - lefthi = ra_alloc1(as, (ir+1)->op1, allow); - mhi = asm_fuseopm(as, ARMI_CMP, (ir+1)->op2, rset_clear(allow, lefthi)); - - /* All register allocations must be performed _before_ this point. */ - if (signedcomp) { - MCLabel l_around = emit_label(as); - asm_guardcc(as, cclo); - emit_n(as, ARMI_CMP^mlo, leftlo); - emit_branch(as, ARMF_CC(ARMI_B, CC_NE), l_around); - if (cchi == CC_GE || cchi == CC_LE) cchi ^= 6; /* GE -> GT, LE -> LT */ - asm_guardcc(as, cchi); - } else { - asm_guardcc(as, cclo); - emit_n(as, ARMF_CC(ARMI_CMP, CC_EQ)^mlo, leftlo); - } - emit_n(as, ARMI_CMP^mhi, lefthi); -} -#endif - -/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ - -/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ -static void asm_hiop(ASMState *as, IRIns *ir) -{ -#if LJ_HASFFI || LJ_SOFTFP - /* HIOP is marked as a store because it needs its own DCE logic. */ - int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ - if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; - if ((ir-1)->o <= IR_NE) { /* 64 bit integer or FP comparisons. ORDER IR. */ - as->curins--; /* Always skip the loword comparison. */ -#if LJ_SOFTFP - if (!irt_isint(ir->t)) { - asm_sfpcomp(as, ir-1); - return; - } -#endif -#if LJ_HASFFI - asm_int64comp(as, ir-1); -#endif - return; -#if LJ_SOFTFP - } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { - as->curins--; /* Always skip the loword min/max. */ - if (uselo || usehi) - asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO); - return; -#elif LJ_HASFFI - } else if ((ir-1)->o == IR_CONV) { - as->curins--; /* Always skip the CONV. */ - if (usehi || uselo) - asm_conv64(as, ir); - return; -#endif - } else if ((ir-1)->o == IR_XSTORE) { - if ((ir-1)->r != RID_SINK) - asm_xstore_(as, ir, 4); - return; - } - if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ - switch ((ir-1)->o) { -#if LJ_HASFFI - case IR_ADD: - as->curins--; - asm_intop(as, ir, ARMI_ADC); - asm_intop(as, ir-1, ARMI_ADD|ARMI_S); - break; - case IR_SUB: - as->curins--; - asm_intop(as, ir, ARMI_SBC); - asm_intop(as, ir-1, ARMI_SUB|ARMI_S); - break; - case IR_NEG: - as->curins--; - asm_intneg(as, ir, ARMI_RSC); - asm_intneg(as, ir-1, ARMI_RSB|ARMI_S); - break; -#endif -#if LJ_SOFTFP - case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: - case IR_STRTO: - if (!uselo) - ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ - break; -#endif - case IR_CALLN: - case IR_CALLS: - case IR_CALLXS: - if (!uselo) - ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ - break; -#if LJ_SOFTFP - case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: -#endif - case IR_CNEWI: - /* Nothing to do here. Handled by lo op itself. */ - break; - default: lua_assert(0); break; - } -#else - UNUSED(as); UNUSED(ir); lua_assert(0); -#endif -} - -/* -- Profiling ----------------------------------------------------------- */ - -static void asm_prof(ASMState *as, IRIns *ir) -{ - UNUSED(ir); - asm_guardcc(as, CC_NE); - emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP); - emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask); -} - -/* -- Stack handling ------------------------------------------------------ */ - -/* Check Lua stack size for overflow. Use exit handler as fallback. */ -static void asm_stack_check(ASMState *as, BCReg topslot, - IRIns *irp, RegSet allow, ExitNo exitno) -{ - Reg pbase; - uint32_t k; - if (irp) { - if (!ra_hasspill(irp->s)) { - pbase = irp->r; - lua_assert(ra_hasreg(pbase)); - } else if (allow) { - pbase = rset_pickbot(allow); - } else { - pbase = RID_RET; - emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */ - } - } else { - pbase = RID_BASE; - } - emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno)); - k = emit_isk12(0, (int32_t)(8*topslot)); - lua_assert(k); - emit_n(as, ARMI_CMP^k, RID_TMP); - emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase); - emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, - (int32_t)offsetof(lua_State, maxstack)); - if (irp) { /* Must not spill arbitrary registers in head of side trace. */ - int32_t i = i32ptr(&J2G(as->J)->cur_L); - if (ra_hasspill(irp->s)) - emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); - emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); - if (ra_hasspill(irp->s) && !allow) - emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ - emit_loadi(as, RID_TMP, (i & ~4095)); - } else { - emit_getgl(as, RID_TMP, cur_L); - } -} - -/* Restore Lua stack from on-trace state. */ -static void asm_stack_restore(ASMState *as, SnapShot *snap) -{ - SnapEntry *map = &as->T->snapmap[snap->mapofs]; - SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; - MSize n, nent = snap->nent; - /* Store the value of all modified slots to the Lua stack. */ - for (n = 0; n < nent; n++) { - SnapEntry sn = map[n]; - BCReg s = snap_slot(sn); - int32_t ofs = 8*((int32_t)s-1); - IRRef ref = snap_ref(sn); - IRIns *ir = IR(ref); - if ((sn & SNAP_NORESTORE)) - continue; - if (irt_isnum(ir->t)) { -#if LJ_SOFTFP - RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); - Reg tmp; - lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ - tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, - rset_exclude(RSET_GPREVEN, RID_BASE)); - emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs); - if (rset_test(as->freeset, tmp+1)) odd = RID2RSET(tmp+1); - tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, odd); - emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs+4); -#else - Reg src = ra_alloc1(as, ref, RSET_FPR); - emit_vlso(as, ARMI_VSTR_D, src, RID_BASE, ofs); -#endif - } else { - RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); - Reg type; - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE)); - emit_lso(as, ARMI_STR, src, RID_BASE, ofs); - if (rset_test(as->freeset, src+1)) odd = RID2RSET(src+1); - } - if ((sn & (SNAP_CONT|SNAP_FRAME))) { - if (s == 0) continue; /* Do not overwrite link to previous frame. */ - type = ra_allock(as, (int32_t)(*flinks--), odd); -#if LJ_SOFTFP - } else if ((sn & SNAP_SOFTFPNUM)) { - type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE)); -#endif - } else { - type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd); - } - emit_lso(as, ARMI_STR, type, RID_BASE, ofs+4); - } - checkmclim(as); - } - lua_assert(map + nent == flinks); -} - -/* -- GC handling --------------------------------------------------------- */ - -/* Check GC threshold and do one or more GC steps. */ -static void asm_gc_check(ASMState *as) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; - IRRef args[2]; - MCLabel l_end; - Reg tmp1, tmp2; - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ - asm_guardcc(as, CC_NE); /* Assumes asm_snap_prep() already done. */ - emit_n(as, ARMI_CMP|ARMI_K12|0, RID_RET); - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ASMREF_TMP2; /* MSize steps */ - asm_gencall(as, ci, args); - tmp1 = ra_releasetmp(as, ASMREF_TMP1); - tmp2 = ra_releasetmp(as, ASMREF_TMP2); - emit_loadi(as, tmp2, as->gcsteps); - /* Jump around GC step if GC total < GC threshold. */ - emit_branch(as, ARMF_CC(ARMI_B, CC_LS), l_end); - emit_nm(as, ARMI_CMP, RID_TMP, tmp2); - emit_lso(as, ARMI_LDR, tmp2, tmp1, - (int32_t)offsetof(global_State, gc.threshold)); - emit_lso(as, ARMI_LDR, RID_TMP, tmp1, - (int32_t)offsetof(global_State, gc.total)); - ra_allockreg(as, i32ptr(J2G(as->J)), tmp1); - as->gcsteps = 0; - checkmclim(as); -} - -/* -- Loop handling ------------------------------------------------------- */ - -/* Fixup the loop branch. */ -static void asm_loop_fixup(ASMState *as) -{ - MCode *p = as->mctop; - MCode *target = as->mcp; - if (as->loopinv) { /* Inverted loop branch? */ - /* asm_guardcc already inverted the bcc and patched the final bl. */ - p[-2] |= ((uint32_t)(target-p) & 0x00ffffffu); - } else { - p[-1] = ARMI_B | ((uint32_t)((target-p)-1) & 0x00ffffffu); - } -} - -/* -- Head of trace ------------------------------------------------------- */ - -/* Reload L register from g->cur_L. */ -static void asm_head_lreg(ASMState *as) -{ - IRIns *ir = IR(ASMREF_L); - if (ra_used(ir)) { - Reg r = ra_dest(as, ir, RSET_GPR); - emit_getgl(as, r, cur_L); - ra_evictk(as); - } -} - -/* Coalesce BASE register for a root trace. */ -static void asm_head_root_base(ASMState *as) -{ - IRIns *ir; - asm_head_lreg(as); - ir = IR(REF_BASE); - if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) - ra_spill(as, ir); - ra_destreg(as, ir, RID_BASE); -} - -/* Coalesce BASE register for a side trace. */ -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) -{ - IRIns *ir; - asm_head_lreg(as); - ir = IR(REF_BASE); - if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) - ra_spill(as, ir); - if (ra_hasspill(irp->s)) { - rset_clear(allow, ra_dest(as, ir, allow)); - } else { - Reg r = irp->r; - lua_assert(ra_hasreg(r)); - rset_clear(allow, r); - if (r != ir->r && !rset_test(as->freeset, r)) - ra_restore(as, regcost_ref(as->cost[r])); - ra_destreg(as, ir, r); - } - return allow; -} - -/* -- Tail of trace ------------------------------------------------------- */ - -/* Fixup the tail code. */ -static void asm_tail_fixup(ASMState *as, TraceNo lnk) -{ - MCode *p = as->mctop; - MCode *target; - int32_t spadj = as->T->spadjust; - if (spadj == 0) { - as->mctop = --p; - } else { - /* Patch stack adjustment. */ - uint32_t k = emit_isk12(ARMI_ADD, spadj); - lua_assert(k); - p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); - } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - p[-1] = ARMI_B|(((target-p)-1)&0x00ffffffu); -} - -/* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) -{ - MCode *p = as->mctop - 1; /* Leave room for exit branch. */ - if (as->loopref) { - as->invmcp = as->mcp = p; - } else { - as->mcp = p-1; /* Leave room for stack pointer adjustment. */ - as->invmcp = NULL; - } - *p = 0; /* Prevent load/store merging. */ -} - -/* -- Trace setup --------------------------------------------------------- */ - -/* Ensure there are enough stack slots for call arguments. */ -static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - IRRef args[CCI_NARGS_MAX*2]; - uint32_t i, nargs = CCI_XNARGS(ci); - int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; - asm_collectargs(as, ir, ci, args); - for (i = 0; i < nargs; i++) { - if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) { - if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) { - if (irt_isnum(IR(args[i])->t)) { - if (nfpr > 0) nfpr--; - else fprodd = 0, nslots = (nslots + 3) & ~1; - } else { - if (fprodd) fprodd--; - else if (nfpr > 0) fprodd = 1, nfpr--; - else nslots++; - } - } else if (irt_isnum(IR(args[i])->t)) { - ngpr &= ~1; - if (ngpr > 0) ngpr -= 2; else nslots += 2; - } else { - if (ngpr > 0) ngpr--; else nslots++; - } - } else { - if (ngpr > 0) ngpr--; else nslots++; - } - } - if (nslots > as->evenspill) /* Leave room for args in stack slots. */ - as->evenspill = nslots; - return REGSP_HINT(RID_RET); -} - -static void asm_setup_target(ASMState *as) -{ - /* May need extra exit for asm_stack_check on side traces. */ - asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); -} - -/* -- Trace patching ------------------------------------------------------ */ - -/* Patch exit jumps of existing machine code to a new target. */ -void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) -{ - MCode *p = T->mcode; - MCode *pe = (MCode *)((char *)p + T->szmcode); - MCode *cstart = NULL, *cend = p; - MCode *mcarea = lj_mcode_patch(J, p, 0); - MCode *px = exitstub_addr(J, exitno) - 2; - for (; p < pe; p++) { - /* Look for bl_cc exitstub, replace with b_cc target. */ - uint32_t ins = *p; - if ((ins & 0x0f000000u) == 0x0b000000u && ins < 0xf0000000u && - ((ins ^ (px-p)) & 0x00ffffffu) == 0) { - *p = (ins & 0xfe000000u) | (((target-p)-2) & 0x00ffffffu); - cend = p+1; - if (!cstart) cstart = p; - } - } - lua_assert(cstart != NULL); - lj_mcode_sync(cstart, cend); - lj_mcode_patch(J, mcarea, 1); -} - diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h deleted file mode 100644 index 328e4d7740..0000000000 --- a/src/lj_asm_arm64.h +++ /dev/null @@ -1,2008 +0,0 @@ -/* -** ARM64 IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -** -** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. -** Sponsored by Cisco Systems, Inc. -*/ - -/* -- Register allocator extensions --------------------------------------- */ - -/* Allocate a register with a hint. */ -static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) -{ - Reg r = IR(ref)->r; - if (ra_noreg(r)) { - if (!ra_hashint(r) && !iscrossref(as, ref)) - ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ - r = ra_allocref(as, ref, allow); - } - ra_noweak(as, r); - return r; -} - -/* Allocate two source registers for three-operand instructions. */ -static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) -{ - IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); - Reg left = irl->r, right = irr->r; - if (ra_hasreg(left)) { - ra_noweak(as, left); - if (ra_noreg(right)) - right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); - else - ra_noweak(as, right); - } else if (ra_hasreg(right)) { - ra_noweak(as, right); - left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); - } else if (ra_hashint(right)) { - right = ra_allocref(as, ir->op2, allow); - left = ra_alloc1(as, ir->op1, rset_exclude(allow, right)); - } else { - left = ra_allocref(as, ir->op1, allow); - right = ra_alloc1(as, ir->op2, rset_exclude(allow, left)); - } - return left | (right << 8); -} - -/* -- Guard handling ------------------------------------------------------ */ - -/* Setup all needed exit stubs. */ -static void asm_exitstub_setup(ASMState *as, ExitNo nexits) -{ - ExitNo i; - MCode *mxp = as->mctop; - if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) - asm_mclimit(as); - /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */ - for (i = nexits-1; (int32_t)i >= 0; i--) - *--mxp = A64I_BL|((-3-i)&0x03ffffffu); - *--mxp = A64I_MOVZw|A64F_U16(as->T->traceno); - mxp--; - *mxp = A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu); - *--mxp = A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP); - as->mctop = mxp; -} - -static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) -{ - /* Keep this in-sync with exitstub_trace_addr(). */ - return as->mctop + exitno + 3; -} - -/* Emit conditional branch to exit for guard. */ -static void asm_guardcc(ASMState *as, A64CC cc) -{ - MCode *target = asm_exitstub_addr(as, as->snapno); - MCode *p = as->mcp; - if (LJ_UNLIKELY(p == as->invmcp)) { - as->loopinv = 1; - *p = A64I_B | ((target-p) & 0x03ffffffu); - emit_cond_branch(as, cc^1, p-1); - return; - } - emit_cond_branch(as, cc, target); -} - -/* Emit test and branch instruction to exit for guard. */ -static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit) -{ - MCode *target = asm_exitstub_addr(as, as->snapno); - MCode *p = as->mcp; - if (LJ_UNLIKELY(p == as->invmcp)) { - as->loopinv = 1; - *p = A64I_B | ((target-p) & 0x03ffffffu); - emit_tnb(as, ai^0x01000000u, r, bit, p-1); - return; - } - emit_tnb(as, ai, r, bit, target); -} - -/* Emit compare and branch instruction to exit for guard. */ -static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r) -{ - MCode *target = asm_exitstub_addr(as, as->snapno); - MCode *p = as->mcp; - if (LJ_UNLIKELY(p == as->invmcp)) { - as->loopinv = 1; - *p = A64I_B | ((target-p) & 0x03ffffffu); - emit_cnb(as, ai^0x01000000u, r, p-1); - return; - } - emit_cnb(as, ai, r, target); -} - -/* -- Operand fusion ------------------------------------------------------ */ - -/* Limit linear search to this distance. Avoids O(n^2) behavior. */ -#define CONFLICT_SEARCH_LIM 31 - -static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) -{ - if (irref_isk(ref)) { - IRIns *ir = IR(ref); - if (ir->o == IR_KNULL || !irt_is64(ir->t)) { - *k = ir->i; - return 1; - } else if (checki32((int64_t)ir_k64(ir)->u64)) { - *k = (int32_t)ir_k64(ir)->u64; - return 1; - } - } - return 0; -} - -/* Check if there's no conflicting instruction between curins and ref. */ -static int noconflict(ASMState *as, IRRef ref, IROp conflict) -{ - IRIns *ir = as->ir; - IRRef i = as->curins; - if (i > ref + CONFLICT_SEARCH_LIM) - return 0; /* Give up, ref is too far away. */ - while (--i > ref) - if (ir[i].o == conflict) - return 0; /* Conflict found. */ - return 1; /* Ok, no conflict. */ -} - -/* Fuse the array base of colocated arrays. */ -static int32_t asm_fuseabase(ASMState *as, IRRef ref) -{ - IRIns *ir = IR(ref); - if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && - !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) - return (int32_t)sizeof(GCtab); - return 0; -} - -#define FUSE_REG 0x40000000 - -/* Fuse array/hash/upvalue reference into register+offset operand. */ -static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, - A64Ins ins) -{ - IRIns *ir = IR(ref); - if (ra_noreg(ir->r)) { - if (ir->o == IR_AREF) { - if (mayfuse(as, ref)) { - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? tab : ir->op1; - ofs += 8*IR(ir->op2)->i; - if (emit_checkofs(ins, ofs)) { - *ofsp = ofs; - return ra_alloc1(as, refa, allow); - } - } else { - Reg base = ra_alloc1(as, ir->op1, allow); - *ofsp = FUSE_REG|ra_alloc1(as, ir->op2, rset_exclude(allow, base)); - return base; - } - } - } else if (ir->o == IR_HREFK) { - if (mayfuse(as, ref)) { - int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); - if (emit_checkofs(ins, ofs)) { - *ofsp = ofs; - return ra_alloc1(as, ir->op1, allow); - } - } - } else if (ir->o == IR_UREFC) { - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; - int64_t ofs = glofs(as, &uv->tv); - if (emit_checkofs(ins, ofs)) { - *ofsp = (int32_t)ofs; - return RID_GL; - } - } - } - } - *ofsp = 0; - return ra_alloc1(as, ref, allow); -} - -/* Fuse m operand into arithmetic/logic instructions. */ -static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow) -{ - IRIns *ir = IR(ref); - if (ra_hasreg(ir->r)) { - ra_noweak(as, ir->r); - return A64F_M(ir->r); - } else if (irref_isk(ref)) { - uint32_t m; - int64_t k = get_k64val(ir); - if ((ai & 0x1f000000) == 0x0a000000) - m = emit_isk13(k, irt_is64(ir->t)); - else - m = emit_isk12(k); - if (m) - return m; - } else if (mayfuse(as, ref)) { - if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR && irref_isk(ir->op2)) || - (ir->o == IR_ADD && ir->op1 == ir->op2)) { - A64Shift sh = ir->o == IR_BSHR ? A64SH_LSR : - ir->o == IR_BSAR ? A64SH_ASR : A64SH_LSL; - int shift = ir->o == IR_ADD ? 1 : - (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31)); - IRIns *irl = IR(ir->op1); - if (sh == A64SH_LSL && - irl->o == IR_CONV && - irl->op2 == ((IRT_I64<op1, allow); - return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift); - } else { - Reg m = ra_alloc1(as, ir->op1, allow); - return A64F_M(m) | A64F_SH(sh, shift); - } - } else if (ir->o == IR_CONV && - ir->op2 == ((IRT_I64<op1, allow); - return A64F_M(m) | A64F_EX(A64EX_SXTW); - } - } - return A64F_M(ra_allocref(as, ref, allow)); -} - -/* Fuse XLOAD/XSTORE reference into load/store operand. */ -static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref, - RegSet allow) -{ - IRIns *ir = IR(ref); - Reg base; - int32_t ofs = 0; - if (ra_noreg(ir->r) && canfuse(as, ir)) { - if (ir->o == IR_ADD) { - if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs)) { - ref = ir->op1; - } else { - Reg rn, rm; - IRRef lref = ir->op1, rref = ir->op2; - IRIns *irl = IR(lref); - if (mayfuse(as, irl->op1)) { - unsigned int shift = 4; - if (irl->o == IR_BSHL && irref_isk(irl->op2)) { - shift = (IR(irl->op2)->i & 63); - } else if (irl->o == IR_ADD && irl->op1 == irl->op2) { - shift = 1; - } - if ((ai >> 30) == shift) { - lref = irl->op1; - irl = IR(lref); - ai |= A64I_LS_SH; - } - } - if (irl->o == IR_CONV && - irl->op2 == ((IRT_I64<op1; - ai |= A64I_LS_SXTWx; - } else { - ai |= A64I_LS_LSLx; - } - rm = ra_alloc1(as, lref, allow); - rn = ra_alloc1(as, rref, rset_exclude(allow, rm)); - emit_dnm(as, (ai^A64I_LS_R), rd, rn, rm); - return; - } - } else if (ir->o == IR_STRREF) { - if (asm_isk32(as, ir->op2, &ofs)) { - ref = ir->op1; - } else if (asm_isk32(as, ir->op1, &ofs)) { - ref = ir->op2; - } else { - Reg rn = ra_alloc1(as, ir->op1, allow); - IRIns *irr = IR(ir->op2); - uint32_t m; - if (irr+1 == ir && !ra_used(irr) && - irr->o == IR_ADD && irref_isk(irr->op2)) { - ofs = sizeof(GCstr) + IR(irr->op2)->i; - if (emit_checkofs(ai, ofs)) { - Reg rm = ra_alloc1(as, irr->op1, rset_exclude(allow, rn)); - m = A64F_M(rm) | A64F_EX(A64EX_SXTW); - goto skipopm; - } - } - m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn)); - ofs = sizeof(GCstr); - skipopm: - emit_lso(as, ai, rd, rd, ofs); - emit_dn(as, A64I_ADDx^m, rd, rn); - return; - } - ofs += sizeof(GCstr); - if (!emit_checkofs(ai, ofs)) { - Reg rn = ra_alloc1(as, ref, allow); - Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn)); - emit_dnm(as, (ai^A64I_LS_R)|A64I_LS_UXTWx, rd, rn, rm); - return; - } - } - } - base = ra_alloc1(as, ref, allow); - emit_lso(as, ai, (rd & 31), base, ofs); -} - -/* Fuse FP multiply-add/sub. */ -static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) -{ - IRRef lref = ir->op1, rref = ir->op2; - IRIns *irm; - if (lref != rref && - ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && - ra_noreg(irm->r)) || - (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && - (rref = lref, ai = air, ra_noreg(irm->r))))) { - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); - Reg left = ra_alloc2(as, irm, - rset_exclude(rset_exclude(RSET_FPR, dest), add)); - Reg right = (left >> 8); left &= 255; - emit_dnma(as, ai, (dest & 31), (left & 31), (right & 31), (add & 31)); - return 1; - } - return 0; -} - -/* Fuse BAND + BSHL/BSHR into UBFM. */ -static int asm_fuseandshift(ASMState *as, IRIns *ir) -{ - IRIns *irl = IR(ir->op1); - lua_assert(ir->o == IR_BAND); - if (canfuse(as, irl) && irref_isk(ir->op2)) { - uint64_t mask = get_k64val(IR(ir->op2)); - if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) { - int32_t shmask = irt_is64(irl->t) ? 63 : 31; - int32_t shift = (IR(irl->op2)->i & shmask); - int32_t imms = shift; - if (irl->o == IR_BSHL) { - mask >>= shift; - shift = (shmask-shift+1) & shmask; - imms = 0; - } - if (mask && !((mask+1) & mask)) { /* Contiguous 1-bits at the bottom. */ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, irl->op1, RSET_GPR); - A64Ins ai = shmask == 63 ? A64I_UBFMx : A64I_UBFMw; - imms += 63 - emit_clz64(mask); - if (imms > shmask) imms = shmask; - emit_dn(as, ai | A64F_IMMS(imms) | A64F_IMMR(shift), dest, left); - return 1; - } - } - } - return 0; -} - -/* Fuse BOR(BSHL, BSHR) into EXTR/ROR. */ -static int asm_fuseorshift(ASMState *as, IRIns *ir) -{ - IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); - lua_assert(ir->o == IR_BOR); - if (canfuse(as, irl) && canfuse(as, irr) && - ((irl->o == IR_BSHR && irr->o == IR_BSHL) || - (irl->o == IR_BSHL && irr->o == IR_BSHR))) { - if (irref_isk(irl->op2) && irref_isk(irr->op2)) { - IRRef lref = irl->op1, rref = irr->op1; - uint32_t lshift = IR(irl->op2)->i, rshift = IR(irr->op2)->i; - if (irl->o == IR_BSHR) { /* BSHR needs to be the right operand. */ - uint32_t tmp2; - IRRef tmp1 = lref; lref = rref; rref = tmp1; - tmp2 = lshift; lshift = rshift; rshift = tmp2; - } - if (rshift + lshift == (irt_is64(ir->t) ? 64 : 32)) { - A64Ins ai = irt_is64(ir->t) ? A64I_EXTRx : A64I_EXTRw; - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, lref, RSET_GPR); - Reg right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left)); - emit_dnm(as, ai | A64F_IMMS(rshift), dest, left, right); - return 1; - } - } - } - return 0; -} - -/* -- Calls --------------------------------------------------------------- */ - -/* Generate a call to a C function. */ -static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) -{ - uint32_t n, nargs = CCI_XNARGS(ci); - int32_t ofs = 0; - Reg gpr, fpr = REGARG_FIRSTFPR; - if ((void *)ci->func) - emit_call(as, (void *)ci->func); - for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) - as->cost[gpr] = REGCOST(~0u, ASMREF_L); - gpr = REGARG_FIRSTGPR; - for (n = 0; n < nargs; n++) { /* Setup args. */ - IRRef ref = args[n]; - IRIns *ir = IR(ref); - if (ref) { - if (irt_isfp(ir->t)) { - if (fpr <= REGARG_LASTFPR) { - lua_assert(rset_test(as->freeset, fpr)); /* Must have been evicted. */ - ra_leftov(as, fpr, ref); - fpr++; - } else { - Reg r = ra_alloc1(as, ref, RSET_FPR); - emit_spstore(as, ir, r, ofs); - ofs += 8; - } - } else { - if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ - ra_leftov(as, gpr, ref); - gpr++; - } else { - Reg r = ra_alloc1(as, ref, RSET_GPR); - emit_spstore(as, ir, r, ofs); - ofs += 8; - } - } - } - } -} - -/* Setup result reg/sp for call. Evict scratch regs. */ -static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - RegSet drop = RSET_SCRATCH; - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - ra_evictset(as, drop); /* Evictions must be performed first. */ - if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); - if (irt_isfp(ir->t)) { - if (ci->flags & CCI_CASTU64) { - Reg dest = ra_dest(as, ir, RSET_FPR) & 31; - emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D_R : A64I_FMOV_S_R, - dest, RID_RET); - } else { - ra_destreg(as, ir, RID_FPRET); - } - } else { - ra_destreg(as, ir, RID_RET); - } - } - UNUSED(ci); -} - -static void asm_callx(ASMState *as, IRIns *ir) -{ - IRRef args[CCI_NARGS_MAX*2]; - CCallInfo ci; - IRRef func; - IRIns *irf; - ci.flags = asm_callx_flags(as, ir); - asm_collectargs(as, ir, &ci, args); - asm_setupresult(as, ir, &ci); - func = ir->op2; irf = IR(func); - if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } - if (irref_isk(func)) { /* Call to constant address. */ - ci.func = (ASMFunction)(ir_k64(irf)->u64); - } else { /* Need a non-argument register for indirect calls. */ - Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); - emit_n(as, A64I_BLR, freg); - ci.func = (ASMFunction)(void *)0; - } - asm_gencall(as, &ci, args); -} - -/* -- Returns ------------------------------------------------------------- */ - -/* Return to lower frame. Guard that it goes to the right spot. */ -static void asm_retf(ASMState *as, IRIns *ir) -{ - Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); - void *pc = ir_kptr(IR(ir->op2)); - int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); - as->topslot -= (BCReg)delta; - if ((int32_t)as->topslot < 0) as->topslot = 0; - irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ - /* Need to force a spill on REF_BASE now to update the stack slot. */ - emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE))); - emit_setgl(as, base, jit_base); - emit_addptr(as, base, -8*delta); - asm_guardcc(as, CC_NE); - emit_nm(as, A64I_CMPx, RID_TMP, - ra_allock(as, i64ptr(pc), rset_exclude(RSET_GPR, base))); - emit_lso(as, A64I_LDRx, RID_TMP, base, -8); -} - -/* -- Type conversions ---------------------------------------------------- */ - -static void asm_tointg(ASMState *as, IRIns *ir, Reg left) -{ - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - Reg dest = ra_dest(as, ir, RSET_GPR); - asm_guardcc(as, CC_NE); - emit_nm(as, A64I_FCMPd, (tmp & 31), (left & 31)); - emit_dn(as, A64I_FCVT_F64_S32, (tmp & 31), dest); - emit_dn(as, A64I_FCVT_S32_F64, dest, (left & 31)); -} - -static void asm_tobit(ASMState *as, IRIns *ir) -{ - RegSet allow = RSET_FPR; - Reg left = ra_alloc1(as, ir->op1, allow); - Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); - Reg tmp = ra_scratch(as, rset_clear(allow, right)); - Reg dest = ra_dest(as, ir, RSET_GPR); - emit_dn(as, A64I_FMOV_R_S, dest, (tmp & 31)); - emit_dnm(as, A64I_FADDd, (tmp & 31), (left & 31), (right & 31)); -} - -static void asm_conv(ASMState *as, IRIns *ir) -{ - IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); - int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); - int stfp = (st == IRT_NUM || st == IRT_FLOAT); - IRRef lref = ir->op1; - lua_assert(irt_type(ir->t) != st); - if (irt_isfp(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - if (stfp) { /* FP to FP conversion. */ - emit_dn(as, st == IRT_NUM ? A64I_FCVT_F32_F64 : A64I_FCVT_F64_F32, - (dest & 31), (ra_alloc1(as, lref, RSET_FPR) & 31)); - } else { /* Integer to FP conversion. */ - Reg left = ra_alloc1(as, lref, RSET_GPR); - A64Ins ai = irt_isfloat(ir->t) ? - (((IRT_IS64 >> st) & 1) ? - (st == IRT_I64 ? A64I_FCVT_F32_S64 : A64I_FCVT_F32_U64) : - (st == IRT_INT ? A64I_FCVT_F32_S32 : A64I_FCVT_F32_U32)) : - (((IRT_IS64 >> st) & 1) ? - (st == IRT_I64 ? A64I_FCVT_F64_S64 : A64I_FCVT_F64_U64) : - (st == IRT_INT ? A64I_FCVT_F64_S32 : A64I_FCVT_F64_U32)); - emit_dn(as, ai, (dest & 31), left); - } - } else if (stfp) { /* FP to integer conversion. */ - if (irt_isguard(ir->t)) { - /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); - asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); - } else { - Reg left = ra_alloc1(as, lref, RSET_FPR); - Reg dest = ra_dest(as, ir, RSET_GPR); - A64Ins ai = irt_is64(ir->t) ? - (st == IRT_NUM ? - (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) : - (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) : - (st == IRT_NUM ? - (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) : - (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32)); - emit_dn(as, ai, dest, (left & 31)); - } - } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, lref, RSET_GPR); - A64Ins ai = st == IRT_I8 ? A64I_SXTBw : - st == IRT_U8 ? A64I_UXTBw : - st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw; - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); - emit_dn(as, ai, dest, left); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - if (irt_is64(ir->t)) { - if (st64 || !(ir->op2 & IRCONV_SEXT)) { - /* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */ - ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ - } else { /* 32 to 64 bit sign extension. */ - Reg left = ra_alloc1(as, lref, RSET_GPR); - emit_dn(as, A64I_SXTW, dest, left); - } - } else { - if (st64) { - /* This is either a 32 bit reg/reg mov which zeroes the hiword - ** or a load of the loword from a 64 bit address. - */ - Reg left = ra_alloc1(as, lref, RSET_GPR); - emit_dm(as, A64I_MOVw, dest, left); - } else { /* 32/32 bit no-op (cast). */ - ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ - } - } - } -} - -static void asm_strto(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; - IRRef args[2]; - Reg dest = 0, tmp; - int destused = ra_used(ir); - int32_t ofs = 0; - ra_evictset(as, RSET_SCRATCH); - if (destused) { - if (ra_hasspill(ir->s)) { - ofs = sps_scale(ir->s); - destused = 0; - if (ra_hasreg(ir->r)) { - ra_free(as, ir->r); - ra_modified(as, ir->r); - emit_spload(as, ir, ir->r, ofs); - } - } else { - dest = ra_dest(as, ir, RSET_FPR); - } - } - if (destused) - emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0); - asm_guardcnb(as, A64I_CBZ, RID_RET); - args[0] = ir->op1; /* GCstr *str */ - args[1] = ASMREF_TMP1; /* TValue *n */ - asm_gencall(as, ci, args); - tmp = ra_releasetmp(as, ASMREF_TMP1); - emit_opk(as, A64I_ADDx, tmp, RID_SP, ofs, RSET_GPR); -} - -/* -- Memory references --------------------------------------------------- */ - -/* Store tagged value for ref at base+ofs. */ -static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) -{ - RegSet allow = rset_exclude(RSET_GPR, base); - IRIns *ir = IR(ref); - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); - if (irref_isk(ref)) { - TValue k; - lj_ir_kvalue(as->J->L, &k, ir); - emit_lso(as, A64I_STRx, ra_allock(as, k.u64, allow), base, ofs); - } else { - Reg src = ra_alloc1(as, ref, allow); - rset_clear(allow, src); - if (irt_isinteger(ir->t)) { - Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); - emit_lso(as, A64I_STRx, RID_TMP, base, ofs); - emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), RID_TMP, type, src); - } else { - Reg type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - emit_lso(as, A64I_STRx, RID_TMP, base, ofs); - emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), RID_TMP, src, type); - } - } -} - -/* Get pointer to TValue. */ -static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) -{ - IRIns *ir = IR(ref); - if (irt_isnum(ir->t)) { - if (irref_isk(ref)) { - /* Use the number constant itself as a TValue. */ - ra_allockreg(as, i64ptr(ir_knum(ir)), dest); - } else { - /* Otherwise force a spill and use the spill slot. */ - emit_opk(as, A64I_ADDx, dest, RID_SP, ra_spill(as, ir), RSET_GPR); - } - } else { - /* Otherwise use g->tmptv to hold the TValue. */ - asm_tvstore64(as, dest, 0, ref); - ra_allockreg(as, i64ptr(&J2G(as->J)->tmptv), dest); - } -} - -static void asm_aref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx, base; - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? tab : ir->op1; - uint32_t k = emit_isk12(ofs + 8*IR(ir->op2)->i); - if (k) { - base = ra_alloc1(as, refa, RSET_GPR); - emit_dn(as, A64I_ADDx^k, dest, base); - return; - } - } - base = ra_alloc1(as, ir->op1, RSET_GPR); - idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); - emit_dnm(as, A64I_ADDx | A64F_EXSH(A64EX_UXTW, 3), dest, base, idx); -} - -/* Inlined hash lookup. Specialized for key type and for const keys. -** The equivalent C code is: -** Node *n = hashkey(t, key); -** do { -** if (lj_obj_equal(&n->key, key)) return &n->val; -** } while ((n = nextnode(n))); -** return niltv(L); -*/ -static void asm_href(ASMState *as, IRIns *ir, IROp merge) -{ - RegSet allow = RSET_GPR; - int destused = ra_used(ir); - Reg dest = ra_dest(as, ir, allow); - Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); - Reg key = 0, tmp = RID_TMP; - IRRef refkey = ir->op2; - IRIns *irkey = IR(refkey); - int isk = irref_isk(ir->op2); - IRType1 kt = irkey->t; - uint32_t k = 0; - uint32_t khash; - MCLabel l_end, l_loop, l_next; - rset_clear(allow, tab); - - if (!isk) { - key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); - rset_clear(allow, key); - if (!irt_isstr(kt)) { - tmp = ra_scratch(as, allow); - rset_clear(allow, tmp); - } - } else if (irt_isnum(kt)) { - int64_t val = (int64_t)ir_knum(irkey)->u64; - if (!(k = emit_isk12(val))) { - key = ra_allock(as, val, allow); - rset_clear(allow, key); - } - } else if (!irt_ispri(kt)) { - if (!(k = emit_isk12(irkey->i))) { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - } - } - - /* Key not found in chain: jump to exit (if merged) or load niltv. */ - l_end = emit_label(as); - as->invmcp = NULL; - if (merge == IR_NE) - asm_guardcc(as, CC_AL); - else if (destused) - emit_loada(as, dest, niltvg(J2G(as->J))); - - /* Follow hash chain until the end. */ - l_loop = --as->mcp; - emit_n(as, A64I_CMPx^A64I_K12^0, dest); - emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); - l_next = emit_label(as); - - /* Type and value comparison. */ - if (merge == IR_EQ) - asm_guardcc(as, CC_EQ); - else - emit_cond_branch(as, CC_EQ, l_end); - - if (irt_isnum(kt)) { - if (isk) { - /* Assumes -0.0 is already canonicalized to +0.0. */ - if (k) - emit_n(as, A64I_CMPx^k, tmp); - else - emit_nm(as, A64I_CMPx, key, tmp); - emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); - } else { - Reg tisnum = ra_allock(as, LJ_TISNUM << 15, allow); - Reg ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key)); - rset_clear(allow, tisnum); - emit_nm(as, A64I_FCMPd, key, ftmp); - emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31)); - emit_cond_branch(as, CC_LO, l_next); - emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp); - emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n)); - } - } else if (irt_isaddr(kt)) { - Reg scr; - if (isk) { - int64_t kk = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; - scr = ra_allock(as, kk, allow); - emit_nm(as, A64I_CMPx, scr, tmp); - emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); - } else { - scr = ra_scratch(as, allow); - emit_nm(as, A64I_CMPx, tmp, scr); - emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64)); - } - rset_clear(allow, scr); - } else { - Reg type, scr; - lua_assert(irt_ispri(kt) && !irt_isnil(kt)); - type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); - scr = ra_scratch(as, rset_clear(allow, type)); - rset_clear(allow, scr); - emit_nm(as, A64I_CMPw, scr, type); - emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key)); - } - - *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE; - if (!isk && irt_isaddr(kt)) { - Reg type = ra_allock(as, (int32_t)irt_toitype(kt), allow); - emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type); - rset_clear(allow, type); - } - /* Load main position relative to tab->node into dest. */ - khash = isk ? ir_khash(irkey) : 1; - if (khash == 0) { - emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node)); - } else { - emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 3), dest, tmp, dest); - emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 1), dest, dest, dest); - emit_lso(as, A64I_LDRx, tmp, tab, offsetof(GCtab, node)); - if (isk) { - Reg tmphash = ra_allock(as, khash, allow); - emit_dnm(as, A64I_ANDw, dest, dest, tmphash); - emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); - } else if (irt_isstr(kt)) { - /* Fetch of str->hash is cheaper than ra_allock. */ - emit_dnm(as, A64I_ANDw, dest, dest, tmp); - emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, hash)); - emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); - } else { /* Must match with hash*() in lj_tab.c. */ - emit_dnm(as, A64I_ANDw, dest, dest, tmp); - emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask)); - emit_dnm(as, A64I_SUBw, dest, dest, tmp); - emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp); - emit_dnm(as, A64I_EORw, dest, dest, tmp); - emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest); - emit_dnm(as, A64I_SUBw, tmp, tmp, dest); - emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest); - emit_dnm(as, A64I_EORw, tmp, tmp, dest); - if (irt_isnum(kt)) { - emit_dnm(as, A64I_ADDw, dest, dest, dest); - emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); - emit_dm(as, A64I_MOVw, tmp, dest); - emit_dn(as, A64I_FMOV_R_D, dest, (key & 31)); - } else { - checkmclim(as); - emit_dm(as, A64I_MOVw, tmp, key); - emit_dnm(as, A64I_EORw, dest, dest, - ra_allock(as, irt_toitype(kt) << 15, allow)); - emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); - emit_dm(as, A64I_MOVx, dest, key); - } - } - } -} - -static void asm_hrefk(ASMState *as, IRIns *ir) -{ - IRIns *kslot = IR(ir->op2); - IRIns *irkey = IR(kslot->op1); - int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); - int32_t kofs = ofs + (int32_t)offsetof(Node, key); - int bigofs = !emit_checkofs(A64I_LDRx, ofs); - RegSet allow = RSET_GPR; - Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; - Reg node = ra_alloc1(as, ir->op1, allow); - Reg key = ra_scratch(as, rset_clear(allow, node)); - Reg idx = node; - uint64_t k; - lua_assert(ofs % sizeof(Node) == 0); - rset_clear(allow, key); - if (bigofs) { - idx = dest; - rset_clear(allow, dest); - kofs = (int32_t)offsetof(Node, key); - } else if (ra_hasreg(dest)) { - emit_opk(as, A64I_ADDx, dest, node, ofs, allow); - } - asm_guardcc(as, CC_NE); - if (irt_ispri(irkey->t)) { - k = ~((int64_t)~irt_toitype(irkey->t) << 47); - } else if (irt_isnum(irkey->t)) { - k = ir_knum(irkey)->u64; - } else { - k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey); - } - emit_nm(as, A64I_CMPx, key, ra_allock(as, k, allow)); - emit_lso(as, A64I_LDRx, key, idx, kofs); - if (bigofs) - emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR); -} - -static void asm_uref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; - emit_lsptr(as, A64I_LDRx, dest, v); - } else { - Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->o == IR_UREFC) { - asm_guardcc(as, CC_NE); - emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP); - emit_opk(as, A64I_ADDx, dest, uv, - (int32_t)offsetof(GCupval, tv), RSET_GPR); - emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); - } else { - emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v)); - } - emit_lso(as, A64I_LDRx, uv, func, - (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8)); - } -} - -static void asm_fref(ASMState *as, IRIns *ir) -{ - UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); -} - -static void asm_strref(ASMState *as, IRIns *ir) -{ - RegSet allow = RSET_GPR; - Reg dest = ra_dest(as, ir, allow); - Reg base = ra_alloc1(as, ir->op1, allow); - IRIns *irr = IR(ir->op2); - int32_t ofs = sizeof(GCstr); - uint32_t m; - rset_clear(allow, base); - if (irref_isk(ir->op2) && (m = emit_isk12(ofs + irr->i))) { - emit_dn(as, A64I_ADDx^m, dest, base); - } else { - emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, dest); - emit_dnm(as, A64I_ADDx, dest, base, ra_alloc1(as, ir->op2, allow)); - } -} - -/* -- Loads and stores ---------------------------------------------------- */ - -static A64Ins asm_fxloadins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: return A64I_LDRB ^ A64I_LS_S; - case IRT_U8: return A64I_LDRB; - case IRT_I16: return A64I_LDRH ^ A64I_LS_S; - case IRT_U16: return A64I_LDRH; - case IRT_NUM: return A64I_LDRd; - case IRT_FLOAT: return A64I_LDRs; - default: return irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw; - } -} - -static A64Ins asm_fxstoreins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: case IRT_U8: return A64I_STRB; - case IRT_I16: case IRT_U16: return A64I_STRH; - case IRT_NUM: return A64I_STRd; - case IRT_FLOAT: return A64I_STRs; - default: return irt_is64(ir->t) ? A64I_STRx : A64I_STRw; - } -} - -static void asm_fload(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx; - A64Ins ai = asm_fxloadins(ir); - int32_t ofs; - if (ir->op1 == REF_NIL) { - idx = RID_GL; - ofs = (ir->op2 << 2) - GG_OFS(g); - } else { - idx = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->op2 == IRFL_TAB_ARRAY) { - ofs = asm_fuseabase(as, ir->op1); - if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ - emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, idx); - return; - } - } - ofs = field_ofs[ir->op2]; - } - emit_lso(as, ai, (dest & 31), idx, ofs); -} - -static void asm_fstore(ASMState *as, IRIns *ir) -{ - if (ir->r != RID_SINK) { - Reg src = ra_alloc1(as, ir->op2, RSET_GPR); - IRIns *irf = IR(ir->op1); - Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); - int32_t ofs = field_ofs[irf->op2]; - emit_lso(as, asm_fxstoreins(ir), (src & 31), idx, ofs); - } -} - -static void asm_xload(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); -} - -static void asm_xstore(ASMState *as, IRIns *ir) -{ - if (ir->r != RID_SINK) { - Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, - rset_exclude(RSET_GPR, src)); - } -} - -static void asm_ahuvload(ASMState *as, IRIns *ir) -{ - Reg idx, tmp, type; - int32_t ofs = 0; - RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; - lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || - irt_isint(ir->t)); - if (ra_used(ir)) { - Reg dest = ra_dest(as, ir, allow); - tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest; - if (irt_isaddr(ir->t)) { - emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest); - } else if (irt_isnum(ir->t)) { - emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp); - } else if (irt_isint(ir->t)) { - emit_dm(as, A64I_MOVw, dest, dest); - } - } else { - tmp = ra_scratch(as, gpr); - } - type = ra_scratch(as, rset_clear(gpr, tmp)); - idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx); - /* Always do the type check, even if the load result is unused. */ - asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE); - if (irt_type(ir->t) >= IRT_NUM) { - lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); - emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), - ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp); - } else if (irt_isaddr(ir->t)) { - emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type); - emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); - } else if (irt_isnil(ir->t)) { - emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); - } else { - emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), - ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, allow), tmp); - } - if (ofs & FUSE_REG) - emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31)); - else - emit_lso(as, A64I_LDRx, tmp, idx, ofs); -} - -static void asm_ahustore(ASMState *as, IRIns *ir) -{ - if (ir->r != RID_SINK) { - RegSet allow = RSET_GPR; - Reg idx, src = RID_NONE, tmp = RID_TMP, type = RID_NONE; - int32_t ofs = 0; - if (irt_isnum(ir->t)) { - src = ra_alloc1(as, ir->op2, RSET_FPR); - idx = asm_fuseahuref(as, ir->op1, &ofs, allow, A64I_STRd); - if (ofs & FUSE_REG) - emit_dnm(as, (A64I_STRd^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, (src & 31), idx, (ofs &31)); - else - emit_lso(as, A64I_STRd, (src & 31), idx, ofs); - } else { - if (!irt_ispri(ir->t)) { - src = ra_alloc1(as, ir->op2, allow); - rset_clear(allow, src); - if (irt_isinteger(ir->t)) - type = ra_allock(as, (int64_t)LJ_TISNUM << 47, allow); - else - type = ra_allock(as, irt_toitype(ir->t), allow); - } else { - tmp = type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t)<<47), allow); - } - idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type), - A64I_STRx); - if (ofs & FUSE_REG) - emit_dnm(as, (A64I_STRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31)); - else - emit_lso(as, A64I_STRx, tmp, idx, ofs); - if (ra_hasreg(src)) { - if (irt_isinteger(ir->t)) { - emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), tmp, type, src); - } else { - emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, src, type); - } - } - } - } -} - -static void asm_sload(ASMState *as, IRIns *ir) -{ - int32_t ofs = 8*((int32_t)ir->op1-2); - IRType1 t = ir->t; - Reg dest = RID_NONE, base; - RegSet allow = RSET_GPR; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); - if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { - dest = ra_scratch(as, RSET_FPR); - asm_tointg(as, ir, dest); - t.irt = IRT_NUM; /* Continue with a regular number type check. */ - } else if (ra_used(ir)) { - Reg tmp = RID_NONE; - if ((ir->op2 & IRSLOAD_CONVERT)) - tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR); - lua_assert((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t)); - dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); - base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest)); - if (irt_isaddr(t)) { - emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest); - } else if ((ir->op2 & IRSLOAD_CONVERT)) { - if (irt_isint(t)) { - emit_dn(as, A64I_FCVT_S32_F64, dest, (tmp & 31)); - /* If value is already loaded for type check, move it to FPR. */ - if ((ir->op2 & IRSLOAD_TYPECHECK)) - emit_dn(as, A64I_FMOV_D_R, (tmp & 31), dest); - else - dest = tmp; - t.irt = IRT_NUM; /* Check for original type. */ - } else { - emit_dn(as, A64I_FCVT_F64_S32, (dest & 31), tmp); - dest = tmp; - t.irt = IRT_INT; /* Check for original type. */ - } - } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { - emit_dm(as, A64I_MOVw, dest, dest); - } - goto dotypecheck; - } - base = ra_alloc1(as, REF_BASE, allow); -dotypecheck: - rset_clear(allow, base); - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - Reg tmp; - if (ra_hasreg(dest) && rset_test(RSET_GPR, dest)) { - tmp = dest; - } else { - tmp = ra_scratch(as, allow); - rset_clear(allow, tmp); - } - if (irt_isnum(t) && !(ir->op2 & IRSLOAD_CONVERT)) - emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp); - /* Need type check, even if the load result is unused. */ - asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE); - if (irt_type(t) >= IRT_NUM) { - lua_assert(irt_isinteger(t) || irt_isnum(t)); - emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), - ra_allock(as, LJ_TISNUM << 15, allow), tmp); - } else if (irt_isnil(t)) { - emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); - } else if (irt_ispri(t)) { - emit_nm(as, A64I_CMPx, - ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp); - } else { - Reg type = ra_scratch(as, allow); - emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type); - emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); - } - emit_lso(as, A64I_LDRx, tmp, base, ofs); - return; - } - if (ra_hasreg(dest)) { - emit_lso(as, irt_isnum(t) ? A64I_LDRd : - (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base, ofs); - } -} - -/* -- Allocations --------------------------------------------------------- */ - -#if LJ_HASFFI -static void asm_cnew(ASMState *as, IRIns *ir) -{ - CTState *cts = ctype_ctsG(J2G(as->J)); - CTypeID id = (CTypeID)IR(ir->op1)->i; - CTSize sz; - CTInfo info = lj_ctype_info(cts, id, &sz); - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; - IRRef args[4]; - RegSet allow = (RSET_GPR & ~RSET_SCRATCH); - lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); - - as->gcsteps++; - asm_setupresult(as, ir, ci); /* GCcdata * */ - /* Initialize immutable cdata object. */ - if (ir->o == IR_CNEWI) { - int32_t ofs = sizeof(GCcdata); - Reg r = ra_alloc1(as, ir->op2, allow); - lua_assert(sz == 4 || sz == 8); - emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs); - } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ - ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ir->op1; /* CTypeID id */ - args[2] = ir->op2; /* CTSize sz */ - args[3] = ASMREF_TMP1; /* CTSize align */ - asm_gencall(as, ci, args); - emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); - return; - } - - /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ - { - Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow); - emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); - emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); - emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP); - if (id < 65536) emit_d(as, A64I_MOVZw | A64F_U16(id), RID_X1); - } - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ASMREF_TMP1; /* MSize size */ - asm_gencall(as, ci, args); - ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), - ra_releasetmp(as, ASMREF_TMP1)); -} -#else -#define asm_cnew(as, ir) ((void)0) -#endif - -/* -- Write barriers ------------------------------------------------------ */ - -static void asm_tbar(ASMState *as, IRIns *ir) -{ - Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); - Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab)); - Reg gr = ra_allock(as, i64ptr(J2G(as->J)), - rset_exclude(rset_exclude(RSET_GPR, tab), link)); - Reg mark = RID_TMP; - MCLabel l_end = emit_label(as); - emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist)); - emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked)); - emit_lso(as, A64I_STRx, tab, gr, - (int32_t)offsetof(global_State, gc.grayagain)); - emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark); - emit_lso(as, A64I_LDRx, link, gr, - (int32_t)offsetof(global_State, gc.grayagain)); - emit_cond_branch(as, CC_EQ, l_end); - emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark); - emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked)); -} - -static void asm_obar(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; - IRRef args[2]; - MCLabel l_end; - RegSet allow = RSET_GPR; - Reg obj, val, tmp; - /* No need for other object barriers (yet). */ - lua_assert(IR(ir->op1)->o == IR_UREFC); - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ir->op1; /* TValue *tv */ - asm_gencall(as, ci, args); - ra_allockreg(as, i64ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1) ); - obj = IR(ir->op1)->r; - tmp = ra_scratch(as, rset_exclude(allow, obj)); - emit_cond_branch(as, CC_EQ, l_end); - emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp); - emit_cond_branch(as, CC_EQ, l_end); - emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP); - val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); - emit_lso(as, A64I_LDRB, tmp, obj, - (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); - emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked)); -} - -/* -- Arithmetic and logic operations ------------------------------------- */ - -static void asm_fparith(ASMState *as, IRIns *ir, A64Ins ai) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - emit_dnm(as, ai, (dest & 31), (left & 31), (right & 31)); -} - -static void asm_fpunary(ASMState *as, IRIns *ir, A64Ins ai) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); - emit_dn(as, ai, (dest & 31), (left & 31)); -} - -static void asm_fpmath(ASMState *as, IRIns *ir) -{ - IRFPMathOp fpm = (IRFPMathOp)ir->op2; - if (fpm == IRFPM_SQRT) { - asm_fpunary(as, ir, A64I_FSQRTd); - } else if (fpm <= IRFPM_TRUNC) { - asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd : - fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd); - } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) { - return; - } else { - asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); - } -} - -static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) -{ - IRIns *ir; - if (irref_isk(rref)) - return 0; /* Don't swap constants to the left. */ - if (irref_isk(lref)) - return 1; /* But swap constants to the right. */ - ir = IR(rref); - if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || - (ir->o == IR_ADD && ir->op1 == ir->op2) || - (ir->o == IR_CONV && ir->op2 == ((IRT_I64<o >= IR_BSHL && ir->o <= IR_BSAR) || - (ir->o == IR_ADD && ir->op1 == ir->op2) || - (ir->o == IR_CONV && ir->op2 == ((IRT_I64<op1, rref = ir->op2; - Reg left, dest = ra_dest(as, ir, RSET_GPR); - uint32_t m; - if ((ai & ~A64I_S) != A64I_SUBw && asm_swapops(as, lref, rref)) { - IRRef tmp = lref; lref = rref; rref = tmp; - } - left = ra_hintalloc(as, lref, dest, RSET_GPR); - if (irt_is64(ir->t)) ai |= A64I_X; - m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); - if (irt_isguard(ir->t)) { /* For IR_ADDOV etc. */ - asm_guardcc(as, CC_VS); - ai |= A64I_S; - } - emit_dn(as, ai^m, dest, left); -} - -static void asm_intop_s(ASMState *as, IRIns *ir, A64Ins ai) -{ - if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */ - as->flagmcp = NULL; - as->mcp++; - ai |= A64I_S; - } - asm_intop(as, ir, ai); -} - -static void asm_intneg(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - emit_dm(as, irt_is64(ir->t) ? A64I_NEGx : A64I_NEGw, dest, left); -} - -/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */ -static void asm_intmul(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest)); - Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - if (irt_isguard(ir->t)) { /* IR_MULOV */ - asm_guardcc(as, CC_NE); - emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */ - emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest); - emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest); - emit_dnm(as, A64I_SMULL, dest, right, left); - } else { - emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right); - } -} - -static void asm_add(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd)) - asm_fparith(as, ir, A64I_FADDd); - return; - } - asm_intop_s(as, ir, A64I_ADDw); -} - -static void asm_sub(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - if (!asm_fusemadd(as, ir, A64I_FNMSUBd, A64I_FMSUBd)) - asm_fparith(as, ir, A64I_FSUBd); - return; - } - asm_intop_s(as, ir, A64I_SUBw); -} - -static void asm_mul(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - asm_fparith(as, ir, A64I_FMULd); - return; - } - asm_intmul(as, ir); -} - -static void asm_div(ASMState *as, IRIns *ir) -{ -#if LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : - IRCALL_lj_carith_divu64); - else -#endif - asm_fparith(as, ir, A64I_FDIVd); -} - -static void asm_pow(ASMState *as, IRIns *ir) -{ -#if LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : - IRCALL_lj_carith_powu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_powi); -} - -#define asm_addov(as, ir) asm_add(as, ir) -#define asm_subov(as, ir) asm_sub(as, ir) -#define asm_mulov(as, ir) asm_mul(as, ir) - -#define asm_abs(as, ir) asm_fpunary(as, ir, A64I_FABS) -#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) -#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) - -static void asm_mod(ASMState *as, IRIns *ir) -{ -#if LJ_HASFFI - if (!irt_isint(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : - IRCALL_lj_carith_modu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_modi); -} - -static void asm_neg(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - asm_fpunary(as, ir, A64I_FNEGd); - return; - } - asm_intneg(as, ir); -} - -static void asm_band(ASMState *as, IRIns *ir) -{ - A64Ins ai = A64I_ANDw; - if (asm_fuseandshift(as, ir)) - return; - if (as->flagmcp == as->mcp) { - /* Try to drop cmp r, #0. */ - as->flagmcp = NULL; - as->mcp++; - ai = A64I_ANDSw; - } - asm_intop(as, ir, ai); -} - -static void asm_borbxor(ASMState *as, IRIns *ir, A64Ins ai) -{ - IRRef lref = ir->op1, rref = ir->op2; - IRIns *irl = IR(lref), *irr = IR(rref); - if ((canfuse(as, irl) && irl->o == IR_BNOT && !irref_isk(rref)) || - (canfuse(as, irr) && irr->o == IR_BNOT && !irref_isk(lref))) { - Reg left, dest = ra_dest(as, ir, RSET_GPR); - uint32_t m; - if (irl->o == IR_BNOT) { - IRRef tmp = lref; lref = rref; rref = tmp; - } - left = ra_alloc1(as, lref, RSET_GPR); - ai |= A64I_ON; - if (irt_is64(ir->t)) ai |= A64I_X; - m = asm_fuseopm(as, ai, IR(rref)->op1, rset_exclude(RSET_GPR, left)); - emit_dn(as, ai^m, dest, left); - } else { - asm_intop(as, ir, ai); - } -} - -static void asm_bor(ASMState *as, IRIns *ir) -{ - if (asm_fuseorshift(as, ir)) - return; - asm_borbxor(as, ir, A64I_ORRw); -} - -#define asm_bxor(as, ir) asm_borbxor(as, ir, A64I_EORw) - -static void asm_bnot(ASMState *as, IRIns *ir) -{ - A64Ins ai = A64I_MVNw; - Reg dest = ra_dest(as, ir, RSET_GPR); - uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); - if (irt_is64(ir->t)) ai |= A64I_X; - emit_d(as, ai^m, dest); -} - -static void asm_bswap(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - emit_dn(as, irt_is64(ir->t) ? A64I_REVx : A64I_REVw, dest, left); -} - -static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh) -{ - int32_t shmask = irt_is64(ir->t) ? 63 : 31; - if (irref_isk(ir->op2)) { /* Constant shifts. */ - Reg left, dest = ra_dest(as, ir, RSET_GPR); - int32_t shift = (IR(ir->op2)->i & shmask); - IRIns *irl = IR(ir->op1); - if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw; - - /* Fuse BSHL + BSHR/BSAR into UBFM/SBFM aka UBFX/SBFX/UBFIZ/SBFIZ. */ - if ((sh == A64SH_LSR || sh == A64SH_ASR) && canfuse(as, irl)) { - if (irl->o == IR_BSHL && irref_isk(irl->op2)) { - int32_t shift2 = (IR(irl->op2)->i & shmask); - shift = ((shift - shift2) & shmask); - shmask -= shift2; - ir = irl; - } - } - - left = ra_alloc1(as, ir->op1, RSET_GPR); - switch (sh) { - case A64SH_LSL: - emit_dn(as, ai | A64F_IMMS(shmask-shift) | - A64F_IMMR((shmask-shift+1)&shmask), dest, left); - break; - case A64SH_LSR: case A64SH_ASR: - emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left); - break; - case A64SH_ROR: - emit_dnm(as, ai | A64F_IMMS(shift), dest, left, left); - break; - } - } else { /* Variable-length shifts. */ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_dnm(as, (shmask == 63 ? A64I_SHRx : A64I_SHRw) | A64F_BSH(sh), dest, left, right); - } -} - -#define asm_bshl(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSL) -#define asm_bshr(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR) -#define asm_bsar(as, ir) asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR) -#define asm_bror(as, ir) asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR) -#define asm_brol(as, ir) lua_assert(0) - -static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_dnm(as, A64I_CSELw|A64F_CC(cc), dest, left, right); - emit_nm(as, A64I_CMPw, left, right); -} - -static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc) -{ - Reg dest = (ra_dest(as, ir, RSET_FPR) & 31); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = ((left >> 8) & 31); left &= 31; - emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, left, right); - emit_nm(as, A64I_FCMPd, left, right); -} - -static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc) -{ - if (irt_isnum(ir->t)) - asm_fpmin_max(as, ir, fcc); - else - asm_intmin_max(as, ir, cc); -} - -#define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_HI) -#define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_LO) - -/* -- Comparisons --------------------------------------------------------- */ - -/* Map of comparisons to flags. ORDER IR. */ -static const uint8_t asm_compmap[IR_ABC+1] = { - /* op FP swp int cc FP cc */ - /* LT */ CC_GE + (CC_HS << 4), - /* GE x */ CC_LT + (CC_HI << 4), - /* LE */ CC_GT + (CC_HI << 4), - /* GT x */ CC_LE + (CC_HS << 4), - /* ULT x */ CC_HS + (CC_LS << 4), - /* UGE */ CC_LO + (CC_LO << 4), - /* ULE x */ CC_HI + (CC_LO << 4), - /* UGT */ CC_LS + (CC_LS << 4), - /* EQ */ CC_NE + (CC_NE << 4), - /* NE */ CC_EQ + (CC_EQ << 4), - /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. */ -}; - -/* FP comparisons. */ -static void asm_fpcomp(ASMState *as, IRIns *ir) -{ - Reg left, right; - A64Ins ai; - int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1); - if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) { - left = (ra_alloc1(as, ir->op1, RSET_FPR) & 31); - right = 0; - ai = A64I_FCMPZd; - } else { - left = ra_alloc2(as, ir, RSET_FPR); - if (swp) { - right = (left & 31); left = ((left >> 8) & 31); - } else { - right = ((left >> 8) & 31); left &= 31; - } - ai = A64I_FCMPd; - } - asm_guardcc(as, (asm_compmap[ir->o] >> 4)); - emit_nm(as, ai, left, right); -} - -/* Integer comparisons. */ -static void asm_intcomp(ASMState *as, IRIns *ir) -{ - A64CC oldcc, cc = (asm_compmap[ir->o] & 15); - A64Ins ai = irt_is64(ir->t) ? A64I_CMPx : A64I_CMPw; - IRRef lref = ir->op1, rref = ir->op2; - Reg left; - uint32_t m; - int cmpprev0 = 0; - lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || - irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t)); - if (asm_swapops(as, lref, rref)) { - IRRef tmp = lref; lref = rref; rref = tmp; - if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ - else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */ - } - oldcc = cc; - if (irref_isk(rref) && get_k64val(IR(rref)) == 0) { - IRIns *irl = IR(lref); - if (cc == CC_GE) cc = CC_PL; - else if (cc == CC_LT) cc = CC_MI; - else if (cc > CC_NE) goto nocombine; /* Other conds don't work with tst. */ - cmpprev0 = (irl+1 == ir); - /* Combine and-cmp-bcc into tbz/tbnz or and-cmp into tst. */ - if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) { - IRRef blref = irl->op1, brref = irl->op2; - uint32_t m2 = 0; - Reg bleft; - if (asm_swapops(as, blref, brref)) { - Reg tmp = blref; blref = brref; brref = tmp; - } - if (irref_isk(brref)) { - uint64_t k = get_k64val(IR(brref)); - if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) { - asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, - ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k)); - return; - } - m2 = emit_isk13(k, irt_is64(irl->t)); - } - bleft = ra_alloc1(as, blref, RSET_GPR); - ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw); - if (!m2) - m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft)); - asm_guardcc(as, cc); - emit_n(as, ai^m2, bleft); - return; - } - if (cc == CC_EQ || cc == CC_NE) { - /* Combine cmp-bcc into cbz/cbnz. */ - ai = cc == CC_EQ ? A64I_CBZ : A64I_CBNZ; - if (irt_is64(ir->t)) ai |= A64I_X; - asm_guardcnb(as, ai, ra_alloc1(as, lref, RSET_GPR)); - return; - } - } -nocombine: - left = ra_alloc1(as, lref, RSET_GPR); - m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); - asm_guardcc(as, cc); - emit_n(as, ai^m, left); - /* Signed comparison with zero and referencing previous ins? */ - if (cmpprev0 && (oldcc <= CC_NE || oldcc >= CC_GE)) - as->flagmcp = as->mcp; /* Allow elimination of the compare. */ -} - -static void asm_comp(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) - asm_fpcomp(as, ir); - else - asm_intcomp(as, ir); -} - -#define asm_equal(as, ir) asm_comp(as, ir) - -/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ - -/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ -static void asm_hiop(ASMState *as, IRIns *ir) -{ - UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on 64 bit. */ -} - -/* -- Profiling ----------------------------------------------------------- */ - -static void asm_prof(ASMState *as, IRIns *ir) -{ - uint32_t k = emit_isk13(HOOK_PROFILE, 0); - lua_assert(k != 0); - UNUSED(ir); - asm_guardcc(as, CC_NE); - emit_n(as, A64I_TSTw^k, RID_TMP); - emit_lsptr(as, A64I_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask); -} - -/* -- Stack handling ------------------------------------------------------ */ - -/* Check Lua stack size for overflow. Use exit handler as fallback. */ -static void asm_stack_check(ASMState *as, BCReg topslot, - IRIns *irp, RegSet allow, ExitNo exitno) -{ - Reg pbase; - uint32_t k; - if (irp) { - if (!ra_hasspill(irp->s)) { - pbase = irp->r; - lua_assert(ra_hasreg(pbase)); - } else if (allow) { - pbase = rset_pickbot(allow); - } else { - pbase = RID_RET; - emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */ - } - } else { - pbase = RID_BASE; - } - emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno)); - k = emit_isk12((8*topslot)); - lua_assert(k); - emit_n(as, A64I_CMPx^k, RID_TMP); - emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase); - emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP, - (int32_t)offsetof(lua_State, maxstack)); - if (irp) { /* Must not spill arbitrary registers in head of side trace. */ - if (ra_hasspill(irp->s)) - emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s)); - emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L)); - if (ra_hasspill(irp->s) && !allow) - emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */ - } else { - emit_getgl(as, RID_TMP, cur_L); - } -} - -/* Restore Lua stack from on-trace state. */ -static void asm_stack_restore(ASMState *as, SnapShot *snap) -{ - SnapEntry *map = &as->T->snapmap[snap->mapofs]; -#ifdef LUA_USE_ASSERT - SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; -#endif - MSize n, nent = snap->nent; - /* Store the value of all modified slots to the Lua stack. */ - for (n = 0; n < nent; n++) { - SnapEntry sn = map[n]; - BCReg s = snap_slot(sn); - int32_t ofs = 8*((int32_t)s-1-LJ_FR2); - IRRef ref = snap_ref(sn); - IRIns *ir = IR(ref); - if ((sn & SNAP_NORESTORE)) - continue; - if (irt_isnum(ir->t)) { - Reg src = ra_alloc1(as, ref, RSET_FPR); - emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs); - } else { - asm_tvstore64(as, RID_BASE, ofs, ref); - } - checkmclim(as); - } - lua_assert(map + nent == flinks); -} - -/* -- GC handling --------------------------------------------------------- */ - -/* Check GC threshold and do one or more GC steps. */ -static void asm_gc_check(ASMState *as) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; - IRRef args[2]; - MCLabel l_end; - Reg tmp1, tmp2; - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ - asm_guardcnb(as, A64I_CBNZ, RID_RET); /* Assumes asm_snap_prep() is done. */ - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ASMREF_TMP2; /* MSize steps */ - asm_gencall(as, ci, args); - tmp1 = ra_releasetmp(as, ASMREF_TMP1); - tmp2 = ra_releasetmp(as, ASMREF_TMP2); - emit_loadi(as, tmp2, as->gcsteps); - /* Jump around GC step if GC total < GC threshold. */ - emit_cond_branch(as, CC_LS, l_end); - emit_nm(as, A64I_CMPx, RID_TMP, tmp2); - emit_lso(as, A64I_LDRx, tmp2, tmp1, - (int32_t)offsetof(global_State, gc.threshold)); - emit_lso(as, A64I_LDRx, RID_TMP, tmp1, - (int32_t)offsetof(global_State, gc.total)); - ra_allockreg(as, i64ptr(J2G(as->J)), tmp1); - as->gcsteps = 0; - checkmclim(as); -} - -/* -- Loop handling ------------------------------------------------------- */ - -/* Fixup the loop branch. */ -static void asm_loop_fixup(ASMState *as) -{ - MCode *p = as->mctop; - MCode *target = as->mcp; - if (as->loopinv) { /* Inverted loop branch? */ - uint32_t mask = (p[-2] & 0x7e000000) == 0x36000000 ? 0x3fffu : 0x7ffffu; - ptrdiff_t delta = target - (p - 2); - /* asm_guard* already inverted the bcc/tnb/cnb and patched the final b. */ - p[-2] |= ((uint32_t)delta & mask) << 5; - } else { - ptrdiff_t delta = target - (p - 1); - p[-1] = A64I_B | ((uint32_t)(delta) & 0x03ffffffu); - } -} - -/* -- Head of trace ------------------------------------------------------- */ - -/* Reload L register from g->cur_L. */ -static void asm_head_lreg(ASMState *as) -{ - IRIns *ir = IR(ASMREF_L); - if (ra_used(ir)) { - Reg r = ra_dest(as, ir, RSET_GPR); - emit_getgl(as, r, cur_L); - ra_evictk(as); - } -} - -/* Coalesce BASE register for a root trace. */ -static void asm_head_root_base(ASMState *as) -{ - IRIns *ir; - asm_head_lreg(as); - ir = IR(REF_BASE); - if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) - ra_spill(as, ir); - ra_destreg(as, ir, RID_BASE); -} - -/* Coalesce BASE register for a side trace. */ -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) -{ - IRIns *ir; - asm_head_lreg(as); - ir = IR(REF_BASE); - if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) - ra_spill(as, ir); - if (ra_hasspill(irp->s)) { - rset_clear(allow, ra_dest(as, ir, allow)); - } else { - Reg r = irp->r; - lua_assert(ra_hasreg(r)); - rset_clear(allow, r); - if (r != ir->r && !rset_test(as->freeset, r)) - ra_restore(as, regcost_ref(as->cost[r])); - ra_destreg(as, ir, r); - } - return allow; -} - -/* -- Tail of trace ------------------------------------------------------- */ - -/* Fixup the tail code. */ -static void asm_tail_fixup(ASMState *as, TraceNo lnk) -{ - MCode *p = as->mctop; - MCode *target; - /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */ - int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED)); - if (spadj == 0) { - *--p = A64I_NOP; - as->mctop = p; - } else { - /* Patch stack adjustment. */ - uint32_t k = emit_isk12(spadj); - lua_assert(k); - p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP); - } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - p[-1] = A64I_B | (((target-p)+1)&0x03ffffffu); -} - -/* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) -{ - MCode *p = as->mctop - 1; /* Leave room for exit branch. */ - if (as->loopref) { - as->invmcp = as->mcp = p; - } else { - as->mcp = p-1; /* Leave room for stack pointer adjustment. */ - as->invmcp = NULL; - } - *p = 0; /* Prevent load/store merging. */ -} - -/* -- Trace setup --------------------------------------------------------- */ - -/* Ensure there are enough stack slots for call arguments. */ -static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - IRRef args[CCI_NARGS_MAX*2]; - uint32_t i, nargs = CCI_XNARGS(ci); - int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; - asm_collectargs(as, ir, ci, args); - for (i = 0; i < nargs; i++) { - if (args[i] && irt_isfp(IR(args[i])->t)) { - if (nfpr > 0) nfpr--; else nslots += 2; - } else { - if (ngpr > 0) ngpr--; else nslots += 2; - } - } - if (nslots > as->evenspill) /* Leave room for args in stack slots. */ - as->evenspill = nslots; - return REGSP_HINT(RID_RET); -} - -static void asm_setup_target(ASMState *as) -{ - /* May need extra exit for asm_stack_check on side traces. */ - asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); -} - -/* -- Trace patching ------------------------------------------------------ */ - -/* Patch exit jumps of existing machine code to a new target. */ -void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) -{ - MCode *p = T->mcode; - MCode *pe = (MCode *)((char *)p + T->szmcode); - MCode *cstart = NULL, *cend = p; - MCode *mcarea = lj_mcode_patch(J, p, 0); - MCode *px = exitstub_trace_addr(T, exitno); - for (; p < pe; p++) { - /* Look for exitstub branch, replace with branch to target. */ - uint32_t ins = *p; - if ((ins & 0xff000000u) == 0x54000000u && - ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { - /* Patch bcc exitstub. */ - *p = (ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u); - cend = p+1; - if (!cstart) cstart = p; - } else if ((ins & 0xfc000000u) == 0x14000000u && - ((ins ^ (px-p)) & 0x03ffffffu) == 0) { - /* Patch b exitstub. */ - *p = (ins & 0xfc000000u) | ((target-p) & 0x03ffffffu); - cend = p+1; - if (!cstart) cstart = p; - } else if ((ins & 0x7e000000u) == 0x34000000u && - ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { - /* Patch cbz/cbnz exitstub. */ - *p = (ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u); - cend = p+1; - if (!cstart) cstart = p; - } else if ((ins & 0x7e000000u) == 0x36000000u && - ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) { - /* Patch tbz/tbnz exitstub. */ - *p = (ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u); - cend = p+1; - if (!cstart) cstart = p; - } - } - lua_assert(cstart != NULL); - lj_mcode_sync(cstart, cend); - lj_mcode_patch(J, mcarea, 1); -} - diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h deleted file mode 100644 index affe7d8996..0000000000 --- a/src/lj_asm_mips.h +++ /dev/null @@ -1,2505 +0,0 @@ -/* -** MIPS IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -*/ - -/* -- Register allocator extensions --------------------------------------- */ - -/* Allocate a register with a hint. */ -static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) -{ - Reg r = IR(ref)->r; - if (ra_noreg(r)) { - if (!ra_hashint(r) && !iscrossref(as, ref)) - ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ - r = ra_allocref(as, ref, allow); - } - ra_noweak(as, r); - return r; -} - -/* Allocate a register or RID_ZERO. */ -static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow) -{ - Reg r = IR(ref)->r; - if (ra_noreg(r)) { - if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(IR(ref)) == 0) - return RID_ZERO; - r = ra_allocref(as, ref, allow); - } else { - ra_noweak(as, r); - } - return r; -} - -/* Allocate two source registers for three-operand instructions. */ -static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) -{ - IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); - Reg left = irl->r, right = irr->r; - if (ra_hasreg(left)) { - ra_noweak(as, left); - if (ra_noreg(right)) - right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left)); - else - ra_noweak(as, right); - } else if (ra_hasreg(right)) { - ra_noweak(as, right); - left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right)); - } else if (ra_hashint(right)) { - right = ra_alloc1z(as, ir->op2, allow); - left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right)); - } else { - left = ra_alloc1z(as, ir->op1, allow); - right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left)); - } - return left | (right << 8); -} - -/* -- Guard handling ------------------------------------------------------ */ - -/* Need some spare long-range jump slots, for out-of-range branches. */ -#define MIPS_SPAREJUMP 4 - -/* Setup spare long-range jump slots per mcarea. */ -static void asm_sparejump_setup(ASMState *as) -{ - MCode *mxp = as->mcbot; - /* Assumes sizeof(MCLink) == 8. */ - if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == 8) { - lua_assert(MIPSI_NOP == 0); - memset(mxp+2, 0, MIPS_SPAREJUMP*8); - mxp += MIPS_SPAREJUMP*2; - lua_assert(mxp < as->mctop); - lj_mcode_sync(as->mcbot, mxp); - lj_mcode_commitbot(as->J, mxp); - as->mcbot = mxp; - as->mclim = as->mcbot + MCLIM_REDZONE; - } -} - -/* Setup exit stub after the end of each trace. */ -static void asm_exitstub_setup(ASMState *as) -{ - MCode *mxp = as->mctop; - /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */ - *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno; - *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu); - lua_assert(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0); - *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0; - as->mctop = mxp; -} - -/* Keep this in-sync with exitstub_trace_addr(). */ -#define asm_exitstub_addr(as) ((as)->mctop) - -/* Emit conditional branch to exit for guard. */ -static void asm_guard(ASMState *as, MIPSIns mi, Reg rs, Reg rt) -{ - MCode *target = asm_exitstub_addr(as); - MCode *p = as->mcp; - if (LJ_UNLIKELY(p == as->invmcp)) { - as->invmcp = NULL; - as->loopinv = 1; - as->mcp = p+1; - mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u); /* Invert cond. */ - target = p; /* Patch target later in asm_loop_fixup. */ - } - emit_ti(as, MIPSI_LI, RID_TMP, as->snapno); - emit_branch(as, mi, rs, rt, target); -} - -/* -- Operand fusion ------------------------------------------------------ */ - -/* Limit linear search to this distance. Avoids O(n^2) behavior. */ -#define CONFLICT_SEARCH_LIM 31 - -/* Check if there's no conflicting instruction between curins and ref. */ -static int noconflict(ASMState *as, IRRef ref, IROp conflict) -{ - IRIns *ir = as->ir; - IRRef i = as->curins; - if (i > ref + CONFLICT_SEARCH_LIM) - return 0; /* Give up, ref is too far away. */ - while (--i > ref) - if (ir[i].o == conflict) - return 0; /* Conflict found. */ - return 1; /* Ok, no conflict. */ -} - -/* Fuse the array base of colocated arrays. */ -static int32_t asm_fuseabase(ASMState *as, IRRef ref) -{ - IRIns *ir = IR(ref); - if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && - !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) - return (int32_t)sizeof(GCtab); - return 0; -} - -/* Fuse array/hash/upvalue reference into register+offset operand. */ -static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) -{ - IRIns *ir = IR(ref); - if (ra_noreg(ir->r)) { - if (ir->o == IR_AREF) { - if (mayfuse(as, ref)) { - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? tab : ir->op1; - ofs += 8*IR(ir->op2)->i; - if (checki16(ofs)) { - *ofsp = ofs; - return ra_alloc1(as, refa, allow); - } - } - } - } else if (ir->o == IR_HREFK) { - if (mayfuse(as, ref)) { - int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); - if (checki16(ofs)) { - *ofsp = ofs; - return ra_alloc1(as, ir->op1, allow); - } - } - } else if (ir->o == IR_UREFC) { - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - intptr_t ofs = (intptr_t)&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv; - intptr_t jgl = (intptr_t)J2G(as->J); - if ((uintptr_t)(ofs-jgl) < 65536) { - *ofsp = ofs-jgl-32768; - return RID_JGL; - } else { - *ofsp = (int16_t)ofs; - return ra_allock(as, ofs-(int16_t)ofs, allow); - } - } - } - } - *ofsp = 0; - return ra_alloc1(as, ref, allow); -} - -/* Fuse XLOAD/XSTORE reference into load/store operand. */ -static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref, - RegSet allow, int32_t ofs) -{ - IRIns *ir = IR(ref); - Reg base; - if (ra_noreg(ir->r) && canfuse(as, ir)) { - if (ir->o == IR_ADD) { - intptr_t ofs2; - if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(IR(ir->op2)), - checki16(ofs2))) { - ref = ir->op1; - ofs = (int32_t)ofs2; - } - } else if (ir->o == IR_STRREF) { - intptr_t ofs2 = 65536; - lua_assert(ofs == 0); - ofs = (int32_t)sizeof(GCstr); - if (irref_isk(ir->op2)) { - ofs2 = ofs + get_kval(IR(ir->op2)); - ref = ir->op1; - } else if (irref_isk(ir->op1)) { - ofs2 = ofs + get_kval(IR(ir->op1)); - ref = ir->op2; - } - if (!checki16(ofs2)) { - /* NYI: Fuse ADD with constant. */ - Reg right, left = ra_alloc2(as, ir, allow); - right = (left >> 8); left &= 255; - emit_hsi(as, mi, rt, RID_TMP, ofs); - emit_dst(as, MIPSI_AADDU, RID_TMP, left, right); - return; - } - ofs = ofs2; - } - } - base = ra_alloc1(as, ref, allow); - emit_hsi(as, mi, rt, base, ofs); -} - -/* -- Calls --------------------------------------------------------------- */ - -/* Generate a call to a C function. */ -static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) -{ - uint32_t n, nargs = CCI_XNARGS(ci); - int32_t ofs = LJ_32 ? 16 : 0; -#if LJ_SOFTFP - Reg gpr = REGARG_FIRSTGPR; -#else - Reg gpr, fpr = REGARG_FIRSTFPR; -#endif - if ((void *)ci->func) - emit_call(as, (void *)ci->func, 1); -#if !LJ_SOFTFP - for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) - as->cost[gpr] = REGCOST(~0u, ASMREF_L); - gpr = REGARG_FIRSTGPR; -#endif - for (n = 0; n < nargs; n++) { /* Setup args. */ - IRRef ref = args[n]; - if (ref) { - IRIns *ir = IR(ref); -#if !LJ_SOFTFP - if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR && - !(ci->flags & CCI_VARARG)) { - lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ - ra_leftov(as, fpr, ref); - fpr += LJ_32 ? 2 : 1; - gpr += (LJ_32 && irt_isnum(ir->t)) ? 2 : 1; - } else -#endif - { -#if LJ_32 && !LJ_SOFTFP - fpr = REGARG_LASTFPR+1; -#endif - if (LJ_32 && irt_isnum(ir->t)) gpr = (gpr+1) & ~1; - if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ -#if !LJ_SOFTFP - if (irt_isfp(ir->t)) { - RegSet of = as->freeset; - Reg r; - /* Workaround to protect argument GPRs from being used for remat. */ - as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1); - r = ra_alloc1(as, ref, RSET_FPR); - as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); - if (irt_isnum(ir->t)) { -#if LJ_32 - emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1); - emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r); - lua_assert(rset_test(as->freeset, gpr+1)); /* Already evicted. */ - gpr += 2; -#else - emit_tg(as, MIPSI_DMFC1, gpr, r); - gpr++; fpr++; -#endif - } else if (irt_isfloat(ir->t)) { - emit_tg(as, MIPSI_MFC1, gpr, r); - gpr++; -#if LJ_64 - fpr++; -#endif - } - } else -#endif - { - ra_leftov(as, gpr, ref); - gpr++; -#if LJ_64 - fpr++; -#endif - } - } else { - Reg r = ra_alloc1z(as, ref, !LJ_SOFTFP && irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); -#if LJ_32 - if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; - emit_spstore(as, ir, r, ofs); - ofs += irt_isnum(ir->t) ? 8 : 4; -#else - emit_spstore(as, ir, r, ofs + ((LJ_BE && (LJ_SOFTFP || r < RID_MAX_GPR) && !irt_is64(ir->t)) ? 4 : 0)); - ofs += 8; -#endif - } - } - } else { -#if !LJ_SOFTFP - fpr = REGARG_LASTFPR+1; -#endif - if (gpr <= REGARG_LASTGPR) { - gpr++; -#if LJ_64 - fpr++; -#endif - } else { - ofs += LJ_32 ? 4 : 8; - } - } - checkmclim(as); - } -} - -/* Setup result reg/sp for call. Evict scratch regs. */ -static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - RegSet drop = RSET_SCRATCH; -#if LJ_32 - int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); -#endif -#if !LJ_SOFTFP - if ((ci->flags & CCI_NOFPRCLOBBER)) - drop &= ~RSET_FPR; -#endif - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ -#if LJ_32 - if (hiop && ra_hasreg((ir+1)->r)) - rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ -#endif - ra_evictset(as, drop); /* Evictions must be performed first. */ - if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); - if (!LJ_SOFTFP && irt_isfp(ir->t)) { - if ((ci->flags & CCI_CASTU64)) { - int32_t ofs = sps_scale(ir->s); - Reg dest = ir->r; - if (ra_hasreg(dest)) { - ra_free(as, dest); - ra_modified(as, dest); -#if LJ_32 - emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1); - emit_tg(as, MIPSI_MTC1, RID_RETLO, dest); -#else - emit_tg(as, MIPSI_DMTC1, RID_RET, dest); -#endif - } - if (ofs) { -#if LJ_32 - emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0)); - emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4)); -#else - emit_tsi(as, MIPSI_SD, RID_RET, RID_SP, ofs); -#endif - } - } else { - ra_destreg(as, ir, RID_FPRET); - } -#if LJ_32 - } else if (hiop) { - ra_destpair(as, ir); -#endif - } else { - ra_destreg(as, ir, RID_RET); - } - } -} - -static void asm_callx(ASMState *as, IRIns *ir) -{ - IRRef args[CCI_NARGS_MAX*2]; - CCallInfo ci; - IRRef func; - IRIns *irf; - ci.flags = asm_callx_flags(as, ir); - asm_collectargs(as, ir, &ci, args); - asm_setupresult(as, ir, &ci); - func = ir->op2; irf = IR(func); - if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } - if (irref_isk(func)) { /* Call to constant address. */ - ci.func = (ASMFunction)(void *)get_kval(irf); - } else { /* Need specific register for indirect calls. */ - Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); - MCode *p = as->mcp; - if (r == RID_CFUNCADDR) - *--p = MIPSI_NOP; - else - *--p = MIPSI_MOVE | MIPSF_D(RID_CFUNCADDR) | MIPSF_S(r); - *--p = MIPSI_JALR | MIPSF_S(r); - as->mcp = p; - ci.func = (ASMFunction)(void *)0; - } - asm_gencall(as, &ci, args); -} - -#if !LJ_SOFTFP -static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) -{ - /* The modified regs must match with the *.dasc implementation. */ - RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)| - RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR); - if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); - ra_evictset(as, drop); - ra_destreg(as, ir, RID_FPRET); - emit_call(as, (void *)lj_ir_callinfo[id].func, 0); - ra_leftov(as, REGARG_FIRSTFPR, ir->op1); -} -#endif - -/* -- Returns ------------------------------------------------------------- */ - -/* Return to lower frame. Guard that it goes to the right spot. */ -static void asm_retf(ASMState *as, IRIns *ir) -{ - Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); - void *pc = ir_kptr(IR(ir->op2)); - int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); - as->topslot -= (BCReg)delta; - if ((int32_t)as->topslot < 0) as->topslot = 0; - irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ - emit_setgl(as, base, jit_base); - emit_addptr(as, base, -8*delta); - asm_guard(as, MIPSI_BNE, RID_TMP, - ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base))); - emit_tsi(as, MIPSI_AL, RID_TMP, base, -8); -} - -/* -- Type conversions ---------------------------------------------------- */ - -#if !LJ_SOFTFP -static void asm_tointg(ASMState *as, IRIns *ir, Reg left) -{ - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - Reg dest = ra_dest(as, ir, RSET_GPR); - asm_guard(as, MIPSI_BC1F, 0, 0); - emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left); - emit_fg(as, MIPSI_CVT_D_W, tmp, tmp); - emit_tg(as, MIPSI_MFC1, dest, tmp); - emit_fg(as, MIPSI_CVT_W_D, tmp, left); -} - -static void asm_tobit(ASMState *as, IRIns *ir) -{ - RegSet allow = RSET_FPR; - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, allow); - Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); - Reg tmp = ra_scratch(as, rset_clear(allow, right)); - emit_tg(as, MIPSI_MFC1, dest, tmp); - emit_fgh(as, MIPSI_ADD_D, tmp, left, right); -} -#endif - -static void asm_conv(ASMState *as, IRIns *ir) -{ - IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); -#if !LJ_SOFTFP - int stfp = (st == IRT_NUM || st == IRT_FLOAT); -#endif -#if LJ_64 - int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); -#endif - IRRef lref = ir->op1; -#if LJ_32 - lua_assert(!(irt_isint64(ir->t) || - (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ -#endif -#if LJ_32 && LJ_SOFTFP - /* FP conversions are handled by SPLIT. */ - lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); - /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ -#else - lua_assert(irt_type(ir->t) != st); - if (irt_isfp(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - if (stfp) { /* FP to FP conversion. */ - emit_fg(as, st == IRT_NUM ? MIPSI_CVT_S_D : MIPSI_CVT_D_S, - dest, ra_alloc1(as, lref, RSET_FPR)); - } else if (st == IRT_U32) { /* U32 to FP conversion. */ - /* y = (x ^ 0x8000000) + 2147483648.0 */ - Reg left = ra_alloc1(as, lref, RSET_GPR); - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); - if (irt_isfloat(ir->t)) - emit_fg(as, MIPSI_CVT_S_D, dest, dest); - /* Must perform arithmetic with doubles to keep the precision. */ - emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp); - emit_fg(as, MIPSI_CVT_D_W, dest, dest); - emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); - emit_tg(as, MIPSI_MTC1, RID_TMP, dest); - emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left); - emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); -#if LJ_64 - } else if(st == IRT_U64) { /* U64 to FP conversion. */ - /* if (x >= 1u<<63) y = (double)(int64_t)(x&(1u<<63)-1) + pow(2.0, 63) */ - Reg left = ra_alloc1(as, lref, RSET_GPR); - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); - MCLabel l_end = emit_label(as); - if (irt_isfloat(ir->t)) { - emit_fgh(as, MIPSI_ADD_S, dest, dest, tmp); - emit_lsptr(as, MIPSI_LWC1, (tmp & 31), (void *)&as->J->k32[LJ_K32_2P63], - rset_exclude(RSET_GPR, left)); - emit_fg(as, MIPSI_CVT_S_L, dest, dest); - } else { - emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp); - emit_lsptr(as, MIPSI_LDC1, (tmp & 31), (void *)&as->J->k64[LJ_K64_2P63], - rset_exclude(RSET_GPR, left)); - emit_fg(as, MIPSI_CVT_D_L, dest, dest); - } - emit_branch(as, MIPSI_BGEZ, left, RID_ZERO, l_end); - emit_tg(as, MIPSI_DMTC1, RID_TMP, dest); - emit_tsml(as, MIPSI_DEXTM, RID_TMP, left, 30, 0); -#endif - } else { /* Integer to FP conversion. */ - Reg left = ra_alloc1(as, lref, RSET_GPR); -#if LJ_32 - emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W, - dest, dest); - emit_tg(as, MIPSI_MTC1, left, dest); -#else - MIPSIns mi = irt_isfloat(ir->t) ? - (st64 ? MIPSI_CVT_S_L : MIPSI_CVT_S_W) : - (st64 ? MIPSI_CVT_D_L : MIPSI_CVT_D_W); - emit_fg(as, mi, dest, dest); - emit_tg(as, st64 ? MIPSI_DMTC1 : MIPSI_MTC1, left, dest); -#endif - } - } else if (stfp) { /* FP to integer conversion. */ - if (irt_isguard(ir->t)) { - /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); - asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, lref, RSET_FPR); - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - if (irt_isu32(ir->t)) { /* FP to U32 conversion. */ - /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */ - emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP); - emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); - emit_tg(as, MIPSI_MFC1, dest, tmp); - emit_fg(as, st == IRT_FLOAT ? MIPSI_FLOOR_W_S : MIPSI_FLOOR_W_D, - tmp, tmp); - emit_fgh(as, st == IRT_FLOAT ? MIPSI_SUB_S : MIPSI_SUB_D, - tmp, left, tmp); - if (st == IRT_FLOAT) - emit_lsptr(as, MIPSI_LWC1, (tmp & 31), - (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); - else - emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); -#if LJ_64 - } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */ - MCLabel l_end; - emit_tg(as, MIPSI_DMFC1, dest, tmp); - l_end = emit_label(as); - /* For inputs >= 2^63 add -2^64 and convert again. */ - if (st == IRT_NUM) { - emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp); - emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp); - emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)&as->J->k64[LJ_K64_M2P64], - rset_exclude(RSET_GPR, dest)); - emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */ - emit_branch(as, MIPSI_BC1T, 0, 0, l_end); - emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); - emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)&as->J->k64[LJ_K64_2P63], - rset_exclude(RSET_GPR, dest)); - } else { - emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp); - emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp); - emit_lsptr(as, MIPSI_LWC1, (tmp & 31), - (void *)&as->J->k32[LJ_K32_M2P64], - rset_exclude(RSET_GPR, dest)); - emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */ - emit_branch(as, MIPSI_BC1T, 0, 0, l_end); - emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); - emit_lsptr(as, MIPSI_LWC1, (tmp & 31), - (void *)&as->J->k32[LJ_K32_2P63], - rset_exclude(RSET_GPR, dest)); - } -#endif - } else { -#if LJ_32 - emit_tg(as, MIPSI_MFC1, dest, tmp); - emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, - tmp, left); -#else - MIPSIns mi = irt_is64(ir->t) ? - (st == IRT_NUM ? MIPSI_TRUNC_L_D : MIPSI_TRUNC_L_S) : - (st == IRT_NUM ? MIPSI_TRUNC_W_D : MIPSI_TRUNC_W_S); - emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, left); - emit_fg(as, mi, left, left); -#endif - } - } - } else -#endif - { - Reg dest = ra_dest(as, ir, RSET_GPR); - if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); - if ((ir->op2 & IRCONV_SEXT)) { - if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { - emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left); - } else { - uint32_t shift = st == IRT_I8 ? 24 : 16; - emit_dta(as, MIPSI_SRA, dest, dest, shift); - emit_dta(as, MIPSI_SLL, dest, left, shift); - } - } else { - emit_tsi(as, MIPSI_ANDI, dest, left, - (int32_t)(st == IRT_U8 ? 0xff : 0xffff)); - } - } else { /* 32/64 bit integer conversions. */ -#if LJ_32 - /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */ - ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ -#else - if (irt_is64(ir->t)) { - if (st64) { - /* 64/64 bit no-op (cast)*/ - ra_leftov(as, dest, lref); - } else { - Reg left = ra_alloc1(as, lref, RSET_GPR); - if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */ - emit_dta(as, MIPSI_SLL, dest, left, 0); - } else { /* 32 to 64 bit zero extension. */ - emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0); - } - } - } else { - if (st64) { - /* This is either a 32 bit reg/reg mov which zeroes the hiword - ** or a load of the loword from a 64 bit address. - */ - Reg left = ra_alloc1(as, lref, RSET_GPR); - emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0); - } else { /* 32/32 bit no-op (cast). */ - /* Do nothing, but may need to move regs. */ - ra_leftov(as, dest, lref); - } - } -#endif - } - } -} - -static void asm_strto(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; - IRRef args[2]; - int32_t ofs = 0; -#if LJ_SOFTFP - ra_evictset(as, RSET_SCRATCH); - if (ra_used(ir)) { - if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && - (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) { - int i; - for (i = 0; i < 2; i++) { - Reg r = (ir+i)->r; - if (ra_hasreg(r)) { - ra_free(as, r); - ra_modified(as, r); - emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); - } - } - ofs = sps_scale(ir->s & ~1); - } else { - Reg rhi = ra_dest(as, ir+1, RSET_GPR); - Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); - emit_tsi(as, MIPSI_LW, rhi, RID_SP, ofs+(LJ_BE?0:4)); - emit_tsi(as, MIPSI_LW, rlo, RID_SP, ofs+(LJ_BE?4:0)); - } - } -#else - RegSet drop = RSET_SCRATCH; - if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ - ra_evictset(as, drop); - ofs = sps_scale(ir->s); -#endif - asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); /* Test return status. */ - args[0] = ir->op1; /* GCstr *str */ - args[1] = ASMREF_TMP1; /* TValue *n */ - asm_gencall(as, ci, args); - /* Store the result to the spill slot or temp slots. */ - emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), - RID_SP, ofs); -} - -/* -- Memory references --------------------------------------------------- */ - -#if LJ_64 -/* Store tagged value for ref at base+ofs. */ -static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) -{ - RegSet allow = rset_exclude(RSET_GPR, base); - IRIns *ir = IR(ref); - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); - if (irref_isk(ref)) { - TValue k; - lj_ir_kvalue(as->J->L, &k, ir); - emit_tsi(as, MIPSI_SD, ra_allock(as, (int64_t)k.u64, allow), base, ofs); - } else { - Reg src = ra_alloc1(as, ref, allow); - Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, - rset_exclude(allow, src)); - emit_tsi(as, MIPSI_SD, RID_TMP, base, ofs); - if (irt_isinteger(ir->t)) { - emit_dst(as, MIPSI_DADDU, RID_TMP, RID_TMP, type); - emit_tsml(as, MIPSI_DEXT, RID_TMP, src, 31, 0); - } else { - emit_dst(as, MIPSI_DADDU, RID_TMP, src, type); - } - } -} -#endif - -/* Get pointer to TValue. */ -static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) -{ - IRIns *ir = IR(ref); - if (irt_isnum(ir->t)) { - if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ - ra_allockreg(as, igcptr(ir_knum(ir)), dest); - else /* Otherwise force a spill and use the spill slot. */ - emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir)); - } else { - /* Otherwise use g->tmptv to hold the TValue. */ -#if LJ_32 - RegSet allow = rset_exclude(RSET_GPR, dest); - Reg type; - emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, (int32_t)(offsetof(global_State, tmptv)-32768)); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, allow); - emit_setgl(as, src, tmptv.gcr); - } - if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) - type = ra_alloc1(as, ref+1, allow); - else - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - emit_setgl(as, type, tmptv.it); -#else - asm_tvstore64(as, dest, 0, ref); - emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL, - (int32_t)(offsetof(global_State, tmptv)-32768)); -#endif - } -} - -static void asm_aref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx, base; - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? tab : ir->op1; - ofs += 8*IR(ir->op2)->i; - if (checki16(ofs)) { - base = ra_alloc1(as, refa, RSET_GPR); - emit_tsi(as, MIPSI_AADDIU, dest, base, ofs); - return; - } - } - base = ra_alloc1(as, ir->op1, RSET_GPR); - idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); - emit_dst(as, MIPSI_AADDU, dest, RID_TMP, base); - emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3); -} - -/* Inlined hash lookup. Specialized for key type and for const keys. -** The equivalent C code is: -** Node *n = hashkey(t, key); -** do { -** if (lj_obj_equal(&n->key, key)) return &n->val; -** } while ((n = nextnode(n))); -** return niltv(L); -*/ -static void asm_href(ASMState *as, IRIns *ir, IROp merge) -{ - RegSet allow = RSET_GPR; - int destused = ra_used(ir); - Reg dest = ra_dest(as, ir, allow); - Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); - Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2; - IRRef refkey = ir->op2; - IRIns *irkey = IR(refkey); - int isk = irref_isk(refkey); - IRType1 kt = irkey->t; - uint32_t khash; - MCLabel l_end, l_loop, l_next; - - rset_clear(allow, tab); -#if LJ_32 && LJ_SOFTFP - if (!isk) { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - if (irkey[1].o == IR_HIOP) { - if (ra_hasreg((irkey+1)->r)) { - type = tmpnum = (irkey+1)->r; - tmp1 = ra_scratch(as, allow); - rset_clear(allow, tmp1); - ra_noweak(as, tmpnum); - } else { - type = tmpnum = ra_allocref(as, refkey+1, allow); - } - rset_clear(allow, tmpnum); - } else { - type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow); - rset_clear(allow, type); - } - } -#else - if (irt_isnum(kt)) { - key = ra_alloc1(as, refkey, RSET_FPR); - tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); - } else if (!irt_ispri(kt)) { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); -#if LJ_32 - type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow); - rset_clear(allow, type); -#endif - } -#endif - tmp2 = ra_scratch(as, allow); - rset_clear(allow, tmp2); - - /* Key not found in chain: jump to exit (if merged) or load niltv. */ - l_end = emit_label(as); - as->invmcp = NULL; - if (merge == IR_NE) - asm_guard(as, MIPSI_B, RID_ZERO, RID_ZERO); - else if (destused) - emit_loada(as, dest, niltvg(J2G(as->J))); - /* Follow hash chain until the end. */ - emit_move(as, dest, tmp1); - l_loop = --as->mcp; - emit_tsi(as, MIPSI_AL, tmp1, dest, (int32_t)offsetof(Node, next)); - l_next = emit_label(as); - - /* Type and value comparison. */ - if (merge == IR_EQ) { /* Must match asm_guard(). */ - emit_ti(as, MIPSI_LI, RID_TMP, as->snapno); - l_end = asm_exitstub_addr(as); - } - if (!LJ_SOFTFP && irt_isnum(kt)) { - emit_branch(as, MIPSI_BC1T, 0, 0, l_end); - emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key); - *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */ - emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next); - emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM); -#if LJ_32 - emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n)); - } else { - if (irt_ispri(kt)) { - emit_branch(as, MIPSI_BEQ, tmp1, type, l_end); - } else { - emit_branch(as, MIPSI_BEQ, tmp2, key, l_end); - emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); - emit_branch(as, MIPSI_BNE, tmp1, type, l_next); - } - } - emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it)); - *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); -#else - emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15); - emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum); - emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); - } else if (irt_isaddr(kt)) { - Reg refk = tmp2; - if (isk) { - int64_t k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; - refk = ra_allock(as, k, allow); - rset_clear(allow, refk); - } - emit_branch(as, MIPSI_BEQ, tmp1, refk, l_end); - emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key)); - } else { - Reg pri = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); - rset_clear(allow, pri); - lua_assert(irt_ispri(kt) && !irt_isnil(kt)); - emit_branch(as, MIPSI_BEQ, tmp1, pri, l_end); - emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key)); - } - *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); - if (!isk && irt_isaddr(kt)) { - type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow); - emit_dst(as, MIPSI_DADDU, tmp2, key, type); - rset_clear(allow, type); - } -#endif - - /* Load main position relative to tab->node into dest. */ - khash = isk ? ir_khash(irkey) : 1; - if (khash == 0) { - emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node)); - } else { - Reg tmphash = tmp1; - if (isk) - tmphash = ra_allock(as, khash, allow); - emit_dst(as, MIPSI_AADDU, dest, dest, tmp1); - lua_assert(sizeof(Node) == 24); - emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1); - emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3); - emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5); - emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash); - emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node)); - emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); - if (isk) { - /* Nothing to do. */ - } else if (irt_isstr(kt)) { - emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash)); - } else { /* Must match with hash*() in lj_tab.c. */ - emit_dst(as, MIPSI_SUBU, tmp1, tmp1, tmp2); - emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31); - emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2); - emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31); - emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest); -#if LJ_32 - if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) { - emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); - if ((as->flags & JIT_F_MIPSXXR2)) { - emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); - } else { - emit_dst(as, MIPSI_OR, dest, dest, tmp1); - emit_dta(as, MIPSI_SLL, tmp1, tmp1, HASH_ROT1); - emit_dta(as, MIPSI_SRL, dest, tmp1, (-HASH_ROT1)&31); - } - emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); -#if LJ_SOFTFP - emit_ds(as, MIPSI_MOVE, tmp1, type); - emit_ds(as, MIPSI_MOVE, tmp2, key); -#else - emit_tg(as, MIPSI_MFC1, tmp2, key); - emit_tg(as, MIPSI_MFC1, tmp1, key+1); -#endif - } else { - emit_dst(as, MIPSI_XOR, tmp2, key, tmp1); - emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31); - emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow)); - } -#else - emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); - emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); - if (irt_isnum(kt)) { - emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); - emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0); - emit_dta(as, MIPSI_SLL, tmp2, LJ_SOFTFP ? key : tmp1, 0); -#if !LJ_SOFTFP - emit_tg(as, MIPSI_DMFC1, tmp1, key); -#endif - } else { - checkmclim(as); - emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0); - emit_dta(as, MIPSI_SLL, tmp2, key, 0); - emit_dst(as, MIPSI_DADDU, tmp1, key, type); - } -#endif - } - } -} - -static void asm_hrefk(ASMState *as, IRIns *ir) -{ - IRIns *kslot = IR(ir->op2); - IRIns *irkey = IR(kslot->op1); - int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); - int32_t kofs = ofs + (int32_t)offsetof(Node, key); - Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; - Reg node = ra_alloc1(as, ir->op1, RSET_GPR); - RegSet allow = rset_exclude(RSET_GPR, node); - Reg idx = node; -#if LJ_32 - Reg key = RID_NONE, type = RID_TMP; - int32_t lo, hi; -#else - Reg key = ra_scratch(as, allow); - int64_t k; -#endif - lua_assert(ofs % sizeof(Node) == 0); - if (ofs > 32736) { - idx = dest; - rset_clear(allow, dest); - kofs = (int32_t)offsetof(Node, key); - } else if (ra_hasreg(dest)) { - emit_tsi(as, MIPSI_AADDIU, dest, node, ofs); - } -#if LJ_32 - if (!irt_ispri(irkey->t)) { - key = ra_scratch(as, allow); - rset_clear(allow, key); - } - if (irt_isnum(irkey->t)) { - lo = (int32_t)ir_knum(irkey)->u32.lo; - hi = (int32_t)ir_knum(irkey)->u32.hi; - } else { - lo = irkey->i; - hi = irt_toitype(irkey->t); - if (!ra_hasreg(key)) - goto nolo; - } - asm_guard(as, MIPSI_BNE, key, lo ? ra_allock(as, lo, allow) : RID_ZERO); -nolo: - asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO); - if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0)); - emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4)); -#else - if (irt_ispri(irkey->t)) { - lua_assert(!irt_isnil(irkey->t)); - k = ~((int64_t)~irt_toitype(irkey->t) << 47); - } else if (irt_isnum(irkey->t)) { - k = (int64_t)ir_knum(irkey)->u64; - } else { - k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey); - } - asm_guard(as, MIPSI_BNE, key, ra_allock(as, k, allow)); - emit_tsi(as, MIPSI_LD, key, idx, kofs); -#endif - if (ofs > 32736) - emit_tsi(as, MIPSI_AADDU, dest, node, ra_allock(as, ofs, allow)); -} - -static void asm_uref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; - emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR); - } else { - Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->o == IR_UREFC) { - asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv)); - emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); - } else { - emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v)); - } - emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) + - (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); - } -} - -static void asm_fref(ASMState *as, IRIns *ir) -{ - UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); -} - -static void asm_strref(ASMState *as, IRIns *ir) -{ -#if LJ_32 - Reg dest = ra_dest(as, ir, RSET_GPR); - IRRef ref = ir->op2, refk = ir->op1; - int32_t ofs = (int32_t)sizeof(GCstr); - Reg r; - if (irref_isk(ref)) { - IRRef tmp = refk; refk = ref; ref = tmp; - } else if (!irref_isk(refk)) { - Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); - IRIns *irr = IR(ir->op2); - if (ra_hasreg(irr->r)) { - ra_noweak(as, irr->r); - right = irr->r; - } else if (mayfuse(as, irr->op2) && - irr->o == IR_ADD && irref_isk(irr->op2) && - checki16(ofs + IR(irr->op2)->i)) { - ofs += IR(irr->op2)->i; - right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left)); - } else { - right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left)); - } - emit_tsi(as, MIPSI_ADDIU, dest, dest, ofs); - emit_dst(as, MIPSI_ADDU, dest, left, right); - return; - } - r = ra_alloc1(as, ref, RSET_GPR); - ofs += IR(refk)->i; - if (checki16(ofs)) - emit_tsi(as, MIPSI_ADDIU, dest, r, ofs); - else - emit_dst(as, MIPSI_ADDU, dest, r, - ra_allock(as, ofs, rset_exclude(RSET_GPR, r))); -#else - RegSet allow = RSET_GPR; - Reg dest = ra_dest(as, ir, allow); - Reg base = ra_alloc1(as, ir->op1, allow); - IRIns *irr = IR(ir->op2); - int32_t ofs = sizeof(GCstr); - rset_clear(allow, base); - if (irref_isk(ir->op2) && checki16(ofs + irr->i)) { - emit_tsi(as, MIPSI_DADDIU, dest, base, ofs + irr->i); - } else { - emit_tsi(as, MIPSI_DADDIU, dest, dest, ofs); - emit_dst(as, MIPSI_DADDU, dest, base, ra_alloc1(as, ir->op2, allow)); - } -#endif -} - -/* -- Loads and stores ---------------------------------------------------- */ - -static MIPSIns asm_fxloadins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: return MIPSI_LB; - case IRT_U8: return MIPSI_LBU; - case IRT_I16: return MIPSI_LH; - case IRT_U16: return MIPSI_LHU; - case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_LDC1; - case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1; - default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW; - } -} - -static MIPSIns asm_fxstoreins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: case IRT_U8: return MIPSI_SB; - case IRT_I16: case IRT_U16: return MIPSI_SH; - case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_SDC1; - case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1; - default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW; - } -} - -static void asm_fload(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - MIPSIns mi = asm_fxloadins(ir); - Reg idx; - int32_t ofs; - if (ir->op1 == REF_NIL) { - idx = RID_JGL; - ofs = (ir->op2 << 2) - 32768 - GG_OFS(g); - } else { - idx = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->op2 == IRFL_TAB_ARRAY) { - ofs = asm_fuseabase(as, ir->op1); - if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ - emit_tsi(as, MIPSI_AADDIU, dest, idx, ofs); - return; - } - } - ofs = field_ofs[ir->op2]; - } - lua_assert(!irt_isfp(ir->t)); - emit_tsi(as, mi, dest, idx, ofs); -} - -static void asm_fstore(ASMState *as, IRIns *ir) -{ - if (ir->r != RID_SINK) { - Reg src = ra_alloc1z(as, ir->op2, RSET_GPR); - IRIns *irf = IR(ir->op1); - Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); - int32_t ofs = field_ofs[irf->op2]; - MIPSIns mi = asm_fxstoreins(ir); - lua_assert(!irt_isfp(ir->t)); - emit_tsi(as, mi, src, idx, ofs); - } -} - -static void asm_xload(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, - (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); -} - -static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) -{ - if (ir->r != RID_SINK) { - Reg src = ra_alloc1z(as, ir->op2, - (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, - rset_exclude(RSET_GPR, src), ofs); - } -} - -#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) - -static void asm_ahuvload(ASMState *as, IRIns *ir) -{ - int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP); - Reg dest = RID_NONE, type = RID_TMP, idx; - RegSet allow = RSET_GPR; - int32_t ofs = 0; - IRType1 t = ir->t; - if (hiop) { - t.irt = IRT_NUM; - if (ra_used(ir+1)) { - type = ra_dest(as, ir+1, allow); - rset_clear(allow, type); - } - } - if (ra_used(ir)) { - lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); - dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); - rset_clear(allow, dest); -#if LJ_64 - if (irt_isaddr(t)) - emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0); - else if (irt_isint(t)) - emit_dta(as, MIPSI_SLL, dest, dest, 0); -#endif - } - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - rset_clear(allow, idx); - if (irt_isnum(t)) { - asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); - } else { - asm_guard(as, MIPSI_BNE, type, - ra_allock(as, (int32_t)irt_toitype(t), allow)); - } -#if LJ_32 - if (ra_hasreg(dest)) { - if (!LJ_SOFTFP && irt_isnum(t)) - emit_hsi(as, MIPSI_LDC1, dest, idx, ofs); - else - emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0)); - } - emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4)); -#else - if (ra_hasreg(dest)) { - if (!LJ_SOFTFP && irt_isnum(t)) { - emit_hsi(as, MIPSI_LDC1, dest, idx, ofs); - dest = type; - } - } else { - dest = type; - } - emit_dta(as, MIPSI_DSRA32, type, dest, 15); - emit_tsi(as, MIPSI_LD, dest, idx, ofs); -#endif -} - -static void asm_ahustore(ASMState *as, IRIns *ir) -{ - RegSet allow = RSET_GPR; - Reg idx, src = RID_NONE, type = RID_NONE; - int32_t ofs = 0; - if (ir->r == RID_SINK) - return; - if (!LJ_SOFTFP && irt_isnum(ir->t)) { - src = ra_alloc1(as, ir->op2, RSET_FPR); - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - emit_hsi(as, MIPSI_SDC1, src, idx, ofs); - } else { -#if LJ_32 - if (!irt_ispri(ir->t)) { - src = ra_alloc1(as, ir->op2, allow); - rset_clear(allow, src); - } - if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) - type = ra_alloc1(as, (ir+1)->op2, allow); - else - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - rset_clear(allow, type); - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - if (ra_hasreg(src)) - emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0)); - emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4)); -#else - Reg tmp = RID_TMP; - if (irt_ispri(ir->t)) { - tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); - rset_clear(allow, tmp); - } else { - src = ra_alloc1(as, ir->op2, allow); - rset_clear(allow, src); - type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); - rset_clear(allow, type); - } - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - emit_tsi(as, MIPSI_SD, tmp, idx, ofs); - if (ra_hasreg(src)) { - if (irt_isinteger(ir->t)) { - emit_dst(as, MIPSI_DADDU, tmp, tmp, type); - emit_tsml(as, MIPSI_DEXT, tmp, src, 31, 0); - } else { - emit_dst(as, MIPSI_DADDU, tmp, src, type); - } - } -#endif - } -} - -static void asm_sload(ASMState *as, IRIns *ir) -{ - Reg dest = RID_NONE, type = RID_NONE, base; - RegSet allow = RSET_GPR; - IRType1 t = ir->t; -#if LJ_32 - int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); - int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP); - if (hiop) - t.irt = IRT_NUM; -#else - int32_t ofs = 8*((int32_t)ir->op1-2); -#endif - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); -#if LJ_32 && LJ_SOFTFP - lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ - if (hiop && ra_used(ir+1)) { - type = ra_dest(as, ir+1, allow); - rset_clear(allow, type); - } -#else - if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { - dest = ra_scratch(as, RSET_FPR); - asm_tointg(as, ir, dest); - t.irt = IRT_NUM; /* Continue with a regular number type check. */ - } else -#endif - if (ra_used(ir)) { - lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); - dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); - rset_clear(allow, dest); - base = ra_alloc1(as, REF_BASE, allow); - rset_clear(allow, base); - if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) { - if (irt_isint(t)) { - Reg tmp = ra_scratch(as, RSET_FPR); - emit_tg(as, MIPSI_MFC1, dest, tmp); - emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp); - dest = tmp; - t.irt = IRT_NUM; /* Check for original type. */ - } else { - Reg tmp = ra_scratch(as, RSET_GPR); - emit_fg(as, MIPSI_CVT_D_W, dest, dest); - emit_tg(as, MIPSI_MTC1, tmp, dest); - dest = tmp; - t.irt = IRT_INT; /* Check for original type. */ - } - } -#if LJ_64 - else if (irt_isaddr(t)) { - /* Clear type from pointers. */ - emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0); - } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { - /* Sign-extend integers. */ - emit_dta(as, MIPSI_SLL, dest, dest, 0); - } -#endif - goto dotypecheck; - } - base = ra_alloc1(as, REF_BASE, allow); - rset_clear(allow, base); -dotypecheck: -#if LJ_32 - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - if (ra_noreg(type)) - type = RID_TMP; - if (irt_isnum(t)) { - asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); - } else { - Reg ktype = ra_allock(as, irt_toitype(t), allow); - asm_guard(as, MIPSI_BNE, type, ktype); - } - } - if (ra_hasreg(dest)) { - if (!LJ_SOFTFP && irt_isnum(t)) - emit_hsi(as, MIPSI_LDC1, dest, base, ofs); - else - emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0)); - } - if (ra_hasreg(type)) - emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4)); -#else - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - type = dest < RID_MAX_GPR ? dest : RID_TMP; - if (irt_ispri(t)) { - asm_guard(as, MIPSI_BNE, type, - ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow)); - } else { - if (irt_isnum(t)) { - asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM); - if (ra_hasreg(dest)) - emit_hsi(as, MIPSI_LDC1, dest, base, ofs); - } else { - asm_guard(as, MIPSI_BNE, RID_TMP, - ra_allock(as, (int32_t)irt_toitype(t), allow)); - } - emit_dta(as, MIPSI_DSRA32, RID_TMP, type, 15); - } - emit_tsi(as, MIPSI_LD, type, base, ofs); - } else if (ra_hasreg(dest)) { - if (irt_isnum(t)) - emit_hsi(as, MIPSI_LDC1, dest, base, ofs); - else - emit_tsi(as, irt_isint(t) ? MIPSI_LW : MIPSI_LD, dest, base, - ofs ^ ((LJ_BE && irt_isint(t)) ? 4 : 0)); - } -#endif -} - -/* -- Allocations --------------------------------------------------------- */ - -#if LJ_HASFFI -static void asm_cnew(ASMState *as, IRIns *ir) -{ - CTState *cts = ctype_ctsG(J2G(as->J)); - CTypeID id = (CTypeID)IR(ir->op1)->i; - CTSize sz; - CTInfo info = lj_ctype_info(cts, id, &sz); - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; - IRRef args[4]; - RegSet drop = RSET_SCRATCH; - lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); - - as->gcsteps++; - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - ra_evictset(as, drop); - if (ra_used(ir)) - ra_destreg(as, ir, RID_RET); /* GCcdata * */ - - /* Initialize immutable cdata object. */ - if (ir->o == IR_CNEWI) { - RegSet allow = (RSET_GPR & ~RSET_SCRATCH); -#if LJ_32 - int32_t ofs = sizeof(GCcdata); - if (sz == 8) { - ofs += 4; - lua_assert((ir+1)->o == IR_HIOP); - if (LJ_LE) ir++; - } - for (;;) { - Reg r = ra_alloc1z(as, ir->op2, allow); - emit_tsi(as, MIPSI_SW, r, RID_RET, ofs); - rset_clear(allow, r); - if (ofs == sizeof(GCcdata)) break; - ofs -= 4; if (LJ_BE) ir++; else ir--; - } -#else - emit_tsi(as, MIPSI_SD, ra_alloc1(as, ir->op2, allow), - RID_RET, sizeof(GCcdata)); -#endif - lua_assert(sz == 4 || sz == 8); - } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ - ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ir->op1; /* CTypeID id */ - args[2] = ir->op2; /* CTSize sz */ - args[3] = ASMREF_TMP1; /* CTSize align */ - asm_gencall(as, ci, args); - emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); - return; - } - - /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ - emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); - emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); - emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA); - emit_ti(as, MIPSI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */ - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ASMREF_TMP1; /* MSize size */ - asm_gencall(as, ci, args); - ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), - ra_releasetmp(as, ASMREF_TMP1)); -} -#else -#define asm_cnew(as, ir) ((void)0) -#endif - -/* -- Write barriers ------------------------------------------------------ */ - -static void asm_tbar(ASMState *as, IRIns *ir) -{ - Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); - Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); - Reg link = RID_TMP; - MCLabel l_end = emit_label(as); - emit_tsi(as, MIPSI_AS, link, tab, (int32_t)offsetof(GCtab, gclist)); - emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked)); - emit_setgl(as, tab, gc.grayagain); - emit_getgl(as, link, gc.grayagain); - emit_dst(as, MIPSI_XOR, mark, mark, RID_TMP); /* Clear black bit. */ - emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end); - emit_tsi(as, MIPSI_ANDI, RID_TMP, mark, LJ_GC_BLACK); - emit_tsi(as, MIPSI_LBU, mark, tab, (int32_t)offsetof(GCtab, marked)); -} - -static void asm_obar(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; - IRRef args[2]; - MCLabel l_end; - Reg obj, val, tmp; - /* No need for other object barriers (yet). */ - lua_assert(IR(ir->op1)->o == IR_UREFC); - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ir->op1; /* TValue *tv */ - asm_gencall(as, ci, args); - emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); - obj = IR(ir->op1)->r; - tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); - emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end); - emit_tsi(as, MIPSI_ANDI, tmp, tmp, LJ_GC_BLACK); - emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end); - emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, LJ_GC_WHITES); - val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); - emit_tsi(as, MIPSI_LBU, tmp, obj, - (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); - emit_tsi(as, MIPSI_LBU, RID_TMP, val, (int32_t)offsetof(GChead, marked)); -} - -/* -- Arithmetic and logic operations ------------------------------------- */ - -#if !LJ_SOFTFP -static void asm_fparith(ASMState *as, IRIns *ir, MIPSIns mi) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - emit_fgh(as, mi, dest, left, right); -} - -static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); - emit_fg(as, mi, dest, left); -} - -static void asm_fpmath(ASMState *as, IRIns *ir) -{ - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - return; - if (ir->op2 <= IRFPM_TRUNC) - asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2); - else if (ir->op2 == IRFPM_SQRT) - asm_fpunary(as, ir, MIPSI_SQRT_D); - else - asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); -} -#endif - -static void asm_add(ASMState *as, IRIns *ir) -{ - IRType1 t = ir->t; -#if !LJ_SOFTFP - if (irt_isnum(t)) { - asm_fparith(as, ir, MIPSI_ADD_D); - } else -#endif - { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - if (irref_isk(ir->op2)) { - intptr_t k = get_kval(IR(ir->op2)); - if (checki16(k)) { - emit_tsi(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDIU : MIPSI_ADDIU, dest, - left, k); - return; - } - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_dst(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDU : MIPSI_ADDU, dest, - left, right); - } -} - -static void asm_sub(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - asm_fparith(as, ir, MIPSI_SUB_D); - } else -#endif - { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest, - left, right); - } -} - -static void asm_mul(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - asm_fparith(as, ir, MIPSI_MUL_D); - } else -#endif - { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - if (LJ_64 && irt_is64(ir->t)) { - emit_dst(as, MIPSI_MFLO, dest, 0, 0); - emit_dst(as, MIPSI_DMULT, 0, left, right); - } else { - emit_dst(as, MIPSI_MUL, dest, left, right); - } - } -} - -static void asm_mod(ASMState *as, IRIns *ir) -{ -#if LJ_64 && LJ_HASFFI - if (!irt_isint(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : - IRCALL_lj_carith_modu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_modi); -} - -#if !LJ_SOFTFP -static void asm_pow(ASMState *as, IRIns *ir) -{ -#if LJ_64 && LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : - IRCALL_lj_carith_powu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_powi); -} - -static void asm_div(ASMState *as, IRIns *ir) -{ -#if LJ_64 && LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : - IRCALL_lj_carith_divu64); - else -#endif - asm_fparith(as, ir, MIPSI_DIV_D); -} -#endif - -static void asm_neg(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - asm_fpunary(as, ir, MIPSI_NEG_D); - } else -#endif - { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest, - RID_ZERO, left); - } -} - -#define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D) -#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) -#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) - -static void asm_arithov(ASMState *as, IRIns *ir) -{ - Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); - lua_assert(!irt_is64(ir->t)); - if (irref_isk(ir->op2)) { - int k = IR(ir->op2)->i; - if (ir->o == IR_SUBOV) k = -k; - if (checki16(k)) { /* (dest < left) == (k >= 0 ? 1 : 0) */ - left = ra_alloc1(as, ir->op1, RSET_GPR); - asm_guard(as, k >= 0 ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_dst(as, MIPSI_SLT, RID_TMP, dest, dest == left ? RID_TMP : left); - emit_tsi(as, MIPSI_ADDIU, dest, left, k); - if (dest == left) emit_move(as, RID_TMP, left); - return; - } - } - left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left), - right), dest)); - asm_guard(as, MIPSI_BLTZ, RID_TMP, 0); - emit_dst(as, MIPSI_AND, RID_TMP, RID_TMP, tmp); - if (ir->o == IR_ADDOV) { /* ((dest^left) & (dest^right)) < 0 */ - emit_dst(as, MIPSI_XOR, RID_TMP, dest, dest == right ? RID_TMP : right); - } else { /* ((dest^left) & (dest^~right)) < 0 */ - emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, dest); - emit_dst(as, MIPSI_NOR, RID_TMP, dest == right ? RID_TMP : right, RID_ZERO); - } - emit_dst(as, MIPSI_XOR, tmp, dest, dest == left ? RID_TMP : left); - emit_dst(as, ir->o == IR_ADDOV ? MIPSI_ADDU : MIPSI_SUBU, dest, left, right); - if (dest == left || dest == right) - emit_move(as, RID_TMP, dest == left ? left : right); -} - -#define asm_addov(as, ir) asm_arithov(as, ir) -#define asm_subov(as, ir) asm_arithov(as, ir) - -static void asm_mulov(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left), - right), dest)); - asm_guard(as, MIPSI_BNE, RID_TMP, tmp); - emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31); - emit_dst(as, MIPSI_MFHI, tmp, 0, 0); - emit_dst(as, MIPSI_MFLO, dest, 0, 0); - emit_dst(as, MIPSI_MULT, 0, left, right); -} - -#if LJ_32 && LJ_HASFFI -static void asm_add64(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - if (k == 0) { - emit_dst(as, MIPSI_ADDU, dest, left, RID_TMP); - goto loarith; - } else if (checki16(k)) { - emit_dst(as, MIPSI_ADDU, dest, dest, RID_TMP); - emit_tsi(as, MIPSI_ADDIU, dest, left, k); - goto loarith; - } - } - emit_dst(as, MIPSI_ADDU, dest, dest, RID_TMP); - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_dst(as, MIPSI_ADDU, dest, left, right); -loarith: - ir--; - dest = ra_dest(as, ir, RSET_GPR); - left = ra_alloc1(as, ir->op1, RSET_GPR); - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - if (k == 0) { - if (dest != left) - emit_move(as, dest, left); - return; - } else if (checki16(k)) { - if (dest == left) { - Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, left)); - emit_move(as, dest, tmp); - dest = tmp; - } - emit_dst(as, MIPSI_SLTU, RID_TMP, dest, left); - emit_tsi(as, MIPSI_ADDIU, dest, left, k); - return; - } - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - if (dest == left && dest == right) { - Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), right)); - emit_move(as, dest, tmp); - dest = tmp; - } - emit_dst(as, MIPSI_SLTU, RID_TMP, dest, dest == left ? right : left); - emit_dst(as, MIPSI_ADDU, dest, left, right); -} - -static void asm_sub64(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - emit_dst(as, MIPSI_SUBU, dest, dest, RID_TMP); - emit_dst(as, MIPSI_SUBU, dest, left, right); - ir--; - dest = ra_dest(as, ir, RSET_GPR); - left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - if (dest == left) { - Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), right)); - emit_move(as, dest, tmp); - dest = tmp; - } - emit_dst(as, MIPSI_SLTU, RID_TMP, left, dest); - emit_dst(as, MIPSI_SUBU, dest, left, right); -} - -static void asm_neg64(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - emit_dst(as, MIPSI_SUBU, dest, dest, RID_TMP); - emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left); - ir--; - dest = ra_dest(as, ir, RSET_GPR); - left = ra_alloc1(as, ir->op1, RSET_GPR); - emit_dst(as, MIPSI_SLTU, RID_TMP, RID_ZERO, dest); - emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left); -} -#endif - -static void asm_bnot(ASMState *as, IRIns *ir) -{ - Reg left, right, dest = ra_dest(as, ir, RSET_GPR); - IRIns *irl = IR(ir->op1); - if (mayfuse(as, ir->op1) && irl->o == IR_BOR) { - left = ra_alloc2(as, irl, RSET_GPR); - right = (left >> 8); left &= 255; - } else { - left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - right = RID_ZERO; - } - emit_dst(as, MIPSI_NOR, dest, left, right); -} - -static void asm_bswap(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); -#if LJ_32 - if ((as->flags & JIT_F_MIPSXXR2)) { - emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); - emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); - } else { - Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), dest)); - emit_dst(as, MIPSI_OR, dest, dest, tmp); - emit_dst(as, MIPSI_OR, dest, dest, RID_TMP); - emit_tsi(as, MIPSI_ANDI, dest, dest, 0xff00); - emit_dta(as, MIPSI_SLL, RID_TMP, RID_TMP, 8); - emit_dta(as, MIPSI_SRL, dest, left, 8); - emit_tsi(as, MIPSI_ANDI, RID_TMP, left, 0xff00); - emit_dst(as, MIPSI_OR, tmp, tmp, RID_TMP); - emit_dta(as, MIPSI_SRL, tmp, left, 24); - emit_dta(as, MIPSI_SLL, RID_TMP, left, 24); - } -#else - if (irt_is64(ir->t)) { - emit_dst(as, MIPSI_DSHD, dest, 0, RID_TMP); - emit_dst(as, MIPSI_DSBH, RID_TMP, 0, left); - } else { - emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); - emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); - } -#endif -} - -static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - if (irref_isk(ir->op2)) { - intptr_t k = get_kval(IR(ir->op2)); - if (checku16(k)) { - emit_tsi(as, mik, dest, left, k); - return; - } - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_dst(as, mi, dest, left, right); -} - -#define asm_band(as, ir) asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI) -#define asm_bor(as, ir) asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI) -#define asm_bxor(as, ir) asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI) - -static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op2)) { /* Constant shifts. */ - uint32_t shift = (uint32_t)IR(ir->op2)->i; - if (LJ_64 && irt_is64(ir->t)) mik |= (shift & 32) ? MIPSI_D32 : MIPSI_D; - emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), - (shift & 31)); - } else { - Reg right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - if (LJ_64 && irt_is64(ir->t)) mi |= MIPSI_DV; - emit_dst(as, mi, dest, right, left); /* Shift amount is in rs. */ - } -} - -#define asm_bshl(as, ir) asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL) -#define asm_bshr(as, ir) asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL) -#define asm_bsar(as, ir) asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA) -#define asm_brol(as, ir) lua_assert(0) - -static void asm_bror(ASMState *as, IRIns *ir) -{ - if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { - asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op2)) { /* Constant shifts. */ - uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - emit_rotr(as, dest, left, RID_TMP, shift); - } else { - Reg right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - emit_dst(as, MIPSI_OR, dest, dest, RID_TMP); - emit_dst(as, MIPSI_SRLV, dest, right, left); - emit_dst(as, MIPSI_SLLV, RID_TMP, RID_TMP, left); - emit_dst(as, MIPSI_SUBU, RID_TMP, ra_allock(as, 32, RSET_GPR), right); - } - } -} - -#if LJ_32 && LJ_SOFTFP -static void asm_sfpmin_max(ASMState *as, IRIns *ir) -{ - CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax]; - IRRef args[4]; - args[0^LJ_BE] = ir->op1; - args[1^LJ_BE] = (ir+1)->op1; - args[2^LJ_BE] = ir->op2; - args[3^LJ_BE] = (ir+1)->op2; - asm_setupresult(as, ir, &ci); - emit_call(as, (void *)ci.func, 0); - ci.func = NULL; - asm_gencall(as, &ci, args); -} -#endif - -static void asm_min_max(ASMState *as, IRIns *ir, int ismax) -{ - if (!LJ_SOFTFP && irt_isnum(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - if (dest == left) { - emit_fg(as, MIPSI_MOVT_D, dest, right); - } else { - emit_fg(as, MIPSI_MOVF_D, dest, left); - if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right); - } - emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - if (dest == left) { - emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP); - } else { - emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP); - if (dest != right) emit_move(as, dest, right); - } - emit_dst(as, MIPSI_SLT, RID_TMP, - ismax ? left : right, ismax ? right : left); - } -} - -#define asm_min(as, ir) asm_min_max(as, ir, 0) -#define asm_max(as, ir) asm_min_max(as, ir, 1) - -/* -- Comparisons --------------------------------------------------------- */ - -#if LJ_32 && LJ_SOFTFP -/* SFP comparisons. */ -static void asm_sfpcomp(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; - RegSet drop = RSET_SCRATCH; - Reg r; - IRRef args[4]; - args[LJ_LE ? 0 : 1] = ir->op1; args[LJ_LE ? 1 : 0] = (ir+1)->op1; - args[LJ_LE ? 2 : 3] = ir->op2; args[LJ_LE ? 3 : 2] = (ir+1)->op2; - - for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) { - if (!rset_test(as->freeset, r) && - regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) - rset_clear(drop, r); - } - ra_evictset(as, drop); - - asm_setupresult(as, ir, ci); - - switch ((IROp)ir->o) { - case IR_LT: - asm_guard(as, MIPSI_BGEZ, RID_RET, 0); - break; - case IR_ULT: - asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); - emit_loadi(as, RID_TMP, 1); - asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); - break; - case IR_GE: - asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); - emit_loadi(as, RID_TMP, 2); - asm_guard(as, MIPSI_BLTZ, RID_RET, 0); - break; - case IR_LE: - asm_guard(as, MIPSI_BGTZ, RID_RET, 0); - break; - case IR_GT: - asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); - emit_loadi(as, RID_TMP, 2); - asm_guard(as, MIPSI_BLEZ, RID_RET, 0); - break; - case IR_UGE: - asm_guard(as, MIPSI_BLTZ, RID_RET, 0); - break; - case IR_ULE: - asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); - emit_loadi(as, RID_TMP, 1); - break; - case IR_UGT: case IR_ABC: - asm_guard(as, MIPSI_BLEZ, RID_RET, 0); - break; - case IR_EQ: case IR_NE: - asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, RID_RET, RID_ZERO); - default: - break; - } - asm_gencall(as, ci, args); -} -#endif - -static void asm_comp(ASMState *as, IRIns *ir) -{ - /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ - IROp op = ir->o; - if (!LJ_SOFTFP && irt_isnum(ir->t)) { - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); - emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right); - } else { - Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); - if (op == IR_ABC) op = IR_UGT; - if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(IR(ir->op2)) == 0) { - MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) : - ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ); - asm_guard(as, mi, left, 0); - } else { - if (irref_isk(ir->op2)) { - intptr_t k = get_kval(IR(ir->op2)); - if ((op&2)) k++; - if (checki16(k)) { - asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_tsi(as, (op&4) ? MIPSI_SLTIU : MIPSI_SLTI, - RID_TMP, left, k); - return; - } - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - asm_guard(as, ((op^(op>>1))&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_dst(as, (op&4) ? MIPSI_SLTU : MIPSI_SLT, - RID_TMP, (op&2) ? right : left, (op&2) ? left : right); - } - } -} - -static void asm_equal(ASMState *as, IRIns *ir) -{ - Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ? - RSET_FPR : RSET_GPR); - right = (left >> 8); left &= 255; - if (!LJ_SOFTFP && irt_isnum(ir->t)) { - asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); - emit_fgh(as, MIPSI_C_EQ_D, 0, left, right); - } else { - asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right); - } -} - -#if LJ_32 && LJ_HASFFI -/* 64 bit integer comparisons. */ -static void asm_comp64(ASMState *as, IRIns *ir) -{ - /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ - IROp op = (ir-1)->o; - MCLabel l_end; - Reg rightlo, leftlo, righthi, lefthi = ra_alloc2(as, ir, RSET_GPR); - righthi = (lefthi >> 8); lefthi &= 255; - leftlo = ra_alloc2(as, ir-1, - rset_exclude(rset_exclude(RSET_GPR, lefthi), righthi)); - rightlo = (leftlo >> 8); leftlo &= 255; - asm_guard(as, ((op^(op>>1))&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); - l_end = emit_label(as); - if (lefthi != righthi) - emit_dst(as, (op&4) ? MIPSI_SLTU : MIPSI_SLT, RID_TMP, - (op&2) ? righthi : lefthi, (op&2) ? lefthi : righthi); - emit_dst(as, MIPSI_SLTU, RID_TMP, - (op&2) ? rightlo : leftlo, (op&2) ? leftlo : rightlo); - if (lefthi != righthi) - emit_branch(as, MIPSI_BEQ, lefthi, righthi, l_end); -} - -static void asm_comp64eq(ASMState *as, IRIns *ir) -{ - Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - asm_guard(as, ((ir-1)->o & 1) ? MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO); - tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), right)); - emit_dst(as, MIPSI_OR, RID_TMP, RID_TMP, tmp); - emit_dst(as, MIPSI_XOR, tmp, left, right); - left = ra_alloc2(as, ir-1, RSET_GPR); - right = (left >> 8); left &= 255; - emit_dst(as, MIPSI_XOR, RID_TMP, left, right); -} -#endif - -/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ - -/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ -static void asm_hiop(ASMState *as, IRIns *ir) -{ -#if LJ_32 && (LJ_HASFFI || LJ_SOFTFP) - /* HIOP is marked as a store because it needs its own DCE logic. */ - int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ - if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; - if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ - as->curins--; /* Always skip the CONV. */ -#if LJ_HASFFI && !LJ_SOFTFP - if (usehi || uselo) - asm_conv64(as, ir); - return; -#endif - } else if ((ir-1)->o < IR_EQ) { /* 64 bit integer comparisons. ORDER IR. */ - as->curins--; /* Always skip the loword comparison. */ -#if LJ_SOFTFP - if (!irt_isint(ir->t)) { - asm_sfpcomp(as, ir-1); - return; - } -#endif -#if LJ_HASFFI - asm_comp64(as, ir); -#endif - return; - } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ - as->curins--; /* Always skip the loword comparison. */ -#if LJ_SOFTFP - if (!irt_isint(ir->t)) { - asm_sfpcomp(as, ir-1); - return; - } -#endif -#if LJ_HASFFI - asm_comp64eq(as, ir); -#endif - return; -#if LJ_SOFTFP - } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { - as->curins--; /* Always skip the loword min/max. */ - if (uselo || usehi) - asm_sfpmin_max(as, ir-1); - return; -#endif - } else if ((ir-1)->o == IR_XSTORE) { - as->curins--; /* Handle both stores here. */ - if ((ir-1)->r != RID_SINK) { - asm_xstore_(as, ir, LJ_LE ? 4 : 0); - asm_xstore_(as, ir-1, LJ_LE ? 0 : 4); - } - return; - } - if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ - switch ((ir-1)->o) { -#if LJ_HASFFI - case IR_ADD: as->curins--; asm_add64(as, ir); break; - case IR_SUB: as->curins--; asm_sub64(as, ir); break; - case IR_NEG: as->curins--; asm_neg64(as, ir); break; -#endif -#if LJ_SOFTFP - case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: - case IR_STRTO: - if (!uselo) - ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ - break; -#endif - case IR_CALLN: - case IR_CALLS: - case IR_CALLXS: - if (!uselo) - ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ - break; -#if LJ_SOFTFP - case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: -#endif - case IR_CNEWI: - /* Nothing to do here. Handled by lo op itself. */ - break; - default: lua_assert(0); break; - } -#else - UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. */ -#endif -} - -/* -- Profiling ----------------------------------------------------------- */ - -static void asm_prof(ASMState *as, IRIns *ir) -{ - UNUSED(ir); - asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO); - emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE); - emit_lsglptr(as, MIPSI_LBU, RID_TMP, - (int32_t)offsetof(global_State, hookmask)); -} - -/* -- Stack handling ------------------------------------------------------ */ - -/* Check Lua stack size for overflow. Use exit handler as fallback. */ -static void asm_stack_check(ASMState *as, BCReg topslot, - IRIns *irp, RegSet allow, ExitNo exitno) -{ - /* Try to get an unused temp. register, otherwise spill/restore RID_RET*. */ - Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; - ExitNo oldsnap = as->snapno; - rset_clear(allow, pbase); -#if LJ_32 - tmp = allow ? rset_pickbot(allow) : - (pbase == RID_RETHI ? RID_RETLO : RID_RETHI); -#else - tmp = allow ? rset_pickbot(allow) : RID_RET; -#endif - as->snapno = exitno; - asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO); - as->snapno = oldsnap; - if (allow == RSET_EMPTY) /* Restore temp. register. */ - emit_tsi(as, MIPSI_AL, tmp, RID_SP, 0); - else - ra_modified(as, tmp); - emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot)); - emit_dst(as, MIPSI_ASUBU, RID_TMP, tmp, pbase); - emit_tsi(as, MIPSI_AL, tmp, tmp, offsetof(lua_State, maxstack)); - if (pbase == RID_TMP) - emit_getgl(as, RID_TMP, jit_base); - emit_getgl(as, tmp, cur_L); - if (allow == RSET_EMPTY) /* Spill temp. register. */ - emit_tsi(as, MIPSI_AS, tmp, RID_SP, 0); -} - -/* Restore Lua stack from on-trace state. */ -static void asm_stack_restore(ASMState *as, SnapShot *snap) -{ - SnapEntry *map = &as->T->snapmap[snap->mapofs]; -#if LJ_32 || defined(LUA_USE_ASSERT) - SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; -#endif - MSize n, nent = snap->nent; - /* Store the value of all modified slots to the Lua stack. */ - for (n = 0; n < nent; n++) { - SnapEntry sn = map[n]; - BCReg s = snap_slot(sn); - int32_t ofs = 8*((int32_t)s-1-LJ_FR2); - IRRef ref = snap_ref(sn); - IRIns *ir = IR(ref); - if ((sn & SNAP_NORESTORE)) - continue; - if (irt_isnum(ir->t)) { -#if LJ_SOFTFP - Reg tmp; - RegSet allow = rset_exclude(RSET_GPR, RID_BASE); - lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ - tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow); - emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?4:0)); - if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); - tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); - emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); -#else - Reg src = ra_alloc1(as, ref, RSET_FPR); - emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs); -#endif - } else { -#if LJ_32 - RegSet allow = rset_exclude(RSET_GPR, RID_BASE); - Reg type; - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, allow); - rset_clear(allow, src); - emit_tsi(as, MIPSI_SW, src, RID_BASE, ofs+(LJ_BE?4:0)); - } - if ((sn & (SNAP_CONT|SNAP_FRAME))) { - if (s == 0) continue; /* Do not overwrite link to previous frame. */ - type = ra_allock(as, (int32_t)(*flinks--), allow); -#if LJ_SOFTFP - } else if ((sn & SNAP_SOFTFPNUM)) { - type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); -#endif - } else { - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - } - emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4)); -#else - asm_tvstore64(as, RID_BASE, ofs, ref); -#endif - } - checkmclim(as); - } - lua_assert(map + nent == flinks); -} - -/* -- GC handling --------------------------------------------------------- */ - -/* Check GC threshold and do one or more GC steps. */ -static void asm_gc_check(ASMState *as) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; - IRRef args[2]; - MCLabel l_end; - Reg tmp; - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ - /* Assumes asm_snap_prep() already done. */ - asm_guard(as, MIPSI_BNE, RID_RET, RID_ZERO); - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ASMREF_TMP2; /* MSize steps */ - asm_gencall(as, ci, args); - emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); - tmp = ra_releasetmp(as, ASMREF_TMP2); - emit_loadi(as, tmp, as->gcsteps); - /* Jump around GC step if GC total < GC threshold. */ - emit_branch(as, MIPSI_BNE, RID_TMP, RID_ZERO, l_end); - emit_dst(as, MIPSI_SLTU, RID_TMP, RID_TMP, tmp); - emit_getgl(as, tmp, gc.threshold); - emit_getgl(as, RID_TMP, gc.total); - as->gcsteps = 0; - checkmclim(as); -} - -/* -- Loop handling ------------------------------------------------------- */ - -/* Fixup the loop branch. */ -static void asm_loop_fixup(ASMState *as) -{ - MCode *p = as->mctop; - MCode *target = as->mcp; - p[-1] = MIPSI_NOP; - if (as->loopinv) { /* Inverted loop branch? */ - /* asm_guard already inverted the cond branch. Only patch the target. */ - p[-3] |= ((target-p+2) & 0x0000ffffu); - } else { - p[-2] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); - } -} - -/* -- Head of trace ------------------------------------------------------- */ - -/* Coalesce BASE register for a root trace. */ -static void asm_head_root_base(ASMState *as) -{ - IRIns *ir = IR(REF_BASE); - Reg r = ir->r; - if (as->loopinv) as->mctop--; - if (ra_hasreg(r)) { - ra_free(as, r); - if (rset_test(as->modset, r) || irt_ismarked(ir->t)) - ir->r = RID_INIT; /* No inheritance for modified BASE register. */ - if (r != RID_BASE) - emit_move(as, r, RID_BASE); - } -} - -/* Coalesce BASE register for a side trace. */ -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) -{ - IRIns *ir = IR(REF_BASE); - Reg r = ir->r; - if (as->loopinv) as->mctop--; - if (ra_hasreg(r)) { - ra_free(as, r); - if (rset_test(as->modset, r) || irt_ismarked(ir->t)) - ir->r = RID_INIT; /* No inheritance for modified BASE register. */ - if (irp->r == r) { - rset_clear(allow, r); /* Mark same BASE register as coalesced. */ - } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { - rset_clear(allow, irp->r); - emit_move(as, r, irp->r); /* Move from coalesced parent reg. */ - } else { - emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ - } - } - return allow; -} - -/* -- Tail of trace ------------------------------------------------------- */ - -/* Fixup the tail code. */ -static void asm_tail_fixup(ASMState *as, TraceNo lnk) -{ - MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; - int32_t spadj = as->T->spadjust; - MCode *p = as->mctop-1; - *p = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; - p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); -} - -/* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) -{ - as->mcp = as->mctop-2; /* Leave room for branch plus nop or stack adj. */ - as->invmcp = as->loopref ? as->mcp : NULL; -} - -/* -- Trace setup --------------------------------------------------------- */ - -/* Ensure there are enough stack slots for call arguments. */ -static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - IRRef args[CCI_NARGS_MAX*2]; - uint32_t i, nargs = CCI_XNARGS(ci); -#if LJ_32 - int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; -#else - int nslots = 0, ngpr = REGARG_NUMGPR; -#endif - asm_collectargs(as, ir, ci, args); - for (i = 0; i < nargs; i++) { -#if LJ_32 - if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t) && - nfpr > 0 && !(ci->flags & CCI_VARARG)) { - nfpr--; - ngpr -= irt_isnum(IR(args[i])->t) ? 2 : 1; - } else if (!LJ_SOFTFP && args[i] && irt_isnum(IR(args[i])->t)) { - nfpr = 0; - ngpr = ngpr & ~1; - if (ngpr > 0) ngpr -= 2; else nslots = (nslots+3) & ~1; - } else { - nfpr = 0; - if (ngpr > 0) ngpr--; else nslots++; - } -#else - if (ngpr > 0) ngpr--; else nslots += 2; -#endif - } - if (nslots > as->evenspill) /* Leave room for args in stack slots. */ - as->evenspill = nslots; - return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); -} - -static void asm_setup_target(ASMState *as) -{ - asm_sparejump_setup(as); - asm_exitstub_setup(as); -} - -/* -- Trace patching ------------------------------------------------------ */ - -/* Patch exit jumps of existing machine code to a new target. */ -void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) -{ - MCode *p = T->mcode; - MCode *pe = (MCode *)((char *)p + T->szmcode); - MCode *px = exitstub_trace_addr(T, exitno); - MCode *cstart = NULL, *cstop = NULL; - MCode *mcarea = lj_mcode_patch(J, p, 0); - MCode exitload = MIPSI_LI | MIPSF_T(RID_TMP) | exitno; - MCode tjump = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); - for (p++; p < pe; p++) { - if (*p == exitload) { /* Look for load of exit number. */ - if (((p[-1] ^ (px-p)) & 0xffffu) == 0) { /* Look for exitstub branch. */ - ptrdiff_t delta = target - p; - if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */ - patchbranch: - p[-1] = (p[-1] & 0xffff0000u) | (delta & 0xffffu); - *p = MIPSI_NOP; /* Replace the load of the exit number. */ - cstop = p; - if (!cstart) cstart = p-1; - } else { /* Branch out of range. Use spare jump slot in mcarea. */ - int i; - for (i = 2; i < 2+MIPS_SPAREJUMP*2; i += 2) { - if (mcarea[i] == tjump) { - delta = mcarea+i - p; - goto patchbranch; - } else if (mcarea[i] == MIPSI_NOP) { - mcarea[i] = tjump; - cstart = mcarea+i; - delta = mcarea+i - p; - goto patchbranch; - } - } - /* Ignore jump slot overflow. Child trace is simply not attached. */ - } - } else if (p+1 == pe) { - /* Patch NOP after code for inverted loop branch. Use of J is ok. */ - lua_assert(p[1] == MIPSI_NOP); - p[1] = tjump; - *p = MIPSI_NOP; /* Replace the load of the exit number. */ - cstop = p+2; - if (!cstart) cstart = p+1; - } - } - } - if (cstart) lj_mcode_sync(cstart, cstop); - lj_mcode_patch(J, mcarea, 1); -} - diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h deleted file mode 100644 index 6daa861b91..0000000000 --- a/src/lj_asm_ppc.h +++ /dev/null @@ -1,2016 +0,0 @@ -/* -** PPC IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -*/ - -/* -- Register allocator extensions --------------------------------------- */ - -/* Allocate a register with a hint. */ -static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) -{ - Reg r = IR(ref)->r; - if (ra_noreg(r)) { - if (!ra_hashint(r) && !iscrossref(as, ref)) - ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ - r = ra_allocref(as, ref, allow); - } - ra_noweak(as, r); - return r; -} - -/* Allocate two source registers for three-operand instructions. */ -static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) -{ - IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); - Reg left = irl->r, right = irr->r; - if (ra_hasreg(left)) { - ra_noweak(as, left); - if (ra_noreg(right)) - right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); - else - ra_noweak(as, right); - } else if (ra_hasreg(right)) { - ra_noweak(as, right); - left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); - } else if (ra_hashint(right)) { - right = ra_allocref(as, ir->op2, allow); - left = ra_alloc1(as, ir->op1, rset_exclude(allow, right)); - } else { - left = ra_allocref(as, ir->op1, allow); - right = ra_alloc1(as, ir->op2, rset_exclude(allow, left)); - } - return left | (right << 8); -} - -/* -- Guard handling ------------------------------------------------------ */ - -/* Setup exit stubs after the end of each trace. */ -static void asm_exitstub_setup(ASMState *as, ExitNo nexits) -{ - ExitNo i; - MCode *mxp = as->mctop; - if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) - asm_mclimit(as); - /* 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; bl <1; bl <1; ... */ - for (i = nexits-1; (int32_t)i >= 0; i--) - *--mxp = PPCI_BL|(((-3-i)&0x00ffffffu)<<2); - *--mxp = PPCI_LI|PPCF_T(RID_TMP)|as->T->traceno; /* Read by exit handler. */ - mxp--; - *mxp = PPCI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)&0x00ffffffu)<<2); - *--mxp = PPCI_MFLR|PPCF_T(RID_TMP); - as->mctop = mxp; -} - -static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) -{ - /* Keep this in-sync with exitstub_trace_addr(). */ - return as->mctop + exitno + 3; -} - -/* Emit conditional branch to exit for guard. */ -static void asm_guardcc(ASMState *as, PPCCC cc) -{ - MCode *target = asm_exitstub_addr(as, as->snapno); - MCode *p = as->mcp; - if (LJ_UNLIKELY(p == as->invmcp)) { - as->loopinv = 1; - *p = PPCI_B | (((target-p) & 0x00ffffffu) << 2); - emit_condbranch(as, PPCI_BC, cc^4, p); - return; - } - emit_condbranch(as, PPCI_BC, cc, target); -} - -/* -- Operand fusion ------------------------------------------------------ */ - -/* Limit linear search to this distance. Avoids O(n^2) behavior. */ -#define CONFLICT_SEARCH_LIM 31 - -/* Check if there's no conflicting instruction between curins and ref. */ -static int noconflict(ASMState *as, IRRef ref, IROp conflict) -{ - IRIns *ir = as->ir; - IRRef i = as->curins; - if (i > ref + CONFLICT_SEARCH_LIM) - return 0; /* Give up, ref is too far away. */ - while (--i > ref) - if (ir[i].o == conflict) - return 0; /* Conflict found. */ - return 1; /* Ok, no conflict. */ -} - -/* Fuse the array base of colocated arrays. */ -static int32_t asm_fuseabase(ASMState *as, IRRef ref) -{ - IRIns *ir = IR(ref); - if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && - !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) - return (int32_t)sizeof(GCtab); - return 0; -} - -/* Indicates load/store indexed is ok. */ -#define AHUREF_LSX ((int32_t)0x80000000) - -/* Fuse array/hash/upvalue reference into register+offset operand. */ -static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) -{ - IRIns *ir = IR(ref); - if (ra_noreg(ir->r)) { - if (ir->o == IR_AREF) { - if (mayfuse(as, ref)) { - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? tab : ir->op1; - ofs += 8*IR(ir->op2)->i; - if (checki16(ofs)) { - *ofsp = ofs; - return ra_alloc1(as, refa, allow); - } - } - if (*ofsp == AHUREF_LSX) { - Reg base = ra_alloc1(as, ir->op1, allow); - Reg idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); - return base | (idx << 8); - } - } - } else if (ir->o == IR_HREFK) { - if (mayfuse(as, ref)) { - int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); - if (checki16(ofs)) { - *ofsp = ofs; - return ra_alloc1(as, ir->op1, allow); - } - } - } else if (ir->o == IR_UREFC) { - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv); - int32_t jgl = (intptr_t)J2G(as->J); - if ((uint32_t)(ofs-jgl) < 65536) { - *ofsp = ofs-jgl-32768; - return RID_JGL; - } else { - *ofsp = (int16_t)ofs; - return ra_allock(as, ofs-(int16_t)ofs, allow); - } - } - } - } - *ofsp = 0; - return ra_alloc1(as, ref, allow); -} - -/* Fuse XLOAD/XSTORE reference into load/store operand. */ -static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref, - RegSet allow, int32_t ofs) -{ - IRIns *ir = IR(ref); - Reg base; - if (ra_noreg(ir->r) && canfuse(as, ir)) { - if (ir->o == IR_ADD) { - int32_t ofs2; - if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) { - ofs = ofs2; - ref = ir->op1; - } else if (ofs == 0) { - Reg right, left = ra_alloc2(as, ir, allow); - right = (left >> 8); left &= 255; - emit_fab(as, PPCI_LWZX | ((pi >> 20) & 0x780), rt, left, right); - return; - } - } else if (ir->o == IR_STRREF) { - lua_assert(ofs == 0); - ofs = (int32_t)sizeof(GCstr); - if (irref_isk(ir->op2)) { - ofs += IR(ir->op2)->i; - ref = ir->op1; - } else if (irref_isk(ir->op1)) { - ofs += IR(ir->op1)->i; - ref = ir->op2; - } else { - /* NYI: Fuse ADD with constant. */ - Reg tmp, right, left = ra_alloc2(as, ir, allow); - right = (left >> 8); left &= 255; - tmp = ra_scratch(as, rset_exclude(rset_exclude(allow, left), right)); - emit_fai(as, pi, rt, tmp, ofs); - emit_tab(as, PPCI_ADD, tmp, left, right); - return; - } - if (!checki16(ofs)) { - Reg left = ra_alloc1(as, ref, allow); - Reg right = ra_allock(as, ofs, rset_exclude(allow, left)); - emit_fab(as, PPCI_LWZX | ((pi >> 20) & 0x780), rt, left, right); - return; - } - } - } - base = ra_alloc1(as, ref, allow); - emit_fai(as, pi, rt, base, ofs); -} - -/* Fuse XLOAD/XSTORE reference into indexed-only load/store operand. */ -static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref, - RegSet allow) -{ - IRIns *ira = IR(ref); - Reg right, left; - if (canfuse(as, ira) && ira->o == IR_ADD && ra_noreg(ira->r)) { - left = ra_alloc2(as, ira, allow); - right = (left >> 8); left &= 255; - } else { - right = ra_alloc1(as, ref, allow); - left = RID_R0; - } - emit_tab(as, pi, rt, left, right); -} - -/* Fuse to multiply-add/sub instruction. */ -static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) -{ - IRRef lref = ir->op1, rref = ir->op2; - IRIns *irm; - if (lref != rref && - ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && - ra_noreg(irm->r)) || - (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && - (rref = lref, pi = pir, ra_noreg(irm->r))))) { - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg add = ra_alloc1(as, rref, RSET_FPR); - Reg right, left = ra_alloc2(as, irm, rset_exclude(RSET_FPR, add)); - right = (left >> 8); left &= 255; - emit_facb(as, pi, dest, left, right, add); - return 1; - } - return 0; -} - -/* -- Calls --------------------------------------------------------------- */ - -/* Generate a call to a C function. */ -static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) -{ - uint32_t n, nargs = CCI_XNARGS(ci); - int32_t ofs = 8; - Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR; - if ((void *)ci->func) - emit_call(as, (void *)ci->func); - for (n = 0; n < nargs; n++) { /* Setup args. */ - IRRef ref = args[n]; - if (ref) { - IRIns *ir = IR(ref); - if (irt_isfp(ir->t)) { - if (fpr <= REGARG_LASTFPR) { - lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ - ra_leftov(as, fpr, ref); - fpr++; - } else { - Reg r = ra_alloc1(as, ref, RSET_FPR); - if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; - emit_spstore(as, ir, r, ofs); - ofs += irt_isnum(ir->t) ? 8 : 4; - } - } else { - if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ - ra_leftov(as, gpr, ref); - gpr++; - } else { - Reg r = ra_alloc1(as, ref, RSET_GPR); - emit_spstore(as, ir, r, ofs); - ofs += 4; - } - } - } else { - if (gpr <= REGARG_LASTGPR) - gpr++; - else - ofs += 4; - } - checkmclim(as); - } - if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */ - emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6); -} - -/* Setup result reg/sp for call. Evict scratch regs. */ -static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - RegSet drop = RSET_SCRATCH; - int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); - if ((ci->flags & CCI_NOFPRCLOBBER)) - drop &= ~RSET_FPR; - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - if (hiop && ra_hasreg((ir+1)->r)) - rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ - ra_evictset(as, drop); /* Evictions must be performed first. */ - if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); - if (irt_isfp(ir->t)) { - if ((ci->flags & CCI_CASTU64)) { - /* Use spill slot or temp slots. */ - int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; - Reg dest = ir->r; - if (ra_hasreg(dest)) { - ra_free(as, dest); - ra_modified(as, dest); - emit_fai(as, PPCI_LFD, dest, RID_SP, ofs); - } - emit_tai(as, PPCI_STW, RID_RETHI, RID_SP, ofs); - emit_tai(as, PPCI_STW, RID_RETLO, RID_SP, ofs+4); - } else { - ra_destreg(as, ir, RID_FPRET); - } -#if LJ_32 - } else if (hiop) { - ra_destpair(as, ir); -#endif - } else { - ra_destreg(as, ir, RID_RET); - } - } -} - -static void asm_callx(ASMState *as, IRIns *ir) -{ - IRRef args[CCI_NARGS_MAX*2]; - CCallInfo ci; - IRRef func; - IRIns *irf; - ci.flags = asm_callx_flags(as, ir); - asm_collectargs(as, ir, &ci, args); - asm_setupresult(as, ir, &ci); - func = ir->op2; irf = IR(func); - if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } - if (irref_isk(func)) { /* Call to constant address. */ - ci.func = (ASMFunction)(void *)(intptr_t)(irf->i); - } else { /* Need a non-argument register for indirect calls. */ - RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1); - Reg freg = ra_alloc1(as, func, allow); - *--as->mcp = PPCI_BCTRL; - *--as->mcp = PPCI_MTCTR | PPCF_T(freg); - ci.func = (ASMFunction)(void *)0; - } - asm_gencall(as, &ci, args); -} - -/* -- Returns ------------------------------------------------------------- */ - -/* Return to lower frame. Guard that it goes to the right spot. */ -static void asm_retf(ASMState *as, IRIns *ir) -{ - Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); - void *pc = ir_kptr(IR(ir->op2)); - int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); - as->topslot -= (BCReg)delta; - if ((int32_t)as->topslot < 0) as->topslot = 0; - irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ - emit_setgl(as, base, jit_base); - emit_addptr(as, base, -8*delta); - asm_guardcc(as, CC_NE); - emit_ab(as, PPCI_CMPW, RID_TMP, - ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base))); - emit_tai(as, PPCI_LWZ, RID_TMP, base, -8); -} - -/* -- Type conversions ---------------------------------------------------- */ - -static void asm_tointg(ASMState *as, IRIns *ir, Reg left) -{ - RegSet allow = RSET_FPR; - Reg tmp = ra_scratch(as, rset_clear(allow, left)); - Reg fbias = ra_scratch(as, rset_clear(allow, tmp)); - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg hibias = ra_allock(as, 0x43300000, rset_exclude(RSET_GPR, dest)); - asm_guardcc(as, CC_NE); - emit_fab(as, PPCI_FCMPU, 0, tmp, left); - emit_fab(as, PPCI_FSUB, tmp, tmp, fbias); - emit_fai(as, PPCI_LFD, tmp, RID_SP, SPOFS_TMP); - emit_tai(as, PPCI_STW, RID_TMP, RID_SP, SPOFS_TMPLO); - emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); - emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000); - emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); - emit_lsptr(as, PPCI_LFS, (fbias & 31), - (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR); - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_fb(as, PPCI_FCTIWZ, tmp, left); -} - -static void asm_tobit(ASMState *as, IRIns *ir) -{ - RegSet allow = RSET_FPR; - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, allow); - Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); - Reg tmp = ra_scratch(as, rset_clear(allow, right)); - emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_fab(as, PPCI_FADD, tmp, left, right); -} - -static void asm_conv(ASMState *as, IRIns *ir) -{ - IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); - int stfp = (st == IRT_NUM || st == IRT_FLOAT); - IRRef lref = ir->op1; - lua_assert(irt_type(ir->t) != st); - lua_assert(!(irt_isint64(ir->t) || - (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ - if (irt_isfp(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - if (stfp) { /* FP to FP conversion. */ - if (st == IRT_NUM) /* double -> float conversion. */ - emit_fb(as, PPCI_FRSP, dest, ra_alloc1(as, lref, RSET_FPR)); - else /* float -> double conversion is a no-op on PPC. */ - ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ - } else { /* Integer to FP conversion. */ - /* IRT_INT: Flip hibit, bias with 2^52, subtract 2^52+2^31. */ - /* IRT_U32: Bias with 2^52, subtract 2^52. */ - RegSet allow = RSET_GPR; - Reg left = ra_alloc1(as, lref, allow); - Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left)); - Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); - if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest); - emit_fab(as, PPCI_FSUB, dest, dest, fbias); - emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); - emit_lsptr(as, PPCI_LFS, (fbias & 31), - &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31], - rset_clear(allow, hibias)); - emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP, - RID_SP, SPOFS_TMPLO); - emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); - if (st != IRT_U32) emit_asi(as, PPCI_XORIS, RID_TMP, left, 0x8000); - } - } else if (stfp) { /* FP to integer conversion. */ - if (irt_isguard(ir->t)) { - /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); - asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, lref, RSET_FPR); - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - if (irt_isu32(ir->t)) { - /* Convert both x and x-2^31 to int and merge results. */ - Reg tmpi = ra_scratch(as, rset_exclude(RSET_GPR, dest)); - emit_asb(as, PPCI_OR, dest, dest, tmpi); /* Select with mask idiom. */ - emit_asb(as, PPCI_AND, tmpi, tmpi, RID_TMP); - emit_asb(as, PPCI_ANDC, dest, dest, RID_TMP); - emit_tai(as, PPCI_LWZ, tmpi, RID_SP, SPOFS_TMPLO); /* tmp = (int)(x) */ - emit_tai(as, PPCI_ADDIS, dest, dest, 0x8000); /* dest += 2^31 */ - emit_asb(as, PPCI_SRAWI, RID_TMP, dest, 31); /* mask = -(dest < 0) */ - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_tai(as, PPCI_LWZ, dest, - RID_SP, SPOFS_TMPLO); /* dest = (int)(x-2^31) */ - emit_fb(as, PPCI_FCTIWZ, tmp, left); - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_fb(as, PPCI_FCTIWZ, tmp, tmp); - emit_fab(as, PPCI_FSUB, tmp, left, tmp); - emit_lsptr(as, PPCI_LFS, (tmp & 31), - (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); - } else { - emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_fb(as, PPCI_FCTIWZ, tmp, left); - } - } - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); - if ((ir->op2 & IRCONV_SEXT)) - emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left); - else - emit_rot(as, PPCI_RLWINM, dest, left, 0, st == IRT_U8 ? 24 : 16, 31); - } else { /* 32/64 bit integer conversions. */ - /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */ - ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ - } - } -} - -static void asm_strto(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; - IRRef args[2]; - int32_t ofs; - RegSet drop = RSET_SCRATCH; - if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ - ra_evictset(as, drop); - asm_guardcc(as, CC_EQ); - emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */ - args[0] = ir->op1; /* GCstr *str */ - args[1] = ASMREF_TMP1; /* TValue *n */ - asm_gencall(as, ci, args); - /* Store the result to the spill slot or temp slots. */ - ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; - emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); -} - -/* -- Memory references --------------------------------------------------- */ - -/* Get pointer to TValue. */ -static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) -{ - IRIns *ir = IR(ref); - if (irt_isnum(ir->t)) { - if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ - ra_allockreg(as, i32ptr(ir_knum(ir)), dest); - else /* Otherwise force a spill and use the spill slot. */ - emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir)); - } else { - /* Otherwise use g->tmptv to hold the TValue. */ - RegSet allow = rset_exclude(RSET_GPR, dest); - Reg type; - emit_tai(as, PPCI_ADDI, dest, RID_JGL, (int32_t)offsetof(global_State, tmptv)-32768); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, allow); - emit_setgl(as, src, tmptv.gcr); - } - type = ra_allock(as, irt_toitype(ir->t), allow); - emit_setgl(as, type, tmptv.it); - } -} - -static void asm_aref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx, base; - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? tab : ir->op1; - ofs += 8*IR(ir->op2)->i; - if (checki16(ofs)) { - base = ra_alloc1(as, refa, RSET_GPR); - emit_tai(as, PPCI_ADDI, dest, base, ofs); - return; - } - } - base = ra_alloc1(as, ir->op1, RSET_GPR); - idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); - emit_tab(as, PPCI_ADD, dest, RID_TMP, base); - emit_slwi(as, RID_TMP, idx, 3); -} - -/* Inlined hash lookup. Specialized for key type and for const keys. -** The equivalent C code is: -** Node *n = hashkey(t, key); -** do { -** if (lj_obj_equal(&n->key, key)) return &n->val; -** } while ((n = nextnode(n))); -** return niltv(L); -*/ -static void asm_href(ASMState *as, IRIns *ir, IROp merge) -{ - RegSet allow = RSET_GPR; - int destused = ra_used(ir); - Reg dest = ra_dest(as, ir, allow); - Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); - Reg key = RID_NONE, tmp1 = RID_TMP, tmp2; - Reg tisnum = RID_NONE, tmpnum = RID_NONE; - IRRef refkey = ir->op2; - IRIns *irkey = IR(refkey); - IRType1 kt = irkey->t; - uint32_t khash; - MCLabel l_end, l_loop, l_next; - - rset_clear(allow, tab); - if (irt_isnum(kt)) { - key = ra_alloc1(as, refkey, RSET_FPR); - tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); - tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); - rset_clear(allow, tisnum); - } else if (!irt_ispri(kt)) { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - } - tmp2 = ra_scratch(as, allow); - rset_clear(allow, tmp2); - - /* Key not found in chain: jump to exit (if merged) or load niltv. */ - l_end = emit_label(as); - as->invmcp = NULL; - if (merge == IR_NE) - asm_guardcc(as, CC_EQ); - else if (destused) - emit_loada(as, dest, niltvg(J2G(as->J))); - - /* Follow hash chain until the end. */ - l_loop = --as->mcp; - emit_ai(as, PPCI_CMPWI, dest, 0); - emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(Node, next)); - l_next = emit_label(as); - - /* Type and value comparison. */ - if (merge == IR_EQ) - asm_guardcc(as, CC_EQ); - else - emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); - if (irt_isnum(kt)) { - emit_fab(as, PPCI_FCMPU, 0, tmpnum, key); - emit_condbranch(as, PPCI_BC, CC_GE, l_next); - emit_ab(as, PPCI_CMPLW, tmp1, tisnum); - emit_fai(as, PPCI_LFD, tmpnum, dest, (int32_t)offsetof(Node, key.n)); - } else { - if (!irt_ispri(kt)) { - emit_ab(as, PPCI_CMPW, tmp2, key); - emit_condbranch(as, PPCI_BC, CC_NE, l_next); - } - emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); - if (!irt_ispri(kt)) - emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); - } - emit_tai(as, PPCI_LWZ, tmp1, dest, (int32_t)offsetof(Node, key.it)); - *l_loop = PPCI_BC | PPCF_Y | PPCF_CC(CC_NE) | - (((char *)as->mcp-(char *)l_loop) & 0xffffu); - - /* Load main position relative to tab->node into dest. */ - khash = irref_isk(refkey) ? ir_khash(irkey) : 1; - if (khash == 0) { - emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); - } else { - Reg tmphash = tmp1; - if (irref_isk(refkey)) - tmphash = ra_allock(as, khash, allow); - emit_tab(as, PPCI_ADD, dest, dest, tmp1); - emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node)); - emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash); - emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); - emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); - if (irref_isk(refkey)) { - /* Nothing to do. */ - } else if (irt_isstr(kt)) { - emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash)); - } else { /* Must match with hash*() in lj_tab.c. */ - emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1); - emit_rotlwi(as, tmp2, tmp2, HASH_ROT3); - emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2); - emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31); - emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2); - if (irt_isnum(kt)) { - int32_t ofs = ra_spill(as, irkey); - emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1); - emit_rotlwi(as, dest, tmp1, HASH_ROT1); - emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1); - emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4); - emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs); - } else { - emit_asb(as, PPCI_XOR, tmp2, key, tmp1); - emit_rotlwi(as, dest, tmp1, HASH_ROT1); - emit_tai(as, PPCI_ADDI, tmp1, tmp2, HASH_BIAS); - emit_tai(as, PPCI_ADDIS, tmp2, key, (HASH_BIAS + 32768)>>16); - } - } - } -} - -static void asm_hrefk(ASMState *as, IRIns *ir) -{ - IRIns *kslot = IR(ir->op2); - IRIns *irkey = IR(kslot->op1); - int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); - int32_t kofs = ofs + (int32_t)offsetof(Node, key); - Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; - Reg node = ra_alloc1(as, ir->op1, RSET_GPR); - Reg key = RID_NONE, type = RID_TMP, idx = node; - RegSet allow = rset_exclude(RSET_GPR, node); - lua_assert(ofs % sizeof(Node) == 0); - if (ofs > 32736) { - idx = dest; - rset_clear(allow, dest); - kofs = (int32_t)offsetof(Node, key); - } else if (ra_hasreg(dest)) { - emit_tai(as, PPCI_ADDI, dest, node, ofs); - } - asm_guardcc(as, CC_NE); - if (!irt_ispri(irkey->t)) { - key = ra_scratch(as, allow); - rset_clear(allow, key); - } - rset_clear(allow, type); - if (irt_isnum(irkey->t)) { - emit_cmpi(as, key, (int32_t)ir_knum(irkey)->u32.lo); - asm_guardcc(as, CC_NE); - emit_cmpi(as, type, (int32_t)ir_knum(irkey)->u32.hi); - } else { - if (ra_hasreg(key)) { - emit_cmpi(as, key, irkey->i); /* May use RID_TMP, i.e. type. */ - asm_guardcc(as, CC_NE); - } - emit_ai(as, PPCI_CMPWI, type, irt_toitype(irkey->t)); - } - if (ra_hasreg(key)) emit_tai(as, PPCI_LWZ, key, idx, kofs+4); - emit_tai(as, PPCI_LWZ, type, idx, kofs); - if (ofs > 32736) { - emit_tai(as, PPCI_ADDIS, dest, dest, (ofs + 32768) >> 16); - emit_tai(as, PPCI_ADDI, dest, node, ofs); - } -} - -static void asm_uref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; - emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR); - } else { - Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->o == IR_UREFC) { - asm_guardcc(as, CC_NE); - emit_ai(as, PPCI_CMPWI, RID_TMP, 1); - emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv)); - emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); - } else { - emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v)); - } - emit_tai(as, PPCI_LWZ, uv, func, - (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); - } -} - -static void asm_fref(ASMState *as, IRIns *ir) -{ - UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); -} - -static void asm_strref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - IRRef ref = ir->op2, refk = ir->op1; - int32_t ofs = (int32_t)sizeof(GCstr); - Reg r; - if (irref_isk(ref)) { - IRRef tmp = refk; refk = ref; ref = tmp; - } else if (!irref_isk(refk)) { - Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); - IRIns *irr = IR(ir->op2); - if (ra_hasreg(irr->r)) { - ra_noweak(as, irr->r); - right = irr->r; - } else if (mayfuse(as, irr->op2) && - irr->o == IR_ADD && irref_isk(irr->op2) && - checki16(ofs + IR(irr->op2)->i)) { - ofs += IR(irr->op2)->i; - right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left)); - } else { - right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left)); - } - emit_tai(as, PPCI_ADDI, dest, dest, ofs); - emit_tab(as, PPCI_ADD, dest, left, right); - return; - } - r = ra_alloc1(as, ref, RSET_GPR); - ofs += IR(refk)->i; - if (checki16(ofs)) - emit_tai(as, PPCI_ADDI, dest, r, ofs); - else - emit_tab(as, PPCI_ADD, dest, r, - ra_allock(as, ofs, rset_exclude(RSET_GPR, r))); -} - -/* -- Loads and stores ---------------------------------------------------- */ - -static PPCIns asm_fxloadins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: return PPCI_LBZ; /* Needs sign-extension. */ - case IRT_U8: return PPCI_LBZ; - case IRT_I16: return PPCI_LHA; - case IRT_U16: return PPCI_LHZ; - case IRT_NUM: return PPCI_LFD; - case IRT_FLOAT: return PPCI_LFS; - default: return PPCI_LWZ; - } -} - -static PPCIns asm_fxstoreins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: case IRT_U8: return PPCI_STB; - case IRT_I16: case IRT_U16: return PPCI_STH; - case IRT_NUM: return PPCI_STFD; - case IRT_FLOAT: return PPCI_STFS; - default: return PPCI_STW; - } -} - -static void asm_fload(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - PPCIns pi = asm_fxloadins(ir); - Reg idx; - int32_t ofs; - if (ir->op1 == REF_NIL) { - idx = RID_JGL; - ofs = (ir->op2 << 2) - 32768; - } else { - idx = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->op2 == IRFL_TAB_ARRAY) { - ofs = asm_fuseabase(as, ir->op1); - if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ - emit_tai(as, PPCI_ADDI, dest, idx, ofs); - return; - } - } - ofs = field_ofs[ir->op2]; - } - lua_assert(!irt_isi8(ir->t)); - emit_tai(as, pi, dest, idx, ofs); -} - -static void asm_fstore(ASMState *as, IRIns *ir) -{ - if (ir->r != RID_SINK) { - Reg src = ra_alloc1(as, ir->op2, RSET_GPR); - IRIns *irf = IR(ir->op1); - Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); - int32_t ofs = field_ofs[irf->op2]; - PPCIns pi = asm_fxstoreins(ir); - emit_tai(as, pi, src, idx, ofs); - } -} - -static void asm_xload(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - if (irt_isi8(ir->t)) - emit_as(as, PPCI_EXTSB, dest, dest); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); -} - -static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) -{ - IRIns *irb; - if (ir->r == RID_SINK) - return; - if (ofs == 0 && mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP && - ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) { - /* Fuse BSWAP with XSTORE to stwbrx. */ - Reg src = ra_alloc1(as, irb->op1, RSET_GPR); - asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); - } else { - Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, - rset_exclude(RSET_GPR, src), ofs); - } -} - -#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) - -static void asm_ahuvload(ASMState *as, IRIns *ir) -{ - IRType1 t = ir->t; - Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx; - RegSet allow = RSET_GPR; - int32_t ofs = AHUREF_LSX; - if (ra_used(ir)) { - lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); - if (!irt_isnum(t)) ofs = 0; - dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); - rset_clear(allow, dest); - } - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - if (irt_isnum(t)) { - Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx)); - asm_guardcc(as, CC_GE); - emit_ab(as, PPCI_CMPLW, type, tisnum); - if (ra_hasreg(dest)) { - if (ofs == AHUREF_LSX) { - tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, - (idx&255)), (idx>>8))); - emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp); - } else { - emit_fai(as, PPCI_LFD, dest, idx, ofs); - } - } - } else { - asm_guardcc(as, CC_NE); - emit_ai(as, PPCI_CMPWI, type, irt_toitype(t)); - if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, idx, ofs+4); - } - if (ofs == AHUREF_LSX) { - emit_tab(as, PPCI_LWZX, type, (idx&255), tmp); - emit_slwi(as, tmp, (idx>>8), 3); - } else { - emit_tai(as, PPCI_LWZ, type, idx, ofs); - } -} - -static void asm_ahustore(ASMState *as, IRIns *ir) -{ - RegSet allow = RSET_GPR; - Reg idx, src = RID_NONE, type = RID_NONE; - int32_t ofs = AHUREF_LSX; - if (ir->r == RID_SINK) - return; - if (irt_isnum(ir->t)) { - src = ra_alloc1(as, ir->op2, RSET_FPR); - } else { - if (!irt_ispri(ir->t)) { - src = ra_alloc1(as, ir->op2, allow); - rset_clear(allow, src); - ofs = 0; - } - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - rset_clear(allow, type); - } - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - if (irt_isnum(ir->t)) { - if (ofs == AHUREF_LSX) { - emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP); - emit_slwi(as, RID_TMP, (idx>>8), 3); - } else { - emit_fai(as, PPCI_STFD, src, idx, ofs); - } - } else { - if (ra_hasreg(src)) - emit_tai(as, PPCI_STW, src, idx, ofs+4); - if (ofs == AHUREF_LSX) { - emit_tab(as, PPCI_STWX, type, (idx&255), RID_TMP); - emit_slwi(as, RID_TMP, (idx>>8), 3); - } else { - emit_tai(as, PPCI_STW, type, idx, ofs); - } - } -} - -static void asm_sload(ASMState *as, IRIns *ir) -{ - int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 0 : 4); - IRType1 t = ir->t; - Reg dest = RID_NONE, type = RID_NONE, base; - RegSet allow = RSET_GPR; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); - lua_assert(LJ_DUALNUM || - !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); - if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { - dest = ra_scratch(as, RSET_FPR); - asm_tointg(as, ir, dest); - t.irt = IRT_NUM; /* Continue with a regular number type check. */ - } else if (ra_used(ir)) { - lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); - dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); - rset_clear(allow, dest); - base = ra_alloc1(as, REF_BASE, allow); - rset_clear(allow, base); - if ((ir->op2 & IRSLOAD_CONVERT)) { - if (irt_isint(t)) { - emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); - dest = ra_scratch(as, RSET_FPR); - emit_fai(as, PPCI_STFD, dest, RID_SP, SPOFS_TMP); - emit_fb(as, PPCI_FCTIWZ, dest, dest); - t.irt = IRT_NUM; /* Check for original type. */ - } else { - Reg tmp = ra_scratch(as, allow); - Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, tmp)); - Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); - emit_fab(as, PPCI_FSUB, dest, dest, fbias); - emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); - emit_lsptr(as, PPCI_LFS, (fbias & 31), - (void *)&as->J->k32[LJ_K32_2P52_2P31], - rset_clear(allow, hibias)); - emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO); - emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); - emit_asi(as, PPCI_XORIS, tmp, tmp, 0x8000); - dest = tmp; - t.irt = IRT_INT; /* Check for original type. */ - } - } - goto dotypecheck; - } - base = ra_alloc1(as, REF_BASE, allow); - rset_clear(allow, base); -dotypecheck: - if (irt_isnum(t)) { - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); - asm_guardcc(as, CC_GE); - emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum); - type = RID_TMP; - } - if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4); - } else { - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - asm_guardcc(as, CC_NE); - emit_ai(as, PPCI_CMPWI, RID_TMP, irt_toitype(t)); - type = RID_TMP; - } - if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, base, ofs); - } - if (ra_hasreg(type)) emit_tai(as, PPCI_LWZ, type, base, ofs-4); -} - -/* -- Allocations --------------------------------------------------------- */ - -#if LJ_HASFFI -static void asm_cnew(ASMState *as, IRIns *ir) -{ - CTState *cts = ctype_ctsG(J2G(as->J)); - CTypeID id = (CTypeID)IR(ir->op1)->i; - CTSize sz; - CTInfo info = lj_ctype_info(cts, id, &sz); - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; - IRRef args[4]; - RegSet drop = RSET_SCRATCH; - lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); - - as->gcsteps++; - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - ra_evictset(as, drop); - if (ra_used(ir)) - ra_destreg(as, ir, RID_RET); /* GCcdata * */ - - /* Initialize immutable cdata object. */ - if (ir->o == IR_CNEWI) { - RegSet allow = (RSET_GPR & ~RSET_SCRATCH); - int32_t ofs = sizeof(GCcdata); - lua_assert(sz == 4 || sz == 8); - if (sz == 8) { - ofs += 4; - lua_assert((ir+1)->o == IR_HIOP); - } - for (;;) { - Reg r = ra_alloc1(as, ir->op2, allow); - emit_tai(as, PPCI_STW, r, RID_RET, ofs); - rset_clear(allow, r); - if (ofs == sizeof(GCcdata)) break; - ofs -= 4; ir++; - } - } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ - ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ir->op1; /* CTypeID id */ - args[2] = ir->op2; /* CTSize sz */ - args[3] = ASMREF_TMP1; /* CTSize align */ - asm_gencall(as, ci, args); - emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); - return; - } - - /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ - emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); - emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); - emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA); - emit_ti(as, PPCI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */ - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ASMREF_TMP1; /* MSize size */ - asm_gencall(as, ci, args); - ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), - ra_releasetmp(as, ASMREF_TMP1)); -} -#else -#define asm_cnew(as, ir) ((void)0) -#endif - -/* -- Write barriers ------------------------------------------------------ */ - -static void asm_tbar(ASMState *as, IRIns *ir) -{ - Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); - Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); - Reg link = RID_TMP; - MCLabel l_end = emit_label(as); - emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist)); - emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked)); - emit_setgl(as, tab, gc.grayagain); - lua_assert(LJ_GC_BLACK == 0x04); - emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28); /* Clear black bit. */ - emit_getgl(as, link, gc.grayagain); - emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); - emit_asi(as, PPCI_ANDIDOT, RID_TMP, mark, LJ_GC_BLACK); - emit_tai(as, PPCI_LBZ, mark, tab, (int32_t)offsetof(GCtab, marked)); -} - -static void asm_obar(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; - IRRef args[2]; - MCLabel l_end; - Reg obj, val, tmp; - /* No need for other object barriers (yet). */ - lua_assert(IR(ir->op1)->o == IR_UREFC); - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ir->op1; /* TValue *tv */ - asm_gencall(as, ci, args); - emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); - obj = IR(ir->op1)->r; - tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); - emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); - emit_asi(as, PPCI_ANDIDOT, tmp, tmp, LJ_GC_BLACK); - emit_condbranch(as, PPCI_BC, CC_EQ, l_end); - emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, LJ_GC_WHITES); - val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); - emit_tai(as, PPCI_LBZ, tmp, obj, - (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); - emit_tai(as, PPCI_LBZ, RID_TMP, val, (int32_t)offsetof(GChead, marked)); -} - -/* -- Arithmetic and logic operations ------------------------------------- */ - -static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - if (pi == PPCI_FMUL) - emit_fac(as, pi, dest, left, right); - else - emit_fab(as, pi, dest, left, right); -} - -static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); - emit_fb(as, pi, dest, left); -} - -static void asm_fpmath(ASMState *as, IRIns *ir) -{ - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - return; - if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT)) - asm_fpunary(as, ir, PPCI_FSQRT); - else - asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); -} - -static void asm_add(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD)) - asm_fparith(as, ir, PPCI_FADD); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - PPCIns pi; - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - if (checki16(k)) { - pi = PPCI_ADDI; - /* May fail due to spills/restores above, but simplifies the logic. */ - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi = PPCI_ADDICDOT; - } - emit_tai(as, pi, dest, left, k); - return; - } else if ((k & 0xffff) == 0) { - emit_tai(as, PPCI_ADDIS, dest, left, (k >> 16)); - return; - } else if (!as->sectref) { - emit_tai(as, PPCI_ADDIS, dest, dest, (k + 32768) >> 16); - emit_tai(as, PPCI_ADDI, dest, left, k); - return; - } - } - pi = PPCI_ADD; - /* May fail due to spills/restores above, but simplifies the logic. */ - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi |= PPCF_DOT; - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_tab(as, pi, dest, left, right); - } -} - -static void asm_sub(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB)) - asm_fparith(as, ir, PPCI_FSUB); - } else { - PPCIns pi = PPCI_SUBF; - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left, right; - if (irref_isk(ir->op1)) { - int32_t k = IR(ir->op1)->i; - if (checki16(k)) { - right = ra_alloc1(as, ir->op2, RSET_GPR); - emit_tai(as, PPCI_SUBFIC, dest, right, k); - return; - } - } - /* May fail due to spills/restores above, but simplifies the logic. */ - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi |= PPCF_DOT; - } - left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_tab(as, pi, dest, right, left); /* Subtract right _from_ left. */ - } -} - -static void asm_mul(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - asm_fparith(as, ir, PPCI_FMUL); - } else { - PPCIns pi = PPCI_MULLW; - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - if (checki16(k)) { - emit_tai(as, PPCI_MULLI, dest, left, k); - return; - } - } - /* May fail due to spills/restores above, but simplifies the logic. */ - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi |= PPCF_DOT; - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_tab(as, pi, dest, left, right); - } -} - -#define asm_div(as, ir) asm_fparith(as, ir, PPCI_FDIV) -#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) -#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) - -static void asm_neg(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - asm_fpunary(as, ir, PPCI_FNEG); - } else { - Reg dest, left; - PPCIns pi = PPCI_NEG; - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi |= PPCF_DOT; - } - dest = ra_dest(as, ir, RSET_GPR); - left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - emit_tab(as, pi, dest, left, 0); - } -} - -#define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS) -#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) -#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) - -static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) -{ - Reg dest, left, right; - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - } - asm_guardcc(as, CC_SO); - dest = ra_dest(as, ir, RSET_GPR); - left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - if (pi == PPCI_SUBFO) { Reg tmp = left; left = right; right = tmp; } - emit_tab(as, pi|PPCF_DOT, dest, left, right); -} - -#define asm_addov(as, ir) asm_arithov(as, ir, PPCI_ADDO) -#define asm_subov(as, ir) asm_arithov(as, ir, PPCI_SUBFO) -#define asm_mulov(as, ir) asm_arithov(as, ir, PPCI_MULLWO) - -#if LJ_HASFFI -static void asm_add64(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); - PPCIns pi = PPCI_ADDE; - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - if (k == 0) - pi = PPCI_ADDZE; - else if (k == -1) - pi = PPCI_ADDME; - else - goto needright; - right = 0; - } else { - needright: - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - } - emit_tab(as, pi, dest, left, right); - ir--; - dest = ra_dest(as, ir, RSET_GPR); - left = ra_alloc1(as, ir->op1, RSET_GPR); - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - if (checki16(k)) { - emit_tai(as, PPCI_ADDIC, dest, left, k); - return; - } - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_tab(as, PPCI_ADDC, dest, left, right); -} - -static void asm_sub64(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left, right = ra_alloc1(as, ir->op2, RSET_GPR); - PPCIns pi = PPCI_SUBFE; - if (irref_isk(ir->op1)) { - int32_t k = IR(ir->op1)->i; - if (k == 0) - pi = PPCI_SUBFZE; - else if (k == -1) - pi = PPCI_SUBFME; - else - goto needleft; - left = 0; - } else { - needleft: - left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, right)); - } - emit_tab(as, pi, dest, right, left); /* Subtract right _from_ left. */ - ir--; - dest = ra_dest(as, ir, RSET_GPR); - right = ra_alloc1(as, ir->op2, RSET_GPR); - if (irref_isk(ir->op1)) { - int32_t k = IR(ir->op1)->i; - if (checki16(k)) { - emit_tai(as, PPCI_SUBFIC, dest, right, k); - return; - } - } - left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, right)); - emit_tab(as, PPCI_SUBFC, dest, right, left); -} - -static void asm_neg64(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - emit_tab(as, PPCI_SUBFZE, dest, left, 0); - ir--; - dest = ra_dest(as, ir, RSET_GPR); - left = ra_alloc1(as, ir->op1, RSET_GPR); - emit_tai(as, PPCI_SUBFIC, dest, left, 0); -} -#endif - -static void asm_bnot(ASMState *as, IRIns *ir) -{ - Reg dest, left, right; - PPCIns pi = PPCI_NOR; - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi |= PPCF_DOT; - } - dest = ra_dest(as, ir, RSET_GPR); - if (mayfuse(as, ir->op1)) { - IRIns *irl = IR(ir->op1); - if (irl->o == IR_BAND) - pi ^= (PPCI_NOR ^ PPCI_NAND); - else if (irl->o == IR_BXOR) - pi ^= (PPCI_NOR ^ PPCI_EQV); - else if (irl->o != IR_BOR) - goto nofuse; - left = ra_hintalloc(as, irl->op1, dest, RSET_GPR); - right = ra_alloc1(as, irl->op2, rset_exclude(RSET_GPR, left)); - } else { -nofuse: - left = right = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - } - emit_asb(as, pi, dest, left, right); -} - -static void asm_bswap(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - IRIns *irx; - if (mayfuse(as, ir->op1) && (irx = IR(ir->op1))->o == IR_XLOAD && - ra_noreg(irx->r) && (irt_isint(irx->t) || irt_isu32(irx->t))) { - /* Fuse BSWAP with XLOAD to lwbrx. */ - asm_fusexrefx(as, PPCI_LWBRX, dest, irx->op1, RSET_GPR); - } else { - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - Reg tmp = dest; - if (tmp == left) { - tmp = RID_TMP; - emit_mr(as, dest, RID_TMP); - } - emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 16, 23); - emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 0, 7); - emit_rotlwi(as, tmp, left, 8); - } -} - -/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */ -static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) -{ - IRIns *ir; - Reg left; - if (mayfuse(as, ref) && (ir = IR(ref), ra_noreg(ir->r)) && - irref_isk(ir->op2) && ir->o >= IR_BSHL && ir->o <= IR_BROR) { - int32_t sh = (IR(ir->op2)->i & 31); - switch (ir->o) { - case IR_BSHL: - if ((mask & ((1u<>sh))) goto nofuse; - sh = ((32-sh)&31); - break; - case IR_BROL: - break; - default: - goto nofuse; - } - left = ra_alloc1(as, ir->op1, RSET_GPR); - *--as->mcp = pi | PPCF_T(left) | PPCF_B(sh); - return; - } -nofuse: - left = ra_alloc1(as, ref, RSET_GPR); - *--as->mcp = pi | PPCF_T(left); -} - -static void asm_band(ASMState *as, IRIns *ir) -{ - Reg dest, left, right; - IRRef lref = ir->op1; - PPCIns dot = 0; - IRRef op2; - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - dot = PPCF_DOT; - } - dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - if (k) { - /* First check for a contiguous bitmask as used by rlwinm. */ - uint32_t s1 = lj_ffs((uint32_t)k); - uint32_t k1 = ((uint32_t)k >> s1); - if ((k1 & (k1+1)) == 0) { - asm_fuseandsh(as, PPCI_RLWINM|dot | PPCF_A(dest) | - PPCF_MB(31-lj_fls((uint32_t)k)) | PPCF_ME(31-s1), - k, lref); - return; - } - if (~(uint32_t)k) { - uint32_t s2 = lj_ffs(~(uint32_t)k); - uint32_t k2 = (~(uint32_t)k >> s2); - if ((k2 & (k2+1)) == 0) { - asm_fuseandsh(as, PPCI_RLWINM|dot | PPCF_A(dest) | - PPCF_MB(32-s2) | PPCF_ME(30-lj_fls(~(uint32_t)k)), - k, lref); - return; - } - } - } - if (checku16(k)) { - left = ra_alloc1(as, lref, RSET_GPR); - emit_asi(as, PPCI_ANDIDOT, dest, left, k); - return; - } else if ((k & 0xffff) == 0) { - left = ra_alloc1(as, lref, RSET_GPR); - emit_asi(as, PPCI_ANDISDOT, dest, left, (k >> 16)); - return; - } - } - op2 = ir->op2; - if (mayfuse(as, op2) && IR(op2)->o == IR_BNOT && ra_noreg(IR(op2)->r)) { - dot ^= (PPCI_AND ^ PPCI_ANDC); - op2 = IR(op2)->op1; - } - left = ra_hintalloc(as, lref, dest, RSET_GPR); - right = ra_alloc1(as, op2, rset_exclude(RSET_GPR, left)); - emit_asb(as, PPCI_AND ^ dot, dest, left, right); -} - -static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - Reg tmp = left; - if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) { - if (!checku16(k)) { - emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16)); - if ((k & 0xffff) == 0) return; - } - emit_asi(as, pik, dest, left, k); - return; - } - } - /* May fail due to spills/restores above, but simplifies the logic. */ - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi |= PPCF_DOT; - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_asb(as, pi, dest, left, right); -} - -#define asm_bor(as, ir) asm_bitop(as, ir, PPCI_OR, PPCI_ORI) -#define asm_bxor(as, ir) asm_bitop(as, ir, PPCI_XOR, PPCI_XORI) - -static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) -{ - Reg dest, left; - Reg dot = 0; - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - dot = PPCF_DOT; - } - dest = ra_dest(as, ir, RSET_GPR); - left = ra_alloc1(as, ir->op1, RSET_GPR); - if (irref_isk(ir->op2)) { /* Constant shifts. */ - int32_t shift = (IR(ir->op2)->i & 31); - if (pik == 0) /* SLWI */ - emit_rot(as, PPCI_RLWINM|dot, dest, left, shift, 0, 31-shift); - else if (pik == 1) /* SRWI */ - emit_rot(as, PPCI_RLWINM|dot, dest, left, (32-shift)&31, shift, 31); - else - emit_asb(as, pik|dot, dest, left, shift); - } else { - Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_asb(as, pi|dot, dest, left, right); - } -} - -#define asm_bshl(as, ir) asm_bitshift(as, ir, PPCI_SLW, 0) -#define asm_bshr(as, ir) asm_bitshift(as, ir, PPCI_SRW, 1) -#define asm_bsar(as, ir) asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI) -#define asm_brol(as, ir) \ - asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \ - PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)) -#define asm_bror(as, ir) lua_assert(0) - -static void asm_min_max(ASMState *as, IRIns *ir, int ismax) -{ - if (irt_isnum(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg tmp = dest; - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - if (tmp == left || tmp == right) - tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR, - dest), left), right)); - emit_facb(as, PPCI_FSEL, dest, tmp, - ismax ? left : right, ismax ? right : left); - emit_fab(as, PPCI_FSUB, tmp, left, right); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg tmp1 = RID_TMP, tmp2 = dest; - Reg right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - if (tmp2 == left || tmp2 == right) - tmp2 = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, - dest), left), right)); - emit_tab(as, PPCI_ADD, dest, tmp2, right); - emit_asb(as, ismax ? PPCI_ANDC : PPCI_AND, tmp2, tmp2, tmp1); - emit_tab(as, PPCI_SUBFE, tmp1, tmp1, tmp1); - emit_tab(as, PPCI_SUBFC, tmp2, tmp2, tmp1); - emit_asi(as, PPCI_XORIS, tmp2, right, 0x8000); - emit_asi(as, PPCI_XORIS, tmp1, left, 0x8000); - } -} - -#define asm_min(as, ir) asm_min_max(as, ir, 0) -#define asm_max(as, ir) asm_min_max(as, ir, 1) - -/* -- Comparisons --------------------------------------------------------- */ - -#define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */ -#define CC_TWO 0x80 /* Check two flags for FP comparison. */ - -/* Map of comparisons to flags. ORDER IR. */ -static const uint8_t asm_compmap[IR_ABC+1] = { - /* op int cc FP cc */ - /* LT */ CC_GE + (CC_GE<<4), - /* GE */ CC_LT + (CC_LE<<4) + CC_TWO, - /* LE */ CC_GT + (CC_GE<<4) + CC_TWO, - /* GT */ CC_LE + (CC_LE<<4), - /* ULT */ CC_GE + CC_UNSIGNED + (CC_GT<<4) + CC_TWO, - /* UGE */ CC_LT + CC_UNSIGNED + (CC_LT<<4), - /* ULE */ CC_GT + CC_UNSIGNED + (CC_GT<<4), - /* UGT */ CC_LE + CC_UNSIGNED + (CC_LT<<4) + CC_TWO, - /* EQ */ CC_NE + (CC_NE<<4), - /* NE */ CC_EQ + (CC_EQ<<4), - /* ABC */ CC_LE + CC_UNSIGNED + (CC_LT<<4) + CC_TWO /* Same as UGT. */ -}; - -static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc) -{ - Reg right, left = ra_alloc1(as, lref, RSET_GPR); - if (irref_isk(rref)) { - int32_t k = IR(rref)->i; - if ((cc & CC_UNSIGNED) == 0) { /* Signed comparison with constant. */ - if (checki16(k)) { - emit_tai(as, PPCI_CMPWI, cr, left, k); - /* Signed comparison with zero and referencing previous ins? */ - if (k == 0 && lref == as->curins-1) - as->flagmcp = as->mcp; /* Allow elimination of the compare. */ - return; - } else if ((cc & 3) == (CC_EQ & 3)) { /* Use CMPLWI for EQ or NE. */ - if (checku16(k)) { - emit_tai(as, PPCI_CMPLWI, cr, left, k); - return; - } else if (!as->sectref && ra_noreg(IR(rref)->r)) { - emit_tai(as, PPCI_CMPLWI, cr, RID_TMP, k); - emit_asi(as, PPCI_XORIS, RID_TMP, left, (k >> 16)); - return; - } - } - } else { /* Unsigned comparison with constant. */ - if (checku16(k)) { - emit_tai(as, PPCI_CMPLWI, cr, left, k); - return; - } - } - } - right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left)); - emit_tab(as, (cc & CC_UNSIGNED) ? PPCI_CMPLW : PPCI_CMPW, cr, left, right); -} - -static void asm_comp(ASMState *as, IRIns *ir) -{ - PPCCC cc = asm_compmap[ir->o]; - if (irt_isnum(ir->t)) { - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - asm_guardcc(as, (cc >> 4)); - if ((cc & CC_TWO)) - emit_tab(as, PPCI_CROR, ((cc>>4)&3), ((cc>>4)&3), (CC_EQ&3)); - emit_fab(as, PPCI_FCMPU, 0, left, right); - } else { - IRRef lref = ir->op1, rref = ir->op2; - if (irref_isk(lref) && !irref_isk(rref)) { - /* Swap constants to the right (only for ABC). */ - IRRef tmp = lref; lref = rref; rref = tmp; - if ((cc & 2) == 0) cc ^= 1; /* LT <-> GT, LE <-> GE */ - } - asm_guardcc(as, cc); - asm_intcomp_(as, lref, rref, 0, cc); - } -} - -#define asm_equal(as, ir) asm_comp(as, ir) - -#if LJ_HASFFI -/* 64 bit integer comparisons. */ -static void asm_comp64(ASMState *as, IRIns *ir) -{ - PPCCC cc = asm_compmap[(ir-1)->o]; - if ((cc&3) == (CC_EQ&3)) { - asm_guardcc(as, cc); - emit_tab(as, (cc&4) ? PPCI_CRAND : PPCI_CROR, - (CC_EQ&3), (CC_EQ&3), 4+(CC_EQ&3)); - } else { - asm_guardcc(as, CC_EQ); - emit_tab(as, PPCI_CROR, (CC_EQ&3), (CC_EQ&3), ((cc^~(cc>>2))&1)); - emit_tab(as, (cc&4) ? PPCI_CRAND : PPCI_CRANDC, - (CC_EQ&3), (CC_EQ&3), 4+(cc&3)); - } - /* Loword comparison sets cr1 and is unsigned, except for equality. */ - asm_intcomp_(as, (ir-1)->op1, (ir-1)->op2, 4, - cc | ((cc&3) == (CC_EQ&3) ? 0 : CC_UNSIGNED)); - /* Hiword comparison sets cr0. */ - asm_intcomp_(as, ir->op1, ir->op2, 0, cc); - as->flagmcp = NULL; /* Doesn't work here. */ -} -#endif - -/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ - -/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ -static void asm_hiop(ASMState *as, IRIns *ir) -{ -#if LJ_HASFFI - /* HIOP is marked as a store because it needs its own DCE logic. */ - int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ - if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; - if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ - as->curins--; /* Always skip the CONV. */ - if (usehi || uselo) - asm_conv64(as, ir); - return; - } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ - as->curins--; /* Always skip the loword comparison. */ - asm_comp64(as, ir); - return; - } else if ((ir-1)->o == IR_XSTORE) { - as->curins--; /* Handle both stores here. */ - if ((ir-1)->r != RID_SINK) { - asm_xstore_(as, ir, 0); - asm_xstore_(as, ir-1, 4); - } - return; - } - if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ - switch ((ir-1)->o) { - case IR_ADD: as->curins--; asm_add64(as, ir); break; - case IR_SUB: as->curins--; asm_sub64(as, ir); break; - case IR_NEG: as->curins--; asm_neg64(as, ir); break; - case IR_CALLN: - case IR_CALLXS: - if (!uselo) - ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ - break; - case IR_CNEWI: - /* Nothing to do here. Handled by lo op itself. */ - break; - default: lua_assert(0); break; - } -#else - UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. */ -#endif -} - -/* -- Profiling ----------------------------------------------------------- */ - -static void asm_prof(ASMState *as, IRIns *ir) -{ - UNUSED(ir); - asm_guardcc(as, CC_NE); - emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE); - emit_lsglptr(as, PPCI_LBZ, RID_TMP, - (int32_t)offsetof(global_State, hookmask)); -} - -/* -- Stack handling ------------------------------------------------------ */ - -/* Check Lua stack size for overflow. Use exit handler as fallback. */ -static void asm_stack_check(ASMState *as, BCReg topslot, - IRIns *irp, RegSet allow, ExitNo exitno) -{ - /* Try to get an unused temp. register, otherwise spill/restore RID_RET*. */ - Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; - rset_clear(allow, pbase); - tmp = allow ? rset_pickbot(allow) : - (pbase == RID_RETHI ? RID_RETLO : RID_RETHI); - emit_condbranch(as, PPCI_BC, CC_LT, asm_exitstub_addr(as, exitno)); - if (allow == RSET_EMPTY) /* Restore temp. register. */ - emit_tai(as, PPCI_LWZ, tmp, RID_SP, SPOFS_TMPW); - else - ra_modified(as, tmp); - emit_ai(as, PPCI_CMPLWI, RID_TMP, (int32_t)(8*topslot)); - emit_tab(as, PPCI_SUBF, RID_TMP, pbase, tmp); - emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack)); - if (pbase == RID_TMP) - emit_getgl(as, RID_TMP, jit_base); - emit_getgl(as, tmp, cur_L); - if (allow == RSET_EMPTY) /* Spill temp. register. */ - emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW); -} - -/* Restore Lua stack from on-trace state. */ -static void asm_stack_restore(ASMState *as, SnapShot *snap) -{ - SnapEntry *map = &as->T->snapmap[snap->mapofs]; - SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; - MSize n, nent = snap->nent; - /* Store the value of all modified slots to the Lua stack. */ - for (n = 0; n < nent; n++) { - SnapEntry sn = map[n]; - BCReg s = snap_slot(sn); - int32_t ofs = 8*((int32_t)s-1); - IRRef ref = snap_ref(sn); - IRIns *ir = IR(ref); - if ((sn & SNAP_NORESTORE)) - continue; - if (irt_isnum(ir->t)) { - Reg src = ra_alloc1(as, ref, RSET_FPR); - emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); - } else { - Reg type; - RegSet allow = rset_exclude(RSET_GPR, RID_BASE); - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, allow); - rset_clear(allow, src); - emit_tai(as, PPCI_STW, src, RID_BASE, ofs+4); - } - if ((sn & (SNAP_CONT|SNAP_FRAME))) { - if (s == 0) continue; /* Do not overwrite link to previous frame. */ - type = ra_allock(as, (int32_t)(*flinks--), allow); - } else { - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - } - emit_tai(as, PPCI_STW, type, RID_BASE, ofs); - } - checkmclim(as); - } - lua_assert(map + nent == flinks); -} - -/* -- GC handling --------------------------------------------------------- */ - -/* Check GC threshold and do one or more GC steps. */ -static void asm_gc_check(ASMState *as) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; - IRRef args[2]; - MCLabel l_end; - Reg tmp; - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ - asm_guardcc(as, CC_NE); /* Assumes asm_snap_prep() already done. */ - emit_ai(as, PPCI_CMPWI, RID_RET, 0); - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ASMREF_TMP2; /* MSize steps */ - asm_gencall(as, ci, args); - emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); - tmp = ra_releasetmp(as, ASMREF_TMP2); - emit_loadi(as, tmp, as->gcsteps); - /* Jump around GC step if GC total < GC threshold. */ - emit_condbranch(as, PPCI_BC|PPCF_Y, CC_LT, l_end); - emit_ab(as, PPCI_CMPLW, RID_TMP, tmp); - emit_getgl(as, tmp, gc.threshold); - emit_getgl(as, RID_TMP, gc.total); - as->gcsteps = 0; - checkmclim(as); -} - -/* -- Loop handling ------------------------------------------------------- */ - -/* Fixup the loop branch. */ -static void asm_loop_fixup(ASMState *as) -{ - MCode *p = as->mctop; - MCode *target = as->mcp; - if (as->loopinv) { /* Inverted loop branch? */ - /* asm_guardcc already inverted the cond branch and patched the final b. */ - p[-2] = (p[-2] & (0xffff0000u & ~PPCF_Y)) | (((target-p+2) & 0x3fffu) << 2); - } else { - p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2); - } -} - -/* -- Head of trace ------------------------------------------------------- */ - -/* Coalesce BASE register for a root trace. */ -static void asm_head_root_base(ASMState *as) -{ - IRIns *ir = IR(REF_BASE); - Reg r = ir->r; - if (ra_hasreg(r)) { - ra_free(as, r); - if (rset_test(as->modset, r) || irt_ismarked(ir->t)) - ir->r = RID_INIT; /* No inheritance for modified BASE register. */ - if (r != RID_BASE) - emit_mr(as, r, RID_BASE); - } -} - -/* Coalesce BASE register for a side trace. */ -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) -{ - IRIns *ir = IR(REF_BASE); - Reg r = ir->r; - if (ra_hasreg(r)) { - ra_free(as, r); - if (rset_test(as->modset, r) || irt_ismarked(ir->t)) - ir->r = RID_INIT; /* No inheritance for modified BASE register. */ - if (irp->r == r) { - rset_clear(allow, r); /* Mark same BASE register as coalesced. */ - } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { - rset_clear(allow, irp->r); - emit_mr(as, r, irp->r); /* Move from coalesced parent reg. */ - } else { - emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ - } - } - return allow; -} - -/* -- Tail of trace ------------------------------------------------------- */ - -/* Fixup the tail code. */ -static void asm_tail_fixup(ASMState *as, TraceNo lnk) -{ - MCode *p = as->mctop; - MCode *target; - int32_t spadj = as->T->spadjust; - if (spadj == 0) { - *--p = PPCI_NOP; - *--p = PPCI_NOP; - as->mctop = p; - } else { - /* Patch stack adjustment. */ - lua_assert(checki16(CFRAME_SIZE+spadj)); - p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); - p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; - } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2); -} - -/* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) -{ - MCode *p = as->mctop - 1; /* Leave room for exit branch. */ - if (as->loopref) { - as->invmcp = as->mcp = p; - } else { - as->mcp = p-2; /* Leave room for stack pointer adjustment. */ - as->invmcp = NULL; - } -} - -/* -- Trace setup --------------------------------------------------------- */ - -/* Ensure there are enough stack slots for call arguments. */ -static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - IRRef args[CCI_NARGS_MAX*2]; - uint32_t i, nargs = CCI_XNARGS(ci); - int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; - asm_collectargs(as, ir, ci, args); - for (i = 0; i < nargs; i++) - if (args[i] && irt_isfp(IR(args[i])->t)) { - if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; - } else { - if (ngpr > 0) ngpr--; else nslots++; - } - if (nslots > as->evenspill) /* Leave room for args in stack slots. */ - as->evenspill = nslots; - return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); -} - -static void asm_setup_target(ASMState *as) -{ - asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); -} - -/* -- Trace patching ------------------------------------------------------ */ - -/* Patch exit jumps of existing machine code to a new target. */ -void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) -{ - MCode *p = T->mcode; - MCode *pe = (MCode *)((char *)p + T->szmcode); - MCode *px = exitstub_trace_addr(T, exitno); - MCode *cstart = NULL; - MCode *mcarea = lj_mcode_patch(J, p, 0); - int clearso = 0; - for (; p < pe; p++) { - /* Look for exitstub branch, try to replace with branch to target. */ - uint32_t ins = *p; - if ((ins & 0xfc000000u) == 0x40000000u && - ((ins ^ ((char *)px-(char *)p)) & 0xffffu) == 0) { - ptrdiff_t delta = (char *)target - (char *)p; - if (((ins >> 16) & 3) == (CC_SO&3)) { - clearso = sizeof(MCode); - delta -= sizeof(MCode); - } - /* Many, but not all short-range branches can be patched directly. */ - if (((delta + 0x8000) >> 16) == 0) { - *p = (ins & 0xffdf0000u) | ((uint32_t)delta & 0xffffu) | - ((delta & 0x8000) * (PPCF_Y/0x8000)); - if (!cstart) cstart = p; - } - } else if ((ins & 0xfc000000u) == PPCI_B && - ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) { - ptrdiff_t delta = (char *)target - (char *)p; - lua_assert(((delta + 0x02000000) >> 26) == 0); - *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu); - if (!cstart) cstart = p; - } - } - { /* Always patch long-range branch in exit stub itself. */ - ptrdiff_t delta = (char *)target - (char *)px - clearso; - lua_assert(((delta + 0x02000000) >> 26) == 0); - *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu); - } - if (!cstart) cstart = px; - lj_mcode_sync(cstart, px+1); - if (clearso) { /* Extend the current trace. Ugly workaround. */ - MCode *pp = J->cur.mcode; - J->cur.szmcode += sizeof(MCode); - *--pp = PPCI_MCRXR; /* Clear SO flag. */ - J->cur.mcode = pp; - lj_mcode_sync(pp, pp+1); - } - lj_mcode_patch(J, mcarea, 1); -} - diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index fceb187758..534102389d 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -21,14 +21,6 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) } /* Push the high byte of the exitno for each exit stub group. */ *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8); -#if !LJ_GC64 - /* Store DISPATCH at original stack slot 0. Account for the two push ops. */ - *mxp++ = XI_MOVmi; - *mxp++ = MODRM(XM_OFS8, 0, RID_ESP); - *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP); - *mxp++ = 2*sizeof(void *); - *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4; -#endif /* Jump to exit handler which fills in the ExitState. */ *mxp++ = XI_JMP; mxp += 4; *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler); @@ -85,7 +77,6 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) { if (irref_isk(ref)) { IRIns *ir = IR(ref); -#if LJ_GC64 if (ir->o == IR_KNULL || !irt_is64(ir->t)) { *k = ir->i; return 1; @@ -93,15 +84,6 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) *k = (int32_t)ir_k64(ir)->u64; return 1; } -#else - if (ir->o != IR_KINT64) { - *k = ir->i; - return 1; - } else if (checki32((int64_t)ir_kint64(ir)->u64)) { - *k = (int32_t)ir_kint64(ir)->u64; - return 1; - } -#endif } return 0; } @@ -201,7 +183,6 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) if (irref_isk(ir->op1)) { GCfunc *fn = ir_kfunc(IR(ir->op1)); GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; -#if LJ_GC64 int64_t ofs = dispofs(as, &uv->tv); if (checki32(ofs) && checki32(ofs+4)) { as->mrm.ofs = (int32_t)ofs; @@ -209,11 +190,6 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) as->mrm.idx = RID_NONE; return; } -#else - as->mrm.ofs = ptr2addr(&uv->tv); - as->mrm.base = as->mrm.idx = RID_NONE; - return; -#endif } break; default: @@ -233,19 +209,13 @@ static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF); as->mrm.idx = RID_NONE; if (ir->op1 == REF_NIL) { -#if LJ_GC64 as->mrm.ofs = (int32_t)(ir->op2 << 2) - GG_OFS(dispatch); as->mrm.base = RID_DISPATCH; -#else - as->mrm.ofs = (int32_t)(ir->op2 << 2) + ptr2addr(J2GG(as->J)); - as->mrm.base = RID_NONE; -#endif return; } as->mrm.ofs = field_ofs[ir->op2]; if (irref_isk(ir->op1)) { IRIns *op1 = IR(ir->op1); -#if LJ_GC64 if (ir->op1 == REF_NIL) { as->mrm.ofs -= GG_OFS(dispatch); as->mrm.base = RID_DISPATCH; @@ -258,11 +228,6 @@ static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) return; } } -#else - as->mrm.ofs += op1->i; - as->mrm.base = RID_NONE; - return; -#endif } as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); } @@ -307,7 +272,6 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow) IRIns *ir = IR(ref); as->mrm.idx = RID_NONE; if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { -#if LJ_GC64 intptr_t ofs = dispofs(as, ir_kptr(ir)); if (checki32(ofs)) { as->mrm.ofs = (int32_t)ofs; @@ -315,12 +279,6 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow) return; } } if (0) { -#else - as->mrm.ofs = ir->i; - as->mrm.base = RID_NONE; - } else if (ir->o == IR_STRREF) { - asm_fusestrref(as, ir, allow); -#endif } else { as->mrm.ofs = 0; if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) { @@ -370,7 +328,6 @@ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir) if (!LJ_GC64 || checki32((intptr_t)k)) { as->mrm.ofs = ptr2addr(k); as->mrm.base = RID_NONE; -#if LJ_GC64 } else if (checki32(dispofs(as, k))) { as->mrm.ofs = (int32_t)dispofs(as, k); as->mrm.base = RID_DISPATCH; @@ -390,7 +347,6 @@ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir) } as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i); as->mrm.base = RID_RIP; -#endif } as->mrm.idx = RID_NONE; return RID_MRM; @@ -427,13 +383,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) lua_assert(allow != RSET_EMPTY); if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ if (ref == REF_BASE) { -#if LJ_GC64 as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->jit_base); as->mrm.base = RID_DISPATCH; -#else - as->mrm.ofs = ptr2addr(&J2G(as->J)->jit_base); - as->mrm.base = RID_NONE; -#endif as->mrm.idx = RID_NONE; return RID_MRM; } else { @@ -489,7 +440,6 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) return ra_allocref(as, ref, allow); } -#if LJ_64 /* Don't fuse a 32 bit load into a 64 bit operation. */ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64) { @@ -497,9 +447,6 @@ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64) return ra_alloc1(as, ref, allow); return asm_fuseload(as, ref, allow); } -#else -#define asm_fuseloadm(as, ref, allow, is64) asm_fuseload(as, (ref), (allow)) -#endif /* -- Calls --------------------------------------------------------------- */ @@ -508,7 +455,6 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args) { uint32_t i, nargs = CCI_XNARGS(ci); int nslots = 0; -#if LJ_64 if (LJ_ABI_WIN) { nslots = (int)(nargs*2); /* Only matters for more than four args. */ } else { @@ -520,19 +466,6 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args) if (ngpr > 0) ngpr--; else nslots += 2; } } -#else - int ngpr = 0; - if ((ci->flags & CCI_CC_MASK) == CCI_CC_FASTCALL) - ngpr = 2; - else if ((ci->flags & CCI_CC_MASK) == CCI_CC_THISCALL) - ngpr = 1; - for (i = 0; i < nargs; i++) - if (args[i] && irt_isfp(IR(args[i])->t)) { - nslots += irt_isnum(IR(args[i])->t) ? 2 : 1; - } else { - if (ngpr > 0) ngpr--; else nslots++; - } -#endif return nslots; } @@ -541,68 +474,30 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) { uint32_t n, nargs = CCI_XNARGS(ci); int32_t ofs = STACKARG_OFS; -#if LJ_64 uint32_t gprs = REGARG_GPRS; Reg fpr = REGARG_FIRSTFPR; -#if !LJ_ABI_WIN MCode *patchnfpr = NULL; -#endif -#else - uint32_t gprs = 0; - if ((ci->flags & CCI_CC_MASK) != CCI_CC_CDECL) { - if ((ci->flags & CCI_CC_MASK) == CCI_CC_THISCALL) - gprs = (REGARG_GPRS & 31); - else if ((ci->flags & CCI_CC_MASK) == CCI_CC_FASTCALL) - gprs = REGARG_GPRS; - } -#endif if ((void *)ci->func) emit_call(as, ci->func); -#if LJ_64 if ((ci->flags & CCI_VARARG)) { /* Special handling for vararg calls. */ -#if LJ_ABI_WIN - for (n = 0; n < 4 && n < nargs; n++) { - IRIns *ir = IR(args[n]); - if (irt_isfp(ir->t)) /* Duplicate FPRs in GPRs. */ - emit_rr(as, XO_MOVDto, (irt_isnum(ir->t) ? REX_64 : 0) | (fpr+n), - ((gprs >> (n*5)) & 31)); /* Either MOVD or MOVQ. */ - } -#else patchnfpr = --as->mcp; /* Indicate number of used FPRs in register al. */ *--as->mcp = XI_MOVrib | RID_EAX; -#endif } -#endif for (n = 0; n < nargs; n++) { /* Setup args. */ IRRef ref = args[n]; IRIns *ir = IR(ref); Reg r; -#if LJ_64 && LJ_ABI_WIN - /* Windows/x64 argument registers are strictly positional. */ - r = irt_isfp(ir->t) ? (fpr <= REGARG_LASTFPR ? fpr : 0) : (gprs & 31); - fpr++; gprs >>= 5; -#elif LJ_64 /* POSIX/x64 argument registers are used in order of appearance. */ if (irt_isfp(ir->t)) { r = fpr <= REGARG_LASTFPR ? fpr++ : 0; } else { r = gprs & 31; gprs >>= 5; } -#else - if (ref && irt_isfp(ir->t)) { - r = 0; - } else { - r = gprs & 31; gprs >>= 5; - if (!ref) continue; - } -#endif if (r) { /* Argument is in a register. */ if (r < RID_MAX_GPR && ref < ASMREF_TMP1) { -#if LJ_64 if (LJ_GC64 ? !(ir->o == IR_KINT || ir->o == IR_KNULL) : ir->o == IR_KINT64) emit_loadu64(as, r, ir_k64(ir)->u64); else -#endif emit_loadi(as, r, ir->i); } else { lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */ @@ -636,9 +531,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) } checkmclim(as); } -#if LJ_64 && !LJ_ABI_WIN if (patchnfpr) *patchnfpr = fpr - REGARG_FIRSTFPR; -#endif } /* Setup result reg/sp for call. Evict scratch regs. */ @@ -656,7 +549,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) if (ra_used(ir)) { if (irt_isfp(ir->t)) { int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ -#if LJ_64 if ((ci->flags & CCI_CASTU64)) { Reg dest = ir->r; if (ra_hasreg(dest)) { @@ -668,27 +560,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) } else { ra_destreg(as, ir, RID_FPRET); } -#else - /* Number result is in x87 st0 for x86 calling convention. */ - Reg dest = ir->r; - if (ra_hasreg(dest)) { - ra_free(as, dest); - ra_modified(as, dest); - emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, - dest, RID_ESP, ofs); - } - if ((ci->flags & CCI_CASTU64)) { - emit_movtomro(as, RID_RETLO, RID_ESP, ofs); - emit_movtomro(as, RID_RETHI, RID_ESP, ofs+4); - } else { - emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd, - irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); - } -#endif -#if LJ_32 - } else if (hiop) { - ra_destpair(as, ir); -#endif } else { lua_assert(!irt_ispri(ir->t)); ra_destreg(as, ir, RID_RET); @@ -701,11 +572,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) /* Return a constant function pointer or NULL for indirect calls. */ static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func) { -#if LJ_32 - UNUSED(as); - if (irref_isk(func)) - return (void *)irf->i; -#else if (irref_isk(func)) { MCode *p; if (irf->o == IR_KINT64) @@ -716,7 +582,6 @@ static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func) return p; /* Call target is still in +-2GB range. */ /* Avoid the indirect case of emit_call(). Try to hoist func addr. */ } -#endif return NULL; } @@ -730,11 +595,6 @@ static void asm_callx(ASMState *as, IRIns *ir) ci.flags = asm_callx_flags(as, ir); asm_collectargs(as, ir, &ci, args); asm_setupresult(as, ir, &ci); -#if LJ_32 - /* Have to readjust stack after non-cdecl calls due to callee cleanup. */ - if ((ci.flags & CCI_CC_MASK) != CCI_CC_CDECL) - spadj = 4 * asm_count_call_slots(as, &ci, args); -#endif func = ir->op2; irf = IR(func); if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } ci.func = (ASMFunction)asm_callx_func(as, irf, func); @@ -756,9 +616,7 @@ static void asm_callx(ASMState *as, IRIns *ir) static void asm_retf(ASMState *as, IRIns *ir) { Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); -#if LJ_FR2 Reg rpc = ra_scratch(as, rset_exclude(RSET_GPR, base)); -#endif void *pc = ir_kptr(IR(ir->op2)); int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); as->topslot -= (BCReg)delta; @@ -767,12 +625,8 @@ static void asm_retf(ASMState *as, IRIns *ir) emit_setgl(as, base, jit_base); emit_addptr(as, base, -8*delta); asm_guardcc(as, CC_NE); -#if LJ_FR2 emit_rmro(as, XO_CMP, rpc|REX_GC64, base, -8); emit_loadu64(as, rpc, u64ptr(pc)); -#else - emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc)); -#endif } /* -- Type conversions ---------------------------------------------------- */ @@ -928,96 +782,6 @@ static void asm_conv(ASMState *as, IRIns *ir) } } -#if LJ_32 && LJ_HASFFI -/* No SSE conversions to/from 64 bit on x86, so resort to ugly x87 code. */ - -/* 64 bit integer to FP conversion in 32 bit mode. */ -static void asm_conv_fp_int64(ASMState *as, IRIns *ir) -{ - Reg hi = ra_alloc1(as, ir->op1, RSET_GPR); - Reg lo = ra_alloc1(as, (ir-1)->op1, rset_exclude(RSET_GPR, hi)); - int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ - Reg dest = ir->r; - if (ra_hasreg(dest)) { - ra_free(as, dest); - ra_modified(as, dest); - emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs); - } - emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd, - irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); - if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) { - /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */ - MCLabel l_end = emit_label(as); - emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_2P64]); - emit_sjcc(as, CC_NS, l_end); - emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */ - } else { - lua_assert(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64); - } - emit_rmro(as, XO_FILDq, XOg_FILDq, RID_ESP, 0); - /* NYI: Avoid narrow-to-wide store-to-load forwarding stall. */ - emit_rmro(as, XO_MOVto, hi, RID_ESP, 4); - emit_rmro(as, XO_MOVto, lo, RID_ESP, 0); -} - -/* FP to 64 bit integer conversion in 32 bit mode. */ -static void asm_conv_int64_fp(ASMState *as, IRIns *ir) -{ - IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); - IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); - Reg lo, hi; - lua_assert(st == IRT_NUM || st == IRT_FLOAT); - lua_assert(dt == IRT_I64 || dt == IRT_U64); - hi = ra_dest(as, ir, RSET_GPR); - lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi)); - if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0); - /* NYI: Avoid wide-to-narrow store-to-load forwarding stall. */ - if (!(as->flags & JIT_F_SSE3)) { /* Set FPU rounding mode to default. */ - emit_rmro(as, XO_FLDCW, XOg_FLDCW, RID_ESP, 4); - emit_rmro(as, XO_MOVto, lo, RID_ESP, 4); - emit_gri(as, XG_ARITHi(XOg_AND), lo, 0xf3ff); - } - if (dt == IRT_U64) { - /* For inputs in [2^63,2^64-1] add -2^64 and convert again. */ - MCLabel l_pop, l_end = emit_label(as); - emit_x87op(as, XI_FPOP); - l_pop = emit_label(as); - emit_sjmp(as, l_end); - emit_rmro(as, XO_MOV, hi, RID_ESP, 4); - if ((as->flags & JIT_F_SSE3)) - emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); - else - emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); - emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]); - emit_sjcc(as, CC_NS, l_pop); - emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */ - } - emit_rmro(as, XO_MOV, hi, RID_ESP, 4); - if ((as->flags & JIT_F_SSE3)) { /* Truncation is easy with SSE3. */ - emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); - } else { /* Otherwise set FPU rounding mode to truncate before the store. */ - emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); - emit_rmro(as, XO_FLDCW, XOg_FLDCW, RID_ESP, 0); - emit_rmro(as, XO_MOVtow, lo, RID_ESP, 0); - emit_rmro(as, XO_ARITHw(XOg_OR), lo, RID_ESP, 0); - emit_loadi(as, lo, 0xc00); - emit_rmro(as, XO_FNSTCW, XOg_FNSTCW, RID_ESP, 0); - } - if (dt == IRT_U64) - emit_x87op(as, XI_FDUP); - emit_mrm(as, st == IRT_NUM ? XO_FLDq : XO_FLDd, - st == IRT_NUM ? XOg_FLDq: XOg_FLDd, - asm_fuseload(as, ir->op1, RSET_EMPTY)); -} - -static void asm_conv64(ASMState *as, IRIns *ir) -{ - if (irt_isfp(ir->t)) - asm_conv_fp_int64(as, ir); - else - asm_conv_int64_fp(as, ir); -} -#endif static void asm_strto(ASMState *as, IRIns *ir) { @@ -1052,7 +816,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir)); } else { /* Otherwise use g->tmptv to hold the TValue. */ -#if LJ_GC64 if (irref_isk(ref)) { TValue k; lj_ir_kvalue(as->J->L, &k, ir); @@ -1070,16 +833,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) } emit_movtomro(as, REX_64IR(ir, src), dest, 0); } -#else - if (!irref_isk(ref)) { - Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest)); - emit_movtomro(as, REX_64IR(ir, src), dest, 0); - } else if (!irt_ispri(ir->t)) { - emit_movmroi(as, dest, 0, ir->i); - } - if (!(LJ_64 && irt_islightud(ir->t))) - emit_movmroi(as, dest, 4, irt_toitype(ir->t)); -#endif emit_loada(as, dest, &J2G(as->J)->tmptv); } } @@ -1153,19 +906,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n)); emit_sjcc(as, CC_AE, l_next); /* The type check avoids NaN penalties and complaints from Valgrind. */ -#if LJ_64 && !LJ_GC64 - emit_u32(as, LJ_TISNUM); - emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); -#else emit_i8(as, LJ_TISNUM); emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); -#endif } -#if LJ_64 && !LJ_GC64 - } else if (irt_islightud(kt)) { - emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64)); -#endif -#if LJ_GC64 } else if (irt_isaddr(kt)) { if (isk) { TValue k; @@ -1182,30 +925,13 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) lua_assert(irt_ispri(kt) && !irt_isnil(kt)); emit_u32(as, (irt_toitype(kt)<<15)|0x7fff); emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); -#else - } else { - if (!irt_ispri(kt)) { - lua_assert(irt_isaddr(kt)); - if (isk) - emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.gcr), - ptr2addr(ir_kgc(irkey))); - else - emit_rmro(as, XO_CMP, key, dest, offsetof(Node, key.gcr)); - emit_sjcc(as, CC_NE, l_next); - } - lua_assert(!irt_isnil(kt)); - emit_i8(as, irt_toitype(kt)); - emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); -#endif } emit_sfixup(as, l_loop); checkmclim(as); -#if LJ_GC64 if (!isk && irt_isaddr(kt)) { emit_rr(as, XO_OR, tmp|REX_64, key); emit_loadu64(as, tmp, (uint64_t)irt_toitype(kt) << 47); } -#endif /* Load main position relative to tab->node into dest. */ khash = isk ? ir_khash(irkey) : 1; @@ -1237,17 +963,11 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_rr(as, XO_ARITH(XOg_XOR), tmp, dest); if (irt_isnum(kt)) { emit_rr(as, XO_ARITH(XOg_ADD), dest, dest); -#if LJ_64 emit_shifti(as, XOg_SHR|REX_64, dest, 32); emit_rr(as, XO_MOV, tmp, dest); emit_rr(as, XO_MOVDto, key|REX_64, dest); -#else - emit_rmro(as, XO_MOV, dest, RID_ESP, ra_spill(as, irkey)+4); - emit_rr(as, XO_MOVDto, key, tmp); -#endif } else { emit_rr(as, XO_MOV, tmp, key); -#if LJ_GC64 checkmclim(as); emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15); if ((as->flags & JIT_F_BMI2)) { @@ -1257,9 +977,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_shifti(as, XOg_SHR|REX_64, dest, 32); emit_rr(as, XO_MOV, dest|REX_64, key|REX_64); } -#else - emit_rmro(as, XO_LEA, dest, key, HASH_BIAS); -#endif } } } @@ -1272,9 +989,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir) int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); Reg dest = ra_used(ir) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; Reg node = ra_alloc1(as, ir->op1, RSET_GPR); -#if !LJ_64 - MCLabel l_exit; -#endif lua_assert(ofs % sizeof(Node) == 0); if (ra_hasreg(dest)) { if (ofs != 0) { @@ -1287,7 +1001,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir) } } asm_guardcc(as, CC_NE); -#if LJ_64 if (!irt_ispri(irkey->t)) { Reg key = ra_scratch(as, rset_exclude(RSET_GPR, node)); emit_rmro(as, XO_CMP, key|REX_64, node, @@ -1295,50 +1008,14 @@ static void asm_hrefk(ASMState *as, IRIns *ir) lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t)); /* Assumes -0.0 is already canonicalized to +0.0. */ emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 : -#if LJ_GC64 ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey)); -#else - ((uint64_t)irt_toitype(irkey->t) << 32) | - (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey))); -#endif } else { lua_assert(!irt_isnil(irkey->t)); -#if LJ_GC64 emit_i32(as, (irt_toitype(irkey->t)<<15)|0x7fff); emit_rmro(as, XO_ARITHi, XOg_CMP, node, ofs + (int32_t)offsetof(Node, key.it)); -#else - emit_i8(as, irt_toitype(irkey->t)); - emit_rmro(as, XO_ARITHi8, XOg_CMP, node, - ofs + (int32_t)offsetof(Node, key.it)); -#endif } -#else - l_exit = emit_label(as); - if (irt_isnum(irkey->t)) { - /* Assumes -0.0 is already canonicalized to +0.0. */ - emit_gmroi(as, XG_ARITHi(XOg_CMP), node, - ofs + (int32_t)offsetof(Node, key.u32.lo), - (int32_t)ir_knum(irkey)->u32.lo); - emit_sjcc(as, CC_NE, l_exit); - emit_gmroi(as, XG_ARITHi(XOg_CMP), node, - ofs + (int32_t)offsetof(Node, key.u32.hi), - (int32_t)ir_knum(irkey)->u32.hi); - } else { - if (!irt_ispri(irkey->t)) { - lua_assert(irt_isgcv(irkey->t)); - emit_gmroi(as, XG_ARITHi(XOg_CMP), node, - ofs + (int32_t)offsetof(Node, key.gcr), - ptr2addr(ir_kgc(irkey))); - emit_sjcc(as, CC_NE, l_exit); - } - lua_assert(!irt_isnil(irkey->t)); - emit_i8(as, irt_toitype(irkey->t)); - emit_rmro(as, XO_ARITHi8, XOg_CMP, node, - ofs + (int32_t)offsetof(Node, key.it)); - } -#endif } static void asm_uref(ASMState *as, IRIns *ir) @@ -1450,9 +1127,6 @@ static void asm_fxstore(ASMState *as, IRIns *ir) case IRT_I16: case IRT_U16: xo = XO_MOVtow; break; case IRT_NUM: xo = XO_MOVSDto; break; case IRT_FLOAT: xo = XO_MOVSSto; break; -#if LJ_64 && !LJ_GC64 - case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */ -#endif default: if (LJ_64 && irt_is64(ir->t)) src |= REX_64; @@ -1482,48 +1156,16 @@ static void asm_fxstore(ASMState *as, IRIns *ir) #define asm_fstore(as, ir) asm_fxstore(as, ir) #define asm_xstore(as, ir) asm_fxstore(as, ir) -#if LJ_64 && !LJ_GC64 -static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) -{ - if (ra_used(ir) || typecheck) { - Reg dest = ra_dest(as, ir, RSET_GPR); - if (typecheck) { - Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, dest)); - asm_guardcc(as, CC_NE); - emit_i8(as, -2); - emit_rr(as, XO_ARITHi8, XOg_CMP, tmp); - emit_shifti(as, XOg_SAR|REX_64, tmp, 47); - emit_rr(as, XO_MOV, tmp|REX_64, dest); - } - return dest; - } else { - return RID_NONE; - } -} -#endif static void asm_ahuvload(ASMState *as, IRIns *ir) { -#if LJ_GC64 Reg tmp = RID_NONE; -#endif lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isint(ir->t))); -#if LJ_64 && !LJ_GC64 - if (irt_islightud(ir->t)) { - Reg dest = asm_load_lightud64(as, ir, 1); - if (ra_hasreg(dest)) { - asm_fuseahuref(as, ir->op1, RSET_GPR); - emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM); - } - return; - } else -#endif if (ra_used(ir)) { RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; Reg dest = ra_dest(as, ir, allow); asm_fuseahuref(as, ir->op1, RSET_GPR); -#if LJ_GC64 if (irt_isaddr(ir->t)) { emit_shifti(as, XOg_SHR|REX_64, dest, 17); asm_guardcc(as, CC_NE); @@ -1539,16 +1181,13 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) } return; } else -#endif emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM); } else { RegSet gpr = RSET_GPR; -#if LJ_GC64 if (irt_isaddr(ir->t)) { tmp = ra_scratch(as, RSET_GPR); gpr = rset_exclude(gpr, tmp); } -#endif asm_fuseahuref(as, ir->op1, gpr); } /* Always do the type check, even if the load result is unused. */ @@ -1556,13 +1195,8 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE); if (LJ_64 && irt_type(ir->t) >= IRT_NUM) { lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); -#if LJ_GC64 emit_u32(as, LJ_TISNUM << 15); -#else - emit_u32(as, LJ_TISNUM); -#endif emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); -#if LJ_GC64 } else if (irt_isaddr(ir->t)) { as->mrm.ofs -= 4; emit_i8(as, irt_toitype(ir->t)); @@ -1576,11 +1210,6 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) } else { emit_u32(as, (irt_toitype(ir->t) << 15) | 0x7fff); emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); -#else - } else { - emit_i8(as, irt_toitype(ir->t)); - emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM); -#endif } } @@ -1592,13 +1221,6 @@ static void asm_ahustore(ASMState *as, IRIns *ir) Reg src = ra_alloc1(as, ir->op2, RSET_FPR); asm_fuseahuref(as, ir->op1, RSET_GPR); emit_mrm(as, XO_MOVSDto, src, RID_MRM); -#if LJ_64 && !LJ_GC64 - } else if (irt_islightud(ir->t)) { - Reg src = ra_alloc1(as, ir->op2, RSET_GPR); - asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src)); - emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); -#endif -#if LJ_GC64 } else if (irref_isk(ir->op2)) { TValue k; lj_ir_kvalue(as->J->L, &k, IR(ir->op2)); @@ -1613,7 +1235,6 @@ static void asm_ahustore(ASMState *as, IRIns *ir) emit_u32(as, k.u32.hi); emit_mrm(as, XO_MOVmi, 0, RID_MRM); } -#endif } else { IRIns *irr = IR(ir->op2); RegSet allow = RSET_GPR; @@ -1624,7 +1245,6 @@ static void asm_ahustore(ASMState *as, IRIns *ir) } asm_fuseahuref(as, ir->op1, allow); if (ra_hasreg(src)) { -#if LJ_GC64 if (!(LJ_DUALNUM && irt_isinteger(ir->t))) { /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */ as->mrm.ofs += 4; @@ -1634,7 +1254,6 @@ static void asm_ahustore(ASMState *as, IRIns *ir) emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); return; } -#endif emit_mrm(as, XO_MOVto, src, RID_MRM); } else if (!irt_ispri(irr->t)) { lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t))); @@ -1642,12 +1261,8 @@ static void asm_ahustore(ASMState *as, IRIns *ir) emit_mrm(as, XO_MOVmi, 0, RID_MRM); } as->mrm.ofs += 4; -#if LJ_GC64 lua_assert(LJ_DUALNUM && irt_isinteger(ir->t)); emit_i32(as, LJ_TNUMX << 15); -#else - emit_i32(as, (int32_t)irt_toitype(ir->t)); -#endif emit_mrm(as, XO_MOVmi, 0, RID_MRM); } } @@ -1668,15 +1283,6 @@ static void asm_sload(ASMState *as, IRIns *ir) base = ra_alloc1(as, REF_BASE, RSET_GPR); emit_rmro(as, XO_MOVSD, left, base, ofs); t.irt = IRT_NUM; /* Continue with a regular number type check. */ -#if LJ_64 && !LJ_GC64 - } else if (irt_islightud(t)) { - Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK)); - if (ra_hasreg(dest)) { - base = ra_alloc1(as, REF_BASE, RSET_GPR); - emit_rmro(as, XO_MOV, dest|REX_64, base, ofs); - } - return; -#endif } else if (ra_used(ir)) { RegSet allow = irt_isnum(t) ? RSET_FPR : RSET_GPR; Reg dest = ra_dest(as, ir, allow); @@ -1686,7 +1292,6 @@ static void asm_sload(ASMState *as, IRIns *ir) t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs); } else { -#if LJ_GC64 if (irt_isaddr(t)) { /* LJ_GC64 type check + tag removal without BMI2 and with BMI2: ** @@ -1715,7 +1320,6 @@ static void asm_sload(ASMState *as, IRIns *ir) } return; } else -#endif emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs); } } else { @@ -1728,13 +1332,8 @@ static void asm_sload(ASMState *as, IRIns *ir) asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE); if (LJ_64 && irt_type(t) >= IRT_NUM) { lua_assert(irt_isinteger(t) || irt_isnum(t)); -#if LJ_GC64 emit_u32(as, LJ_TISNUM << 15); -#else - emit_u32(as, LJ_TISNUM); -#endif emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); -#if LJ_GC64 } else if (irt_isnil(t)) { /* LJ_GC64 type check for nil: ** @@ -1759,18 +1358,12 @@ static void asm_sload(ASMState *as, IRIns *ir) emit_rr(as, XO_ARITHi8, XOg_CMP, tmp); emit_shifti(as, XOg_SAR|REX_64, tmp, 47); emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs+4); -#else - } else { - emit_i8(as, irt_toitype(t)); - emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4); -#endif } } } /* -- Allocations --------------------------------------------------------- */ -#if LJ_HASFFI static void asm_cnew(ASMState *as, IRIns *ir) { CTState *cts = ctype_ctsG(J2G(as->J)); @@ -1787,7 +1380,6 @@ static void asm_cnew(ASMState *as, IRIns *ir) /* Initialize immutable cdata object. */ if (ir->o == IR_CNEWI) { RegSet allow = (RSET_GPR & ~RSET_SCRATCH); -#if LJ_64 Reg r64 = sz == 8 ? REX_64 : 0; if (irref_isk(ir->op2)) { IRIns *irk = IR(ir->op2); @@ -1804,24 +1396,6 @@ static void asm_cnew(ASMState *as, IRIns *ir) Reg r = ra_alloc1(as, ir->op2, allow); emit_movtomro(as, r + r64, RID_RET, sizeof(GCcdata)); } -#else - int32_t ofs = sizeof(GCcdata); - if (sz == 8) { - ofs += 4; ir++; - lua_assert(ir->o == IR_HIOP); - } - do { - if (irref_isk(ir->op2)) { - emit_movmroi(as, RID_RET, ofs, IR(ir->op2)->i); - } else { - Reg r = ra_alloc1(as, ir->op2, allow); - emit_movtomro(as, r, RID_RET, ofs); - rset_clear(allow, r); - } - if (ofs == sizeof(GCcdata)) break; - ofs -= 4; ir--; - } while (1); -#endif lua_assert(sz == 4 || sz == 8); } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; @@ -1846,9 +1420,6 @@ static void asm_cnew(ASMState *as, IRIns *ir) asm_gencall(as, ci, args); emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata))); } -#else -#define asm_cnew(as, ir) ((void)0) -#endif /* -- Write barriers ------------------------------------------------------ */ @@ -1993,12 +1564,10 @@ static void asm_fppowi(ASMState *as, IRIns *ir) static void asm_pow(ASMState *as, IRIns *ir) { -#if LJ_64 && LJ_HASFFI if (!irt_isnum(ir->t)) asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : IRCALL_lj_carith_powu64); else -#endif asm_fppowi(as, ir); } @@ -2200,23 +1769,19 @@ static void asm_mul(ASMState *as, IRIns *ir) static void asm_div(ASMState *as, IRIns *ir) { -#if LJ_64 && LJ_HASFFI if (!irt_isnum(ir->t)) asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : IRCALL_lj_carith_divu64); else -#endif asm_fparith(as, ir, XO_DIVSD); } static void asm_mod(ASMState *as, IRIns *ir) { -#if LJ_64 && LJ_HASFFI if (!irt_isint(ir->t)) asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : IRCALL_lj_carith_modu64); else -#endif asm_callid(as, ir, IRCALL_lj_vm_modi); } @@ -2529,137 +2094,13 @@ static void asm_comp(ASMState *as, IRIns *ir) #define asm_equal(as, ir) asm_comp(as, ir) -#if LJ_32 && LJ_HASFFI -/* 64 bit integer comparisons in 32 bit mode. */ -static void asm_comp_int64(ASMState *as, IRIns *ir) -{ - uint32_t cc = asm_compmap[(ir-1)->o]; - RegSet allow = RSET_GPR; - Reg lefthi = RID_NONE, leftlo = RID_NONE; - Reg righthi = RID_NONE, rightlo = RID_NONE; - MCLabel l_around; - x86ModRM mrm; - - as->curins--; /* Skip loword ins. Avoids failing in noconflict(), too. */ - - /* Allocate/fuse hiword operands. */ - if (irref_isk(ir->op2)) { - lefthi = asm_fuseload(as, ir->op1, allow); - } else { - lefthi = ra_alloc1(as, ir->op1, allow); - rset_clear(allow, lefthi); - righthi = asm_fuseload(as, ir->op2, allow); - if (righthi == RID_MRM) { - if (as->mrm.base != RID_NONE) rset_clear(allow, as->mrm.base); - if (as->mrm.idx != RID_NONE) rset_clear(allow, as->mrm.idx); - } else { - rset_clear(allow, righthi); - } - } - mrm = as->mrm; /* Save state for hiword instruction. */ - - /* Allocate/fuse loword operands. */ - if (irref_isk((ir-1)->op2)) { - leftlo = asm_fuseload(as, (ir-1)->op1, allow); - } else { - leftlo = ra_alloc1(as, (ir-1)->op1, allow); - rset_clear(allow, leftlo); - rightlo = asm_fuseload(as, (ir-1)->op2, allow); - } - - /* All register allocations must be performed _before_ this point. */ - l_around = emit_label(as); - as->invmcp = as->flagmcp = NULL; /* Cannot use these optimizations. */ - - /* Loword comparison and branch. */ - asm_guardcc(as, cc >> 4); /* Always use unsigned compare for loword. */ - if (ra_noreg(rightlo)) { - int32_t imm = IR((ir-1)->op2)->i; - if (imm == 0 && ((cc >> 4) & 0xa) != 0x2 && leftlo != RID_MRM) - emit_rr(as, XO_TEST, leftlo, leftlo); - else - emit_gmrmi(as, XG_ARITHi(XOg_CMP), leftlo, imm); - } else { - emit_mrm(as, XO_CMP, leftlo, rightlo); - } - - /* Hiword comparison and branches. */ - if ((cc & 15) != CC_NE) - emit_sjcc(as, CC_NE, l_around); /* Hiword unequal: skip loword compare. */ - if ((cc & 15) != CC_E) - asm_guardcc(as, cc >> 8); /* Hiword compare without equality check. */ - as->mrm = mrm; /* Restore state. */ - if (ra_noreg(righthi)) { - int32_t imm = IR(ir->op2)->i; - if (imm == 0 && (cc & 0xa) != 0x2 && lefthi != RID_MRM) - emit_rr(as, XO_TEST, lefthi, lefthi); - else - emit_gmrmi(as, XG_ARITHi(XOg_CMP), lefthi, imm); - } else { - emit_mrm(as, XO_CMP, lefthi, righthi); - } -} -#endif /* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ static void asm_hiop(ASMState *as, IRIns *ir) { -#if LJ_32 && LJ_HASFFI - /* HIOP is marked as a store because it needs its own DCE logic. */ - int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ - if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; - if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ - as->curins--; /* Always skip the CONV. */ - if (usehi || uselo) - asm_conv64(as, ir); - return; - } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ - asm_comp_int64(as, ir); - return; - } else if ((ir-1)->o == IR_XSTORE) { - if ((ir-1)->r != RID_SINK) - asm_fxstore(as, ir); - return; - } - if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ - switch ((ir-1)->o) { - case IR_ADD: - as->flagmcp = NULL; - as->curins--; - asm_intarith(as, ir, XOg_ADC); - asm_intarith(as, ir-1, XOg_ADD); - break; - case IR_SUB: - as->flagmcp = NULL; - as->curins--; - asm_intarith(as, ir, XOg_SBB); - asm_intarith(as, ir-1, XOg_SUB); - break; - case IR_NEG: { - Reg dest = ra_dest(as, ir, RSET_GPR); - emit_rr(as, XO_GROUP3, XOg_NEG, dest); - emit_i8(as, 0); - emit_rr(as, XO_ARITHi8, XOg_ADC, dest); - ra_left(as, dest, ir->op1); - as->curins--; - asm_neg_not(as, ir-1, XOg_NEG); - break; - } - case IR_CALLN: - case IR_CALLXS: - if (!uselo) - ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ - break; - case IR_CNEWI: - /* Nothing to do here. Handled by CNEWI itself. */ - break; - default: lua_assert(0); break; - } -#else UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on x64 or without FFI. */ -#endif } /* -- Profiling ----------------------------------------------------------- */ @@ -2690,13 +2131,8 @@ static void asm_stack_check(ASMState *as, BCReg topslot, if (ra_hasreg(pbase) && pbase != r) emit_rr(as, XO_ARITH(XOg_SUB), r|REX_GC64, pbase); else -#if LJ_GC64 emit_rmro(as, XO_ARITH(XOg_SUB), r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, &J2G(as->J)->jit_base)); -#else - emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE, - ptr2addr(&J2G(as->J)->jit_base)); -#endif emit_rmro(as, XO_MOV, r|REX_GC64, r, offsetof(lua_State, maxstack)); emit_getgl(as, r, cur_L); if (allow == RSET_EMPTY) /* Spill temp. register. */ @@ -2728,7 +2164,6 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) (LJ_DUALNUM && irt_isinteger(ir->t))); if (!irref_isk(ref)) { Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); -#if LJ_GC64 if (irt_is64(ir->t)) { /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */ emit_u32(as, irt_toitype(ir->t) << 15); @@ -2738,9 +2173,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) } else { emit_movmroi(as, RID_BASE, ofs+4, (irt_toitype(ir->t)<<15)|0x7fff); } -#endif emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs); -#if LJ_GC64 } else { TValue k; lj_ir_kvalue(as->J->L, &k, ir); @@ -2751,21 +2184,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) emit_movmroi(as, RID_BASE, ofs+4, k.u32.hi); emit_movmroi(as, RID_BASE, ofs, k.u32.lo); } -#else - } else if (!irt_ispri(ir->t)) { - emit_movmroi(as, RID_BASE, ofs, ir->i); -#endif } if ((sn & (SNAP_CONT|SNAP_FRAME))) { -#if !LJ_FR2 - if (s != 0) /* Do not overwrite link to previous frame. */ - emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--)); -#endif -#if !LJ_GC64 - } else { - if (!(LJ_64 && irt_islightud(ir->t))) - emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); -#endif } } checkmclim(as); @@ -2791,11 +2211,7 @@ static void asm_gc_check(ASMState *as) args[1] = ASMREF_TMP2; /* MSize steps */ asm_gencall(as, ci, args); tmp = ra_releasetmp(as, ASMREF_TMP1); -#if LJ_GC64 emit_rmro(as, XO_LEA, tmp|REX_64, RID_DISPATCH, GG_DISP2G); -#else - emit_loada(as, tmp, J2G(as->J)); -#endif emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps); /* Jump around GC step if GC total < GC threshold. */ emit_sjcc(as, CC_B, l_end); @@ -2913,16 +2329,12 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) *(int32_t *)p1 = spadj; } if ((as->flags & JIT_F_LEA_AGU)) { -#if LJ_64 p1[-4] = 0x48; -#endif p1[-3] = (MCode)XI_LEA; p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP); p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP); } else { -#if LJ_64 p1[-3] = 0x48; -#endif p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); } @@ -2972,11 +2384,7 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) nslots = asm_count_call_slots(as, ci, args); if (nslots > as->evenspill) /* Leave room for args in stack slots. */ as->evenspill = nslots; -#if LJ_64 return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); -#else - return irt_isfp(ir->t) ? REGSP_INIT : REGSP_HINT(RID_RET); -#endif } /* Target-specific setup. */ @@ -2993,21 +2401,13 @@ static const uint8_t map_op1[256] = { 0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51, 0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51, 0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51, -#if LJ_64 0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x14,0x14,0x14,0x14,0x14,0x14,0x14,0x14, -#else -0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51, -#endif 0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51, 0x51,0x51,0x92,0x92,0x10,0x10,0x12,0x11,0x45,0x86,0x52,0x93,0x51,0x51,0x51,0x51, 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52, 0x93,0x86,0x93,0x93,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92, 0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x47,0x51,0x51,0x51,0x51,0x51, -#if LJ_64 0x59,0x59,0x59,0x59,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51, -#else -0x55,0x55,0x55,0x55,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51, -#endif 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05, 0x93,0x93,0x53,0x51,0x70,0x71,0x93,0x86,0x54,0x51,0x53,0x51,0x51,0x52,0x51,0x51, 0x92,0x92,0x92,0x92,0x52,0x52,0x51,0x51,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92, @@ -3096,11 +2496,7 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) MSize len = T->szmcode; MCode *px = exitstub_addr(J, exitno) - 6; MCode *pe = p+len-6; -#if LJ_GC64 uint32_t statei = (uint32_t)(GG_OFS(g.vmstate) - GG_OFS(dispatch)); -#else - uint32_t statei = u32ptr(&J2G(J)->vmstate); -#endif if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) *(int32_t *)(p+len-4) = jmprel(p+len, target); /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */ diff --git a/src/lj_bcread.c b/src/lj_bcread.c index 48c5e7c7f5..bd79849cd5 100644 --- a/src/lj_bcread.c +++ b/src/lj_bcread.c @@ -13,11 +13,9 @@ #include "lj_str.h" #include "lj_tab.h" #include "lj_bc.h" -#if LJ_HASFFI #include "lj_ctype.h" #include "lj_cdata.h" #include "lualib.h" -#endif #include "lj_lex.h" #include "lj_bcdump.h" #include "lj_state.h" @@ -233,7 +231,6 @@ static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc) setgcref(*kr, obj2gco(lj_str_new(ls->L, p, len))); } else if (tp == BCDUMP_KGC_TAB) { setgcref(*kr, obj2gco(bcread_ktab(ls))); -#if LJ_HASFFI } else if (tp != BCDUMP_KGC_CHILD) { CTypeID id = tp == BCDUMP_KGC_COMPLEX ? CTID_COMPLEX_DOUBLE : tp == BCDUMP_KGC_I64 ? CTID_INT64 : CTID_UINT64; @@ -247,7 +244,6 @@ static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc) p[1].u32.lo = bcread_uleb128(ls); p[1].u32.hi = bcread_uleb128(ls); } -#endif } else { lua_State *L = ls->L; lua_assert(tp == BCDUMP_KGC_CHILD); @@ -396,16 +392,12 @@ static int bcread_header(LexState *ls) if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0; if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0; if ((flags & BCDUMP_F_FFI)) { -#if LJ_HASFFI lua_State *L = ls->L; if (!ctype_ctsG(G(L))) { ptrdiff_t oldtop = savestack(L, L->top); luaopen_ffi(L); /* Load FFI library on-demand. */ L->top = restorestack(L, oldtop); } -#else - return 0; -#endif } if ((flags & BCDUMP_F_STRIP)) { ls->chunkname = lj_str_newz(ls->L, ls->chunkarg); diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c index 5e05caeaf5..64ada1541f 100644 --- a/src/lj_bcwrite.c +++ b/src/lj_bcwrite.c @@ -10,13 +10,9 @@ #include "lj_gc.h" #include "lj_buf.h" #include "lj_bc.h" -#if LJ_HASFFI #include "lj_ctype.h" -#endif -#if LJ_HASJIT #include "lj_dispatch.h" #include "lj_jit.h" -#endif #include "lj_strfmt.h" #include "lj_bcdump.h" #include "lj_vm.h" @@ -123,7 +119,6 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt) } else if (o->gch.gct == ~LJ_TPROTO) { lua_assert((pt->flags & PROTO_CHILD)); tp = BCDUMP_KGC_CHILD; -#if LJ_HASFFI } else if (o->gch.gct == ~LJ_TCDATA) { CTypeID id = gco2cd(o)->ctypeid; need = 1+4*5; @@ -135,7 +130,6 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt) lua_assert(id == CTID_COMPLEX_DOUBLE); tp = BCDUMP_KGC_COMPLEX; } -#endif } else { lua_assert(o->gch.gct == ~LJ_TTAB); tp = BCDUMP_KGC_TAB; @@ -150,7 +144,6 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt) } else if (tp == BCDUMP_KGC_TAB) { bcwrite_ktab(ctx, p, gco2tab(o)); continue; -#if LJ_HASFFI } else if (tp != BCDUMP_KGC_CHILD) { cTValue *q = (TValue *)cdataptr(gco2cd(o)); p = lj_strfmt_wuleb128(p, q[0].u32.lo); @@ -159,7 +152,6 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt) p = lj_strfmt_wuleb128(p, q[1].u32.lo); p = lj_strfmt_wuleb128(p, q[1].u32.hi); } -#endif } setsbufP(&ctx->sb, p); } @@ -202,12 +194,9 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt) static char *bcwrite_bytecode(BCWriteCtx *ctx, char *p, GCproto *pt) { MSize nbc = pt->sizebc-1; /* Omit the [JI]FUNC* header. */ -#if LJ_HASJIT uint8_t *q = (uint8_t *)p; -#endif p = lj_buf_wmem(p, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns)); UNUSED(ctx); -#if LJ_HASJIT /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */ if ((pt->flags & PROTO_ILOOP) || pt->trace) { jit_State *J = L2J(sbufL(&ctx->sb)); @@ -226,7 +215,6 @@ static char *bcwrite_bytecode(BCWriteCtx *ctx, char *p, GCproto *pt) } } } -#endif return p; } diff --git a/src/lj_buf.c b/src/lj_buf.c index 0dfe7f9807..a71b588a09 100644 --- a/src/lj_buf.c +++ b/src/lj_buf.c @@ -114,12 +114,8 @@ SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s) const char *q = strdata(s); for (; p < e; p++, q++) { uint32_t c = *(unsigned char *)q; -#if LJ_TARGET_PPC - *p = c + ((c >= 'A' && c <= 'Z') << 5); -#else if (c >= 'A' && c <= 'Z') c += 0x20; *p = c; -#endif } setsbufP(sb, p); return sb; @@ -132,12 +128,8 @@ SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s) const char *q = strdata(s); for (; p < e; p++, q++) { uint32_t c = *(unsigned char *)q; -#if LJ_TARGET_PPC - *p = c - ((c >= 'a' && c <= 'z') << 5); -#else if (c >= 'a' && c <= 'z') c -= 0x20; *p = c; -#endif } setsbufP(sb, p); return sb; diff --git a/src/lj_carith.c b/src/lj_carith.c index 218abd260f..09fde019fb 100644 --- a/src/lj_carith.c +++ b/src/lj_carith.c @@ -5,7 +5,6 @@ #include "lj_obj.h" -#if LJ_HASFFI #include "lj_gc.h" #include "lj_err.h" @@ -276,14 +275,8 @@ int lj_carith_op(lua_State *L, MMS mm) /* -- 64 bit bit operations helpers --------------------------------------- */ -#if LJ_64 #define B64DEF(name) \ static LJ_AINLINE uint64_t lj_carith_##name(uint64_t x, int32_t sh) -#else -/* Not inlined on 32 bit archs, since some of these are quite lengthy. */ -#define B64DEF(name) \ - uint64_t LJ_NOINLINE lj_carith_##name(uint64_t x, int32_t sh) -#endif B64DEF(shl64) { return x << (sh&63); } B64DEF(shr64) { return x >> (sh&63); } @@ -350,13 +343,6 @@ uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id) /* -- 64 bit integer arithmetic helpers ----------------------------------- */ -#if LJ_32 && LJ_HASJIT -/* Signed/unsigned 64 bit multiplication. */ -int64_t lj_carith_mul64(int64_t a, int64_t b) -{ - return a * b; -} -#endif /* Unsigned 64 bit division. */ uint64_t lj_carith_divu64(uint64_t a, uint64_t b) @@ -426,4 +412,3 @@ int64_t lj_carith_powi64(int64_t x, int64_t k) return (int64_t)lj_carith_powu64((uint64_t)x, (uint64_t)k); } -#endif diff --git a/src/lj_carith.h b/src/lj_carith.h index 67d976bf0c..41c73ffe65 100644 --- a/src/lj_carith.h +++ b/src/lj_carith.h @@ -8,23 +8,12 @@ #include "lj_obj.h" -#if LJ_HASFFI LJ_FUNC int lj_carith_op(lua_State *L, MMS mm); -#if LJ_32 -LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh); -LJ_FUNC uint64_t lj_carith_shr64(uint64_t x, int32_t sh); -LJ_FUNC uint64_t lj_carith_sar64(uint64_t x, int32_t sh); -LJ_FUNC uint64_t lj_carith_rol64(uint64_t x, int32_t sh); -LJ_FUNC uint64_t lj_carith_ror64(uint64_t x, int32_t sh); -#endif LJ_FUNC uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op); LJ_FUNC uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id); -#if LJ_32 && LJ_HASJIT -LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k); -#endif LJ_FUNC uint64_t lj_carith_divu64(uint64_t a, uint64_t b); LJ_FUNC int64_t lj_carith_divi64(int64_t a, int64_t b); LJ_FUNC uint64_t lj_carith_modu64(uint64_t a, uint64_t b); @@ -32,6 +21,5 @@ LJ_FUNC int64_t lj_carith_modi64(int64_t a, int64_t b); LJ_FUNC uint64_t lj_carith_powu64(uint64_t x, uint64_t k); LJ_FUNC int64_t lj_carith_powi64(int64_t x, int64_t k); -#endif #endif diff --git a/src/lj_ccall.c b/src/lj_ccall.c index 2b7ca36456..560aa6d941 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -5,7 +5,6 @@ #include "lj_obj.h" -#if LJ_HASFFI #include "lj_gc.h" #include "lj_err.h" @@ -17,119 +16,6 @@ #include "lj_trace.h" /* Target-specific handling of register arguments. */ -#if LJ_TARGET_X86 -/* -- x86 calling conventions --------------------------------------------- */ - -#if LJ_ABI_WIN - -#define CCALL_HANDLE_STRUCTRET \ - /* Return structs bigger than 8 by reference (on stack only). */ \ - cc->retref = (sz > 8); \ - if (cc->retref) cc->stack[nsp++] = (GPRArg)dp; - -#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET - -#else - -#if LJ_TARGET_OSX - -#define CCALL_HANDLE_STRUCTRET \ - /* Return structs of size 1, 2, 4 or 8 in registers. */ \ - cc->retref = !(sz == 1 || sz == 2 || sz == 4 || sz == 8); \ - if (cc->retref) { \ - if (ngpr < maxgpr) \ - cc->gpr[ngpr++] = (GPRArg)dp; \ - else \ - cc->stack[nsp++] = (GPRArg)dp; \ - } else { /* Struct with single FP field ends up in FPR. */ \ - cc->resx87 = ccall_classify_struct(cts, ctr); \ - } - -#define CCALL_HANDLE_STRUCTRET2 \ - if (cc->resx87) sp = (uint8_t *)&cc->fpr[0]; \ - memcpy(dp, sp, ctr->size); - -#else - -#define CCALL_HANDLE_STRUCTRET \ - cc->retref = 1; /* Return all structs by reference (in reg or on stack). */ \ - if (ngpr < maxgpr) \ - cc->gpr[ngpr++] = (GPRArg)dp; \ - else \ - cc->stack[nsp++] = (GPRArg)dp; - -#endif - -#define CCALL_HANDLE_COMPLEXRET \ - /* Return complex float in GPRs and complex double by reference. */ \ - cc->retref = (sz > 8); \ - if (cc->retref) { \ - if (ngpr < maxgpr) \ - cc->gpr[ngpr++] = (GPRArg)dp; \ - else \ - cc->stack[nsp++] = (GPRArg)dp; \ - } - -#endif - -#define CCALL_HANDLE_COMPLEXRET2 \ - if (!cc->retref) \ - *(int64_t *)dp = *(int64_t *)sp; /* Copy complex float from GPRs. */ - -#define CCALL_HANDLE_STRUCTARG \ - ngpr = maxgpr; /* Pass all structs by value on the stack. */ - -#define CCALL_HANDLE_COMPLEXARG \ - isfp = 1; /* Pass complex by value on stack. */ - -#define CCALL_HANDLE_REGARG \ - if (!isfp) { /* Only non-FP values may be passed in registers. */ \ - if (n > 1) { /* Anything > 32 bit is passed on the stack. */ \ - if (!LJ_ABI_WIN) ngpr = maxgpr; /* Prevent reordering. */ \ - } else if (ngpr + 1 <= maxgpr) { \ - dp = &cc->gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } \ - } - -#elif LJ_TARGET_X64 && LJ_ABI_WIN -/* -- Windows/x64 calling conventions ------------------------------------- */ - -#define CCALL_HANDLE_STRUCTRET \ - /* Return structs of size 1, 2, 4 or 8 in a GPR. */ \ - cc->retref = !(sz == 1 || sz == 2 || sz == 4 || sz == 8); \ - if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; - -#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET - -#define CCALL_HANDLE_COMPLEXRET2 \ - if (!cc->retref) \ - *(int64_t *)dp = *(int64_t *)sp; /* Copy complex float from GPRs. */ - -#define CCALL_HANDLE_STRUCTARG \ - /* Pass structs of size 1, 2, 4 or 8 in a GPR by value. */ \ - if (!(sz == 1 || sz == 2 || sz == 4 || sz == 8)) { \ - rp = cdataptr(lj_cdata_new(cts, did, sz)); \ - sz = CTSIZE_PTR; /* Pass all other structs by reference. */ \ - } - -#define CCALL_HANDLE_COMPLEXARG \ - /* Pass complex float in a GPR and complex double by reference. */ \ - if (sz != 2*sizeof(float)) { \ - rp = cdataptr(lj_cdata_new(cts, did, sz)); \ - sz = CTSIZE_PTR; \ - } - -/* Windows/x64 argument registers are strictly positional (use ngpr). */ -#define CCALL_HANDLE_REGARG \ - if (isfp) { \ - if (ngpr < maxgpr) { dp = &cc->fpr[ngpr++]; nfpr = ngpr; goto done; } \ - } else { \ - if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \ - } - -#elif LJ_TARGET_X64 /* -- POSIX/x64 calling conventions --------------------------------------- */ #define CCALL_HANDLE_STRUCTRET \ @@ -187,377 +73,6 @@ } \ } -#elif LJ_TARGET_ARM -/* -- ARM calling conventions --------------------------------------------- */ - -#if LJ_ABI_SOFTFP - -#define CCALL_HANDLE_STRUCTRET \ - /* Return structs of size <= 4 in a GPR. */ \ - cc->retref = !(sz <= 4); \ - if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; - -#define CCALL_HANDLE_COMPLEXRET \ - cc->retref = 1; /* Return all complex values by reference. */ \ - cc->gpr[ngpr++] = (GPRArg)dp; - -#define CCALL_HANDLE_COMPLEXRET2 \ - UNUSED(dp); /* Nothing to do. */ - -#define CCALL_HANDLE_STRUCTARG \ - /* Pass all structs by value in registers and/or on the stack. */ - -#define CCALL_HANDLE_COMPLEXARG \ - /* Pass complex by value in 2 or 4 GPRs. */ - -#define CCALL_HANDLE_REGARG_FP1 -#define CCALL_HANDLE_REGARG_FP2 - -#else - -#define CCALL_HANDLE_STRUCTRET \ - cc->retref = !ccall_classify_struct(cts, ctr, ct); \ - if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; - -#define CCALL_HANDLE_STRUCTRET2 \ - if (ccall_classify_struct(cts, ctr, ct) > 1) sp = (uint8_t *)&cc->fpr[0]; \ - memcpy(dp, sp, ctr->size); - -#define CCALL_HANDLE_COMPLEXRET \ - if (!(ct->info & CTF_VARARG)) cc->retref = 0; /* Return complex in FPRs. */ - -#define CCALL_HANDLE_COMPLEXRET2 \ - if (!(ct->info & CTF_VARARG)) memcpy(dp, &cc->fpr[0], ctr->size); - -#define CCALL_HANDLE_STRUCTARG \ - isfp = (ccall_classify_struct(cts, d, ct) > 1); - /* Pass all structs by value in registers and/or on the stack. */ - -#define CCALL_HANDLE_COMPLEXARG \ - isfp = 1; /* Pass complex by value in FPRs or on stack. */ - -#define CCALL_HANDLE_REGARG_FP1 \ - if (isfp && !(ct->info & CTF_VARARG)) { \ - if ((d->info & CTF_ALIGN) > CTALIGN_PTR) { \ - if (nfpr + (n >> 1) <= CCALL_NARG_FPR) { \ - dp = &cc->fpr[nfpr]; \ - nfpr += (n >> 1); \ - goto done; \ - } \ - } else { \ - if (sz > 1 && fprodd != nfpr) fprodd = 0; \ - if (fprodd) { \ - if (2*nfpr+n <= 2*CCALL_NARG_FPR+1) { \ - dp = (void *)&cc->fpr[fprodd-1].f[1]; \ - nfpr += (n >> 1); \ - if ((n & 1)) fprodd = 0; else fprodd = nfpr-1; \ - goto done; \ - } \ - } else { \ - if (2*nfpr+n <= 2*CCALL_NARG_FPR) { \ - dp = (void *)&cc->fpr[nfpr]; \ - nfpr += (n >> 1); \ - if ((n & 1)) fprodd = ++nfpr; else fprodd = 0; \ - goto done; \ - } \ - } \ - } \ - fprodd = 0; /* No reordering after the first FP value is on stack. */ \ - } else { - -#define CCALL_HANDLE_REGARG_FP2 } - -#endif - -#define CCALL_HANDLE_REGARG \ - CCALL_HANDLE_REGARG_FP1 \ - if ((d->info & CTF_ALIGN) > CTALIGN_PTR) { \ - if (ngpr < maxgpr) \ - ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ - } \ - if (ngpr < maxgpr) { \ - dp = &cc->gpr[ngpr]; \ - if (ngpr + n > maxgpr) { \ - nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ - if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ - ngpr = maxgpr; \ - } else { \ - ngpr += n; \ - } \ - goto done; \ - } CCALL_HANDLE_REGARG_FP2 - -#define CCALL_HANDLE_RET \ - if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0]; - -#elif LJ_TARGET_ARM64 -/* -- ARM64 calling conventions ------------------------------------------- */ - -#define CCALL_HANDLE_STRUCTRET \ - cc->retref = !ccall_classify_struct(cts, ctr); \ - if (cc->retref) cc->retp = dp; - -#define CCALL_HANDLE_STRUCTRET2 \ - unsigned int cl = ccall_classify_struct(cts, ctr); \ - if ((cl & 4)) { /* Combine float HFA from separate registers. */ \ - CTSize i = (cl >> 8) - 1; \ - do { ((uint32_t *)dp)[i] = cc->fpr[i].u32; } while (i--); \ - } else { \ - if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \ - memcpy(dp, sp, ctr->size); \ - } - -#define CCALL_HANDLE_COMPLEXRET \ - /* Complex values are returned in one or two FPRs. */ \ - cc->retref = 0; - -#define CCALL_HANDLE_COMPLEXRET2 \ - if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ - ((float *)dp)[0] = cc->fpr[0].f; \ - ((float *)dp)[1] = cc->fpr[1].f; \ - } else { /* Copy complex double from FPRs. */ \ - ((double *)dp)[0] = cc->fpr[0].d; \ - ((double *)dp)[1] = cc->fpr[1].d; \ - } - -#define CCALL_HANDLE_STRUCTARG \ - unsigned int cl = ccall_classify_struct(cts, d); \ - if (cl == 0) { /* Pass struct by reference. */ \ - rp = cdataptr(lj_cdata_new(cts, did, sz)); \ - sz = CTSIZE_PTR; \ - } else if (cl > 1) { /* Pass struct in FPRs or on stack. */ \ - isfp = (cl & 4) ? 2 : 1; \ - } /* else: Pass struct in GPRs or on stack. */ - -#define CCALL_HANDLE_COMPLEXARG \ - /* Pass complex by value in separate (!) FPRs or on stack. */ \ - isfp = sz == 2*sizeof(float) ? 2 : 1; - -#define CCALL_HANDLE_REGARG \ - if (LJ_TARGET_IOS && isva) { \ - /* IOS: All variadic arguments are on the stack. */ \ - } else if (isfp) { /* Try to pass argument in FPRs. */ \ - int n2 = ctype_isvector(d->info) ? 1 : n*isfp; \ - if (nfpr + n2 <= CCALL_NARG_FPR) { \ - dp = &cc->fpr[nfpr]; \ - nfpr += n2; \ - goto done; \ - } else { \ - nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \ - if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \ - } \ - } else { /* Try to pass argument in GPRs. */ \ - if (!LJ_TARGET_IOS && (d->info & CTF_ALIGN) > CTALIGN_PTR) \ - ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ - if (ngpr + n <= maxgpr) { \ - dp = &cc->gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } else { \ - ngpr = maxgpr; /* Prevent reordering. */ \ - if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \ - } \ - } - -#elif LJ_TARGET_PPC -/* -- PPC calling conventions --------------------------------------------- */ - -#define CCALL_HANDLE_STRUCTRET \ - cc->retref = 1; /* Return all structs by reference. */ \ - cc->gpr[ngpr++] = (GPRArg)dp; - -#define CCALL_HANDLE_COMPLEXRET \ - /* Complex values are returned in 2 or 4 GPRs. */ \ - cc->retref = 0; - -#define CCALL_HANDLE_COMPLEXRET2 \ - memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */ - -#define CCALL_HANDLE_STRUCTARG \ - rp = cdataptr(lj_cdata_new(cts, did, sz)); \ - sz = CTSIZE_PTR; /* Pass all structs by reference. */ - -#define CCALL_HANDLE_COMPLEXARG \ - /* Pass complex by value in 2 or 4 GPRs. */ - -#define CCALL_HANDLE_REGARG \ - if (isfp) { /* Try to pass argument in FPRs. */ \ - if (nfpr + 1 <= CCALL_NARG_FPR) { \ - dp = &cc->fpr[nfpr]; \ - nfpr += 1; \ - d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ - goto done; \ - } \ - } else { /* Try to pass argument in GPRs. */ \ - if (n > 1) { \ - lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \ - if (ctype_isinteger(d->info)) \ - ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ - else if (ngpr + n > maxgpr) \ - ngpr = maxgpr; /* Prevent reordering. */ \ - } \ - if (ngpr + n <= maxgpr) { \ - dp = &cc->gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } \ - } - -#define CCALL_HANDLE_RET \ - if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ - ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ - -#elif LJ_TARGET_MIPS32 -/* -- MIPS o32 calling conventions ---------------------------------------- */ - -#define CCALL_HANDLE_STRUCTRET \ - cc->retref = 1; /* Return all structs by reference. */ \ - cc->gpr[ngpr++] = (GPRArg)dp; - -#define CCALL_HANDLE_COMPLEXRET \ - /* Complex values are returned in 1 or 2 FPRs. */ \ - cc->retref = 0; - -#if LJ_ABI_SOFTFP -#define CCALL_HANDLE_COMPLEXRET2 \ - if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \ - ((intptr_t *)dp)[0] = cc->gpr[0]; \ - ((intptr_t *)dp)[1] = cc->gpr[1]; \ - } else { /* Copy complex double from GPRs. */ \ - ((intptr_t *)dp)[0] = cc->gpr[0]; \ - ((intptr_t *)dp)[1] = cc->gpr[1]; \ - ((intptr_t *)dp)[2] = cc->gpr[2]; \ - ((intptr_t *)dp)[3] = cc->gpr[3]; \ - } -#else -#define CCALL_HANDLE_COMPLEXRET2 \ - if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ - ((float *)dp)[0] = cc->fpr[0].f; \ - ((float *)dp)[1] = cc->fpr[1].f; \ - } else { /* Copy complex double from FPRs. */ \ - ((double *)dp)[0] = cc->fpr[0].d; \ - ((double *)dp)[1] = cc->fpr[1].d; \ - } -#endif - -#define CCALL_HANDLE_STRUCTARG \ - /* Pass all structs by value in registers and/or on the stack. */ - -#define CCALL_HANDLE_COMPLEXARG \ - /* Pass complex by value in 2 or 4 GPRs. */ - -#define CCALL_HANDLE_GPR \ - if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \ - ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ - if (ngpr < maxgpr) { \ - dp = &cc->gpr[ngpr]; \ - if (ngpr + n > maxgpr) { \ - nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ - if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ - ngpr = maxgpr; \ - } else { \ - ngpr += n; \ - } \ - goto done; \ - } - -#if !LJ_ABI_SOFTFP /* MIPS32 hard-float */ -#define CCALL_HANDLE_REGARG \ - if (isfp && nfpr < CCALL_NARG_FPR && !(ct->info & CTF_VARARG)) { \ - /* Try to pass argument in FPRs. */ \ - dp = n == 1 ? (void *)&cc->fpr[nfpr].f : (void *)&cc->fpr[nfpr].d; \ - nfpr++; ngpr += n; \ - goto done; \ - } else { /* Try to pass argument in GPRs. */ \ - nfpr = CCALL_NARG_FPR; \ - CCALL_HANDLE_GPR \ - } -#else /* MIPS32 soft-float */ -#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR -#endif - -#if !LJ_ABI_SOFTFP -/* On MIPS64 soft-float, position of float return values is endian-dependant. */ -#define CCALL_HANDLE_RET \ - if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ - sp = (uint8_t *)&cc->fpr[0].f; -#endif - -#elif LJ_TARGET_MIPS64 -/* -- MIPS n64 calling conventions ---------------------------------------- */ - -#define CCALL_HANDLE_STRUCTRET \ - cc->retref = !(sz <= 16); \ - if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; - -#define CCALL_HANDLE_STRUCTRET2 \ - ccall_copy_struct(cc, ctr, dp, sp, ccall_classify_struct(cts, ctr, ct)); - -#define CCALL_HANDLE_COMPLEXRET \ - /* Complex values are returned in 1 or 2 FPRs. */ \ - cc->retref = 0; - -#if LJ_ABI_SOFTFP /* MIPS64 soft-float */ - -#define CCALL_HANDLE_COMPLEXRET2 \ - if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \ - ((intptr_t *)dp)[0] = cc->gpr[0]; \ - } else { /* Copy complex double from GPRs. */ \ - ((intptr_t *)dp)[0] = cc->gpr[0]; \ - ((intptr_t *)dp)[1] = cc->gpr[1]; \ - } - -#define CCALL_HANDLE_COMPLEXARG \ - /* Pass complex by value in 2 or 4 GPRs. */ - -/* Position of soft-float 'float' return value depends on endianess. */ -#define CCALL_HANDLE_RET \ - if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ - sp = (uint8_t *)cc->gpr + LJ_ENDIAN_SELECT(0, 4); - -#else /* MIPS64 hard-float */ - -#define CCALL_HANDLE_COMPLEXRET2 \ - if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ - ((float *)dp)[0] = cc->fpr[0].f; \ - ((float *)dp)[1] = cc->fpr[1].f; \ - } else { /* Copy complex double from FPRs. */ \ - ((double *)dp)[0] = cc->fpr[0].d; \ - ((double *)dp)[1] = cc->fpr[1].d; \ - } - -#define CCALL_HANDLE_COMPLEXARG \ - if (sz == 2*sizeof(float)) { \ - isfp = 2; \ - if (ngpr < maxgpr) \ - sz *= 2; \ - } - -#define CCALL_HANDLE_RET \ - if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ - sp = (uint8_t *)&cc->fpr[0].f; - -#endif - -#define CCALL_HANDLE_STRUCTARG \ - /* Pass all structs by value in registers and/or on the stack. */ - -#define CCALL_HANDLE_REGARG \ - if (ngpr < maxgpr) { \ - dp = &cc->gpr[ngpr]; \ - if (ngpr + n > maxgpr) { \ - nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ - if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ - ngpr = maxgpr; \ - } else { \ - ngpr += n; \ - } \ - goto done; \ - } - -#else -#error "Missing calling convention definitions for this architecture" -#endif #ifndef CCALL_HANDLE_STRUCTRET2 #define CCALL_HANDLE_STRUCTRET2 \ @@ -566,43 +81,9 @@ /* -- x86 OSX ABI struct classification ----------------------------------- */ -#if LJ_TARGET_X86 && LJ_TARGET_OSX - -/* Check for struct with single FP field. */ -static int ccall_classify_struct(CTState *cts, CType *ct) -{ - CTSize sz = ct->size; - if (!(sz == sizeof(float) || sz == sizeof(double))) return 0; - if ((ct->info & CTF_UNION)) return 0; - while (ct->sib) { - ct = ctype_get(cts, ct->sib); - if (ctype_isfield(ct->info)) { - CType *sct = ctype_rawchild(cts, ct); - if (ctype_isfp(sct->info)) { - if (sct->size == sz) - return (sz >> 2); /* Return 1 for float or 2 for double. */ - } else if (ctype_isstruct(sct->info)) { - if (sct->size) - return ccall_classify_struct(cts, sct); - } else { - break; - } - } else if (ctype_isbitfield(ct->info)) { - break; - } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) { - CType *sct = ctype_rawchild(cts, ct); - if (sct->size) - return ccall_classify_struct(cts, sct); - } - } - return 0; -} - -#endif /* -- x64 struct classification ------------------------------------------- */ -#if LJ_TARGET_X64 && !LJ_ABI_WIN /* Register classes for x64 struct classification. */ #define CCALL_RCL_INT 1 @@ -699,172 +180,15 @@ static void ccall_struct_ret(CCallState *cc, int *rcl, uint8_t *dp, CTSize sz) } memcpy(dp, sp, sz); } -#endif /* -- ARM hard-float ABI struct classification ---------------------------- */ -#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP - -/* Classify a struct based on its fields. */ -static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf) -{ - CTSize sz = ct->size; - unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); - if ((ctf->info & CTF_VARARG)) goto noth; - while (ct->sib) { - CType *sct; - ct = ctype_get(cts, ct->sib); - if (ctype_isfield(ct->info)) { - sct = ctype_rawchild(cts, ct); - if (ctype_isfp(sct->info)) { - r |= sct->size; - if (!isu) n++; else if (n == 0) n = 1; - } else if (ctype_iscomplex(sct->info)) { - r |= (sct->size >> 1); - if (!isu) n += 2; else if (n < 2) n = 2; - } else if (ctype_isstruct(sct->info)) { - goto substruct; - } else { - goto noth; - } - } else if (ctype_isbitfield(ct->info)) { - goto noth; - } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) { - sct = ctype_rawchild(cts, ct); - substruct: - if (sct->size > 0) { - unsigned int s = ccall_classify_struct(cts, sct, ctf); - if (s <= 1) goto noth; - r |= (s & 255); - if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8); - } - } - } - if ((r == 4 || r == 8) && n <= 4) - return r + (n << 8); -noth: /* Not a homogeneous float/double aggregate. */ - return (sz <= 4); /* Return structs of size <= 4 in a GPR. */ -} - -#endif /* -- ARM64 ABI struct classification ------------------------------------- */ -#if LJ_TARGET_ARM64 - -/* Classify a struct based on its fields. */ -static unsigned int ccall_classify_struct(CTState *cts, CType *ct) -{ - CTSize sz = ct->size; - unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); - while (ct->sib) { - CType *sct; - ct = ctype_get(cts, ct->sib); - if (ctype_isfield(ct->info)) { - sct = ctype_rawchild(cts, ct); - if (ctype_isfp(sct->info)) { - r |= sct->size; - if (!isu) n++; else if (n == 0) n = 1; - } else if (ctype_iscomplex(sct->info)) { - r |= (sct->size >> 1); - if (!isu) n += 2; else if (n < 2) n = 2; - } else if (ctype_isstruct(sct->info)) { - goto substruct; - } else { - goto noth; - } - } else if (ctype_isbitfield(ct->info)) { - goto noth; - } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) { - sct = ctype_rawchild(cts, ct); - substruct: - if (sct->size > 0) { - unsigned int s = ccall_classify_struct(cts, sct); - if (s <= 1) goto noth; - r |= (s & 255); - if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8); - } - } - } - if ((r == 4 || r == 8) && n <= 4) - return r + (n << 8); -noth: /* Not a homogeneous float/double aggregate. */ - return (sz <= 16); /* Return structs of size <= 16 in GPRs. */ -} - -#endif /* -- MIPS64 ABI struct classification ---------------------------- */ -#if LJ_TARGET_MIPS64 - -#define FTYPE_FLOAT 1 -#define FTYPE_DOUBLE 2 - -/* Classify FP fields (max. 2) and their types. */ -static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf) -{ - int n = 0, ft = 0; - if ((ctf->info & CTF_VARARG) || (ct->info & CTF_UNION)) - goto noth; - while (ct->sib) { - CType *sct; - ct = ctype_get(cts, ct->sib); - if (n == 2) { - goto noth; - } else if (ctype_isfield(ct->info)) { - sct = ctype_rawchild(cts, ct); - if (ctype_isfp(sct->info)) { - ft |= (sct->size == 4 ? FTYPE_FLOAT : FTYPE_DOUBLE) << 2*n; - n++; - } else { - goto noth; - } - } else if (ctype_isbitfield(ct->info) || - ctype_isxattrib(ct->info, CTA_SUBTYPE)) { - goto noth; - } - } - if (n <= 2) - return ft; -noth: /* Not a homogeneous float/double aggregate. */ - return 0; /* Struct is in GPRs. */ -} - -void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, int ft) -{ - if (LJ_ABI_SOFTFP ? ft : - ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) { - int i, ofs = 0; - for (i = 0; ft != 0; i++, ft >>= 2) { - if ((ft & 3) == FTYPE_FLOAT) { -#if LJ_ABI_SOFTFP - /* The 2nd FP struct result is in CARG1 (gpr[2]) and not CRET2. */ - memcpy((uint8_t *)dp + ofs, - (uint8_t *)&cc->gpr[2*i] + LJ_ENDIAN_SELECT(0, 4), 4); -#else - *(float *)((uint8_t *)dp + ofs) = cc->fpr[i].f; -#endif - ofs += 4; - } else { - ofs = (ofs + 7) & ~7; /* 64 bit alignment. */ -#if LJ_ABI_SOFTFP - *(intptr_t *)((uint8_t *)dp + ofs) = cc->gpr[2*i]; -#else - *(double *)((uint8_t *)dp + ofs) = cc->fpr[i].d; -#endif - ofs += 8; - } - } - } else { -#if !LJ_ABI_SOFTFP - if (ft) sp = (uint8_t *)&cc->fpr[0]; -#endif - memcpy(dp, sp, ctr->size); - } -} - -#endif /* -- Common C call handling ---------------------------------------------- */ @@ -907,9 +231,6 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, MSize maxgpr, ngpr = 0, nsp = 0, narg; #if CCALL_NARG_FPR MSize nfpr = 0; -#if LJ_TARGET_ARM - MSize fprodd = 0; -#endif #endif /* Clear unused regs to get some determinism in case of misdeclaration. */ @@ -918,17 +239,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, memset(cc->fpr, 0, sizeof(cc->fpr)); #endif -#if LJ_TARGET_X86 - /* x86 has several different calling conventions. */ - cc->resx87 = 0; - switch (ctype_cconv(ct->info)) { - case CTCC_FASTCALL: maxgpr = 2; break; - case CTCC_THISCALL: maxgpr = 1; break; - default: maxgpr = 0; break; - } -#else maxgpr = CCALL_NARG_GPR; -#endif /* Perform required setup for some result types. */ ctr = ctype_rawchild(cts, ct); @@ -946,10 +257,6 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, } else { CCALL_HANDLE_COMPLEXRET } -#if LJ_TARGET_X86 - } else if (ctype_isfp(ctr->info)) { - cc->resx87 = ctr->size == sizeof(float) ? 1 : 2; -#endif } /* Skip initial attributes. */ @@ -1033,42 +340,15 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp : (int32_t)*(int16_t *)dp; } -#if LJ_TARGET_MIPS64 - if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) || - (isfp && nsp == 0)) && d->size <= 4) { - *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */ - } -#endif -#if LJ_TARGET_X64 && LJ_ABI_WIN - if (isva) { /* Windows/x64 mirrors varargs in both register sets. */ - if (nfpr == ngpr) - cc->gpr[ngpr-1] = cc->fpr[ngpr-1].l[0]; - else - cc->fpr[ngpr-1].l[0] = cc->gpr[ngpr-1]; - } -#else UNUSED(isva); -#endif -#if LJ_TARGET_X64 && !LJ_ABI_WIN if (isfp == 2 && n == 2 && (uint8_t *)dp == (uint8_t *)&cc->fpr[nfpr-2]) { cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */ cc->fpr[nfpr-2].d[1] = 0; } -#elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP) - if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) { - /* Split float HFA or complex float into separate registers. */ - CTSize i = (sz >> 2) - 1; - do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--); - } -#else - UNUSED(isfp); -#endif } if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ -#if LJ_TARGET_X64 || LJ_TARGET_PPC cc->nfpr = nfpr; /* Required for vararg functions. */ -#endif cc->nsp = nsp; cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA)*CTSIZE_PTR; if (nsp > CCALL_SPS_FREE) @@ -1141,13 +421,6 @@ int lj_ccall_func(lua_State *L, GCcdata *cd) } ct = (CType *)((intptr_t)ct+(intptr_t)cts->tab); /* May be reallocated. */ gcsteps += ccall_get_results(L, cts, ct, &cc, &ret); -#if LJ_TARGET_X86 && LJ_ABI_WIN - /* Automatically detect __stdcall and fix up C function declaration. */ - if (cc.spadj && ctype_cconv(ct->info) == CTCC_CDECL) { - CTF_INSERT(ct->info, CCONV, CTCC_STDCALL); - lj_trace_abort(G(L)); - } -#endif while (gcsteps-- > 0) lj_gc_check(L); return ret; @@ -1155,4 +428,3 @@ int lj_ccall_func(lua_State *L, GCcdata *cd) return -1; /* Not a function. */ } -#endif diff --git a/src/lj_ccall.h b/src/lj_ccall.h index 34e800cc03..04b8a1d693 100644 --- a/src/lj_ccall.h +++ b/src/lj_ccall.h @@ -9,31 +9,15 @@ #include "lj_obj.h" #include "lj_ctype.h" -#if LJ_HASFFI /* -- C calling conventions ----------------------------------------------- */ -#if LJ_TARGET_X86ORX64 -#if LJ_TARGET_X86 -#define CCALL_NARG_GPR 2 /* For fastcall arguments. */ -#define CCALL_NARG_FPR 0 -#define CCALL_NRET_GPR 2 -#define CCALL_NRET_FPR 1 /* For FP results on x87 stack. */ -#define CCALL_ALIGN_STACKARG 0 /* Don't align argument on stack. */ -#elif LJ_ABI_WIN -#define CCALL_NARG_GPR 4 -#define CCALL_NARG_FPR 4 -#define CCALL_NRET_GPR 1 -#define CCALL_NRET_FPR 1 -#define CCALL_SPS_EXTRA 4 -#else #define CCALL_NARG_GPR 6 #define CCALL_NARG_FPR 8 #define CCALL_NRET_GPR 2 #define CCALL_NRET_FPR 2 #define CCALL_VECTOR_REG 1 /* Pass vectors in registers. */ -#endif #define CCALL_SPS_FREE 1 #define CCALL_ALIGN_CALLSTATE 16 @@ -49,86 +33,6 @@ typedef LJ_ALIGN(16) union FPRArg { typedef intptr_t GPRArg; -#elif LJ_TARGET_ARM - -#define CCALL_NARG_GPR 4 -#define CCALL_NRET_GPR 2 /* For softfp double. */ -#if LJ_ABI_SOFTFP -#define CCALL_NARG_FPR 0 -#define CCALL_NRET_FPR 0 -#else -#define CCALL_NARG_FPR 8 -#define CCALL_NRET_FPR 4 -#endif -#define CCALL_SPS_FREE 0 - -typedef intptr_t GPRArg; -typedef union FPRArg { - double d; - float f[2]; -} FPRArg; - -#elif LJ_TARGET_ARM64 - -#define CCALL_NARG_GPR 8 -#define CCALL_NRET_GPR 2 -#define CCALL_NARG_FPR 8 -#define CCALL_NRET_FPR 4 -#define CCALL_SPS_FREE 0 - -typedef intptr_t GPRArg; -typedef union FPRArg { - double d; - float f; - uint32_t u32; -} FPRArg; - -#elif LJ_TARGET_PPC - -#define CCALL_NARG_GPR 8 -#define CCALL_NARG_FPR 8 -#define CCALL_NRET_GPR 4 /* For complex double. */ -#define CCALL_NRET_FPR 1 -#define CCALL_SPS_EXTRA 4 -#define CCALL_SPS_FREE 0 - -typedef intptr_t GPRArg; -typedef double FPRArg; - -#elif LJ_TARGET_MIPS32 - -#define CCALL_NARG_GPR 4 -#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 2) -#define CCALL_NRET_GPR (LJ_ABI_SOFTFP ? 4 : 2) -#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2) -#define CCALL_SPS_EXTRA 7 -#define CCALL_SPS_FREE 1 - -typedef intptr_t GPRArg; -typedef union FPRArg { - double d; - struct { LJ_ENDIAN_LOHI(float f; , float g;) }; -} FPRArg; - -#elif LJ_TARGET_MIPS64 - -/* FP args are positional and overlay the GPR array. */ -#define CCALL_NARG_GPR 8 -#define CCALL_NARG_FPR 0 -#define CCALL_NRET_GPR 2 -#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2) -#define CCALL_SPS_EXTRA 3 -#define CCALL_SPS_FREE 1 - -typedef intptr_t GPRArg; -typedef union FPRArg { - double d; - struct { LJ_ENDIAN_LOHI(float f; , float g;) }; -} FPRArg; - -#else -#error "Missing calling convention definitions for this architecture" -#endif #ifndef CCALL_SPS_EXTRA #define CCALL_SPS_EXTRA 0 @@ -161,19 +65,8 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { uint32_t spadj; /* Stack pointer adjustment. */ uint8_t nsp; /* Number of stack slots. */ uint8_t retref; /* Return value by reference. */ -#if LJ_TARGET_X64 uint8_t ngpr; /* Number of arguments in GPRs. */ uint8_t nfpr; /* Number of arguments in FPRs. */ -#elif LJ_TARGET_X86 - uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ -#elif LJ_TARGET_ARM64 - void *retp; /* Aggregate return pointer in x8. */ -#elif LJ_TARGET_PPC - uint8_t nfpr; /* Number of arguments in FPRs. */ -#endif -#if LJ_32 - int32_t align1; -#endif #if CCALL_NUM_FPR FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */ #endif @@ -189,6 +82,5 @@ LJ_ASMF void LJ_FASTCALL lj_vm_ffi_call(CCallState *cc); LJ_FUNC CTypeID lj_ccall_ctid_vararg(CTState *cts, cTValue *o); LJ_FUNC int lj_ccall_func(lua_State *L, GCcdata *cd); -#endif #endif diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index fce6a3ed46..ac5cbd1925 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -5,7 +5,6 @@ #include "lj_obj.h" -#if LJ_HASFFI #include "lj_gc.h" #include "lj_err.h" @@ -25,14 +24,6 @@ #define CALLBACK_MCODE_SIZE (LJ_PAGESIZE * LJ_NUM_CBPAGE) -#if LJ_OS_NOJIT - -/* Callbacks disabled. */ -#define CALLBACK_SLOT2OFS(slot) (0*(slot)) -#define CALLBACK_OFS2SLOT(ofs) (0*(ofs)) -#define CALLBACK_MAX_SLOT 0 - -#elif LJ_TARGET_X86ORX64 #define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0) #define CALLBACK_MCODE_GROUP (-2+1+2+(LJ_GC64 ? 10 : 5)+(LJ_64 ? 6 : 5)) @@ -51,34 +42,6 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) #define CALLBACK_MAX_SLOT \ (((CALLBACK_MCODE_SIZE-CALLBACK_MCODE_HEAD)/(CALLBACK_MCODE_GROUP+4*32))*32) -#elif LJ_TARGET_ARM - -#define CALLBACK_MCODE_HEAD 32 - -#elif LJ_TARGET_ARM64 - -#define CALLBACK_MCODE_HEAD 32 - -#elif LJ_TARGET_PPC - -#define CALLBACK_MCODE_HEAD 24 - -#elif LJ_TARGET_MIPS32 - -#define CALLBACK_MCODE_HEAD 20 - -#elif LJ_TARGET_MIPS64 - -#define CALLBACK_MCODE_HEAD 52 - -#else - -/* Missing support for this architecture. */ -#define CALLBACK_SLOT2OFS(slot) (0*(slot)) -#define CALLBACK_OFS2SLOT(ofs) (0*(ofs)) -#define CALLBACK_MAX_SLOT 0 - -#endif #ifndef CALLBACK_SLOT2OFS #define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) @@ -105,18 +68,12 @@ MSize lj_ccallback_ptr2slot(CTState *cts, void *p) } /* Initialize machine code for callback function pointers. */ -#if LJ_OS_NOJIT -/* Disabled callback support. */ -#define callback_mcode_init(g, p) UNUSED(p) -#elif LJ_TARGET_X86ORX64 static void callback_mcode_init(global_State *g, uint8_t *page) { uint8_t *p = page; uint8_t *target = (uint8_t *)(void *)lj_vm_ffi_callback; MSize slot; -#if LJ_64 *(void **)p = target; p += 8; -#endif for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { /* mov al, slot; jmp group */ *p++ = XI_MOVrib | RID_EAX; *p++ = (uint8_t)slot; @@ -124,140 +81,26 @@ static void callback_mcode_init(global_State *g, uint8_t *page) /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */ *p++ = XI_PUSH + RID_EBP; *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8); -#if LJ_GC64 *p++ = 0x48; *p++ = XI_MOVri | RID_EBP; *(uint64_t *)p = (uint64_t)(g); p += 8; -#else - *p++ = XI_MOVri | RID_EBP; - *(int32_t *)p = i32ptr(g); p += 4; -#endif -#if LJ_64 /* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. */ *p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; *(int32_t *)p = (int32_t)(page-(p+4)); p += 4; -#else - /* jmp lj_vm_ffi_callback. */ - *p++ = XI_JMP; *(int32_t *)p = target-(p+4); p += 4; -#endif } else { *p++ = XI_JMPs; *p++ = (uint8_t)((2+2)*(31-(slot&31)) - 2); } } lua_assert(p - page <= CALLBACK_MCODE_SIZE); } -#elif LJ_TARGET_ARM -static void callback_mcode_init(global_State *g, uint32_t *page) -{ - uint32_t *p = page; - void *target = (void *)lj_vm_ffi_callback; - MSize slot; - /* This must match with the saveregs macro in buildvm_arm.dasc. */ - *p++ = ARMI_SUB|ARMF_D(RID_R12)|ARMF_N(RID_R12)|ARMF_M(RID_PC); - *p++ = ARMI_PUSH|ARMF_N(RID_SP)|RSET_RANGE(RID_R4,RID_R11+1)|RID2RSET(RID_LR); - *p++ = ARMI_SUB|ARMI_K12|ARMF_D(RID_R12)|ARMF_N(RID_R12)|CALLBACK_MCODE_HEAD; - *p++ = ARMI_STR|ARMI_LS_P|ARMI_LS_W|ARMF_D(RID_R12)|ARMF_N(RID_SP)|(CFRAME_SIZE-4*9); - *p++ = ARMI_LDR|ARMI_LS_P|ARMI_LS_U|ARMF_D(RID_R12)|ARMF_N(RID_PC); - *p++ = ARMI_LDR|ARMI_LS_P|ARMI_LS_U|ARMF_D(RID_PC)|ARMF_N(RID_PC); - *p++ = u32ptr(g); - *p++ = u32ptr(target); - for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { - *p++ = ARMI_MOV|ARMF_D(RID_R12)|ARMF_M(RID_PC); - *p = ARMI_B | ((page-p-2) & 0x00ffffffu); - p++; - } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); -} -#elif LJ_TARGET_ARM64 -static void callback_mcode_init(global_State *g, uint32_t *page) -{ - uint32_t *p = page; - void *target = (void *)lj_vm_ffi_callback; - MSize slot; - *p++ = A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4); - *p++ = A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5); - *p++ = A64I_BR | A64F_N(RID_X11); - *p++ = A64I_NOP; - ((void **)p)[0] = target; - ((void **)p)[1] = g; - p += 4; - for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { - *p++ = A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot); - *p = A64I_B | A64F_S26((page-p) & 0x03ffffffu); - p++; - } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); -} -#elif LJ_TARGET_PPC -static void callback_mcode_init(global_State *g, uint32_t *page) -{ - uint32_t *p = page; - void *target = (void *)lj_vm_ffi_callback; - MSize slot; - *p++ = PPCI_LIS | PPCF_T(RID_TMP) | (u32ptr(target) >> 16); - *p++ = PPCI_LIS | PPCF_T(RID_R12) | (u32ptr(g) >> 16); - *p++ = PPCI_ORI | PPCF_A(RID_TMP)|PPCF_T(RID_TMP) | (u32ptr(target) & 0xffff); - *p++ = PPCI_ORI | PPCF_A(RID_R12)|PPCF_T(RID_R12) | (u32ptr(g) & 0xffff); - *p++ = PPCI_MTCTR | PPCF_T(RID_TMP); - *p++ = PPCI_BCTR; - for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { - *p++ = PPCI_LI | PPCF_T(RID_R11) | slot; - *p = PPCI_B | (((page-p) & 0x00ffffffu) << 2); - p++; - } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); -} -#elif LJ_TARGET_MIPS -static void callback_mcode_init(global_State *g, uint32_t *page) -{ - uint32_t *p = page; - uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback; - uintptr_t ug = (uintptr_t)(void *)g; - MSize slot; -#if LJ_TARGET_MIPS32 - *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 16); - *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 16); -#else - *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 48); - *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 48); - *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 32) & 0xffff); - *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 32) & 0xffff); - *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16); - *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16); - *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 16) & 0xffff); - *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 16) & 0xffff); - *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16); - *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16); -#endif - *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | (target & 0xffff); - *p++ = MIPSI_JR | MIPSF_S(RID_R3); - *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (ug & 0xffff); - for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { - *p = MIPSI_B | ((page-p-1) & 0x0000ffffu); - p++; - *p++ = MIPSI_LI | MIPSF_T(RID_R1) | slot; - } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); -} -#else -/* Missing support for this architecture. */ -#define callback_mcode_init(g, p) UNUSED(p) -#endif /* -- Machine code management --------------------------------------------- */ -#if LJ_TARGET_WINDOWS - -#define WIN32_LEAN_AND_MEAN -#include - -#elif LJ_TARGET_POSIX #include #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS MAP_ANON #endif -#endif /* Allocate and initialize area for callback function pointers. */ static void callback_mcode_new(CTState *cts) @@ -266,30 +109,14 @@ static void callback_mcode_new(CTState *cts) void *p; if (CALLBACK_MAX_SLOT == 0) lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); -#if LJ_TARGET_WINDOWS - p = VirtualAlloc(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); - if (!p) - lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); -#elif LJ_TARGET_POSIX p = mmap(NULL, sz, (PROT_READ|PROT_WRITE), MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (p == MAP_FAILED) lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); -#else - /* Fallback allocator. Fails if memory is not executable by default. */ - p = lj_mem_new(cts->L, sz); -#endif cts->cb.mcode = p; callback_mcode_init(cts->g, p); lj_mcode_sync(p, (char *)p + sz); -#if LJ_TARGET_WINDOWS - { - DWORD oprot; - VirtualProtect(p, sz, PAGE_EXECUTE_READ, &oprot); - } -#elif LJ_TARGET_POSIX mprotect(p, sz, (PROT_READ|PROT_EXEC)); -#endif } /* Free area for callback function pointers. */ @@ -298,103 +125,12 @@ void lj_ccallback_mcode_free(CTState *cts) size_t sz = (size_t)CALLBACK_MCODE_SIZE; void *p = cts->cb.mcode; if (p == NULL) return; -#if LJ_TARGET_WINDOWS - VirtualFree(p, 0, MEM_RELEASE); - UNUSED(sz); -#elif LJ_TARGET_POSIX munmap(p, sz); -#else - lj_mem_free(cts->g, p, sz); -#endif } /* -- C callback entry ---------------------------------------------------- */ /* Target-specific handling of register arguments. Similar to lj_ccall.c. */ -#if LJ_TARGET_X86 - -#define CALLBACK_HANDLE_REGARG \ - if (!isfp) { /* Only non-FP values may be passed in registers. */ \ - if (n > 1) { /* Anything > 32 bit is passed on the stack. */ \ - if (!LJ_ABI_WIN) ngpr = maxgpr; /* Prevent reordering. */ \ - } else if (ngpr + 1 <= maxgpr) { \ - sp = &cts->cb.gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } \ - } - -#elif LJ_TARGET_X64 && LJ_ABI_WIN - -/* Windows/x64 argument registers are strictly positional (use ngpr). */ -#define CALLBACK_HANDLE_REGARG \ - if (isfp) { \ - if (ngpr < maxgpr) { sp = &cts->cb.fpr[ngpr++]; UNUSED(nfpr); goto done; } \ - } else { \ - if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \ - } - -#elif LJ_TARGET_X64 - -#define CALLBACK_HANDLE_REGARG \ - if (isfp) { \ - if (nfpr + n <= CCALL_NARG_FPR) { \ - sp = &cts->cb.fpr[nfpr]; \ - nfpr += n; \ - goto done; \ - } \ - } else { \ - if (ngpr + n <= maxgpr) { \ - sp = &cts->cb.gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } \ - } - -#elif LJ_TARGET_ARM - -#if LJ_ABI_SOFTFP - -#define CALLBACK_HANDLE_REGARG_FP1 UNUSED(isfp); -#define CALLBACK_HANDLE_REGARG_FP2 - -#else - -#define CALLBACK_HANDLE_REGARG_FP1 \ - if (isfp) { \ - if (n == 1) { \ - if (fprodd) { \ - sp = &cts->cb.fpr[fprodd-1]; \ - fprodd = 0; \ - goto done; \ - } else if (nfpr + 1 <= CCALL_NARG_FPR) { \ - sp = &cts->cb.fpr[nfpr++]; \ - fprodd = nfpr; \ - goto done; \ - } \ - } else { \ - if (nfpr + 1 <= CCALL_NARG_FPR) { \ - sp = &cts->cb.fpr[nfpr++]; \ - goto done; \ - } \ - } \ - fprodd = 0; /* No reordering after the first FP value is on stack. */ \ - } else { - -#define CALLBACK_HANDLE_REGARG_FP2 } - -#endif - -#define CALLBACK_HANDLE_REGARG \ - CALLBACK_HANDLE_REGARG_FP1 \ - if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ - if (ngpr + n <= maxgpr) { \ - sp = &cts->cb.gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } CALLBACK_HANDLE_REGARG_FP2 - -#elif LJ_TARGET_ARM64 #define CALLBACK_HANDLE_REGARG \ if (isfp) { \ @@ -402,35 +138,8 @@ void lj_ccallback_mcode_free(CTState *cts) sp = &cts->cb.fpr[nfpr]; \ nfpr += n; \ goto done; \ - } else { \ - nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \ } \ } else { \ - if (!LJ_TARGET_IOS && n > 1) \ - ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ - if (ngpr + n <= maxgpr) { \ - sp = &cts->cb.gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } else { \ - ngpr = CCALL_NARG_GPR; /* Prevent reordering. */ \ - } \ - } - -#elif LJ_TARGET_PPC - -#define CALLBACK_HANDLE_REGARG \ - if (isfp) { \ - if (nfpr + 1 <= CCALL_NARG_FPR) { \ - sp = &cts->cb.fpr[nfpr++]; \ - cta = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ - goto done; \ - } \ - } else { /* Try to pass argument in GPRs. */ \ - if (n > 1) { \ - lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \ - ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ - } \ if (ngpr + n <= maxgpr) { \ sp = &cts->cb.gpr[ngpr]; \ ngpr += n; \ @@ -438,66 +147,6 @@ void lj_ccallback_mcode_free(CTState *cts) } \ } -#define CALLBACK_HANDLE_RET \ - if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ - *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */ - -#elif LJ_TARGET_MIPS32 - -#define CALLBACK_HANDLE_GPR \ - if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ - if (ngpr + n <= maxgpr) { \ - sp = &cts->cb.gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } - -#if !LJ_ABI_SOFTFP /* MIPS32 hard-float */ -#define CALLBACK_HANDLE_REGARG \ - if (isfp && nfpr < CCALL_NARG_FPR) { /* Try to pass argument in FPRs. */ \ - sp = (void *)((uint8_t *)&cts->cb.fpr[nfpr] + ((LJ_BE && n==1) ? 4 : 0)); \ - nfpr++; ngpr += n; \ - goto done; \ - } else { /* Try to pass argument in GPRs. */ \ - nfpr = CCALL_NARG_FPR; \ - CALLBACK_HANDLE_GPR \ - } -#else /* MIPS32 soft-float */ -#define CALLBACK_HANDLE_REGARG \ - CALLBACK_HANDLE_GPR \ - UNUSED(isfp); -#endif - -#define CALLBACK_HANDLE_RET \ - if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ - ((float *)dp)[1] = *(float *)dp; - -#elif LJ_TARGET_MIPS64 - -#if !LJ_ABI_SOFTFP /* MIPS64 hard-float */ -#define CALLBACK_HANDLE_REGARG \ - if (ngpr + n <= maxgpr) { \ - sp = isfp ? (void*) &cts->cb.fpr[ngpr] : (void*) &cts->cb.gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } -#else /* MIPS64 soft-float */ -#define CALLBACK_HANDLE_REGARG \ - if (ngpr + n <= maxgpr) { \ - UNUSED(isfp); \ - sp = (void*) &cts->cb.gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } -#endif - -#define CALLBACK_HANDLE_RET \ - if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ - ((float *)dp)[1] = *(float *)dp; - -#else -#error "Missing calling convention definitions for this architecture" -#endif /* Convert and push callback arguments to Lua stack. */ static void callback_conv_args(CTState *cts, lua_State *L) @@ -513,9 +162,6 @@ static void callback_conv_args(CTState *cts, lua_State *L) MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR; #if CCALL_NARG_FPR MSize nfpr = 0; -#if LJ_TARGET_ARM - MSize fprodd = 0; -#endif #endif if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) { @@ -549,14 +195,6 @@ static void callback_conv_args(CTState *cts, lua_State *L) lj_state_checkstack(L, LUA_MINSTACK); /* May throw. */ o = L->base; /* Might have been reallocated. */ -#if LJ_TARGET_X86 - /* x86 has several different calling conventions. */ - switch (ctype_cconv(ct->info)) { - case CTCC_FASTCALL: maxgpr = 2; break; - case CTCC_THISCALL: maxgpr = 1; break; - default: maxgpr = 0; break; - } -#endif fid = ct->sib; while (fid) { @@ -583,9 +221,6 @@ static void callback_conv_args(CTState *cts, lua_State *L) done: if (LJ_BE && cta->size < CTSIZE_PTR -#if LJ_TARGET_MIPS64 - && !(isfp && nsp) -#endif ) sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size); gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp); @@ -593,16 +228,6 @@ static void callback_conv_args(CTState *cts, lua_State *L) fid = ctf->sib; } L->top = o; -#if LJ_TARGET_X86 - /* Store stack adjustment for returns from non-cdecl callbacks. */ - if (ctype_cconv(ct->info) != CTCC_CDECL) { -#if LJ_FR2 - (L->base-3)->u64 |= (nsp << (16+2)); -#else - (L->base-2)->u32.hi |= (nsp << (16+2)); -#endif - } -#endif while (gcsteps-- > 0) lj_gc_check(L); } @@ -610,14 +235,7 @@ static void callback_conv_args(CTState *cts, lua_State *L) /* Convert Lua object to callback result. */ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) { -#if LJ_FR2 CType *ctr = ctype_raw(cts, (uint16_t)(L->base-3)->u64); -#else - CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi); -#endif -#if LJ_TARGET_X86 - cts->cb.gpr[2] = 0; -#endif if (!ctype_isvoid(ctr->info)) { uint8_t *dp = (uint8_t *)&cts->cb.gpr[0]; #if CCALL_NUM_FPR @@ -637,16 +255,6 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : (int32_t)*(int16_t *)dp; } -#if LJ_TARGET_MIPS64 - /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ - if (ctr->size <= 4 && - (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) - *(int64_t *)dp = (int64_t)*(int32_t *)dp; -#endif -#if LJ_TARGET_X86 - if (ctype_isfp(ctr->info)) - cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2; -#endif } } @@ -768,4 +376,3 @@ void *lj_ccallback_new(CTState *cts, CType *ct, GCfunc *fn) return NULL; /* Bad conversion. */ } -#endif diff --git a/src/lj_ccallback.h b/src/lj_ccallback.h index a8cdad3863..aa2af303fa 100644 --- a/src/lj_ccallback.h +++ b/src/lj_ccallback.h @@ -9,7 +9,6 @@ #include "lj_obj.h" #include "lj_ctype.h" -#if LJ_HASFFI /* Really belongs to lj_vm.h. */ LJ_ASMF void lj_vm_ffi_callback(void); @@ -20,6 +19,5 @@ LJ_FUNCA void LJ_FASTCALL lj_ccallback_leave(CTState *cts, TValue *o); LJ_FUNC void *lj_ccallback_new(CTState *cts, CType *ct, GCfunc *fn); LJ_FUNC void lj_ccallback_mcode_free(CTState *cts); -#endif #endif diff --git a/src/lj_cconv.c b/src/lj_cconv.c index ab398adcdf..2156cb670c 100644 --- a/src/lj_cconv.c +++ b/src/lj_cconv.c @@ -5,7 +5,6 @@ #include "lj_obj.h" -#if LJ_HASFFI #include "lj_err.h" #include "lj_tab.h" @@ -749,4 +748,3 @@ void lj_cconv_ct_init(CTState *cts, CType *d, CTSize sz, cconv_err_initov(cts, d); } -#endif diff --git a/src/lj_cconv.h b/src/lj_cconv.h index 0a0b66c909..161dbf2739 100644 --- a/src/lj_cconv.h +++ b/src/lj_cconv.h @@ -9,7 +9,6 @@ #include "lj_obj.h" #include "lj_ctype.h" -#if LJ_HASFFI /* Compressed C type index. ORDER CCX. */ enum { @@ -28,11 +27,7 @@ static LJ_AINLINE uint32_t cconv_idx(CTInfo info) { uint32_t idx = ((info >> 26) & 15u); /* Dispatch bits. */ lua_assert(ctype_type(info) <= CT_MAYCONVERT); -#if LJ_64 idx = ((uint32_t)(U64x(f436fff5,fff7f021) >> 4*idx) & 15u); -#else - idx = (((idx < 8 ? 0xfff7f021u : 0xf436fff5) >> 4*(idx & 7u)) & 15u); -#endif lua_assert(idx < 8); return idx; } @@ -65,6 +60,5 @@ LJ_FUNC int lj_cconv_multi_init(CTState *cts, CType *d, TValue *o); LJ_FUNC void lj_cconv_ct_init(CTState *cts, CType *d, CTSize sz, uint8_t *dp, TValue *o, MSize len); -#endif #endif diff --git a/src/lj_cdata.c b/src/lj_cdata.c index 68e16d76fc..7fb141def6 100644 --- a/src/lj_cdata.c +++ b/src/lj_cdata.c @@ -5,7 +5,6 @@ #include "lj_obj.h" -#if LJ_HASFFI #include "lj_gc.h" #include "lj_err.h" @@ -132,10 +131,6 @@ CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, uint8_t **pp, idx = (ptrdiff_t)intV(key); goto integer_key; } else if (tvisnum(key)) { /* Numeric key. */ -#ifdef _MSC_VER - /* Workaround for MSVC bug. */ - volatile -#endif lua_Number n = numV(key); idx = LJ_64 ? (ptrdiff_t)n : (ptrdiff_t)lj_num2int(n); integer_key: @@ -296,4 +291,3 @@ void lj_cdata_set(CTState *cts, CType *d, uint8_t *dp, TValue *o, CTInfo qual) lj_cconv_ct_tv(cts, d, dp, o, 0); } -#endif diff --git a/src/lj_cdata.h b/src/lj_cdata.h index 5bb0f5dca2..5ee0976879 100644 --- a/src/lj_cdata.h +++ b/src/lj_cdata.h @@ -10,7 +10,6 @@ #include "lj_gc.h" #include "lj_ctype.h" -#if LJ_HASFFI /* Get C data pointer. */ static LJ_AINLINE void *cdata_getptr(void *p, CTSize sz) @@ -73,6 +72,5 @@ LJ_FUNC int lj_cdata_get(CTState *cts, CType *s, TValue *o, uint8_t *sp); LJ_FUNC void lj_cdata_set(CTState *cts, CType *d, uint8_t *dp, TValue *o, CTInfo qual); -#endif #endif diff --git a/src/lj_clib.c b/src/lj_clib.c index 614265903a..a3f2791f88 100644 --- a/src/lj_clib.c +++ b/src/lj_clib.c @@ -5,7 +5,6 @@ #include "lj_obj.h" -#if LJ_HASFFI #include "lj_gc.h" #include "lj_err.h" @@ -20,7 +19,6 @@ /* -- OS-specific functions ----------------------------------------------- */ -#if LJ_TARGET_DLOPEN #include #include @@ -40,16 +38,10 @@ LJ_NORET LJ_NOINLINE static void clib_error_(lua_State *L) #define clib_error(L, fmt, name) clib_error_(L) -#if LJ_TARGET_CYGWIN -#define CLIB_SOPREFIX "cyg" -#else #define CLIB_SOPREFIX "lib" -#endif #if LJ_TARGET_OSX #define CLIB_SOEXT "%s.dylib" -#elif LJ_TARGET_CYGWIN -#define CLIB_SOEXT "%s.dll" #else #define CLIB_SOEXT "%s.so" #endif @@ -57,17 +49,10 @@ LJ_NORET LJ_NOINLINE static void clib_error_(lua_State *L) static const char *clib_extname(lua_State *L, const char *name) { if (!strchr(name, '/') -#if LJ_TARGET_CYGWIN - && !strchr(name, '\\') -#endif ) { if (!strchr(name, '.')) { name = lj_strfmt_pushf(L, CLIB_SOEXT, name); L->top--; -#if LJ_TARGET_CYGWIN - } else { - return name; -#endif } if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] && name[2] == CLIB_SOPREFIX[2])) { @@ -142,177 +127,9 @@ static void *clib_getsym(CLibrary *cl, const char *name) return p; } -#elif LJ_TARGET_WINDOWS - -#define WIN32_LEAN_AND_MEAN -#include - -#ifndef GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS -#define GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS 4 -#define GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT 2 -BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*); -#endif - -#define CLIB_DEFHANDLE ((void *)-1) - -/* Default libraries. */ -enum { - CLIB_HANDLE_EXE, - CLIB_HANDLE_DLL, - CLIB_HANDLE_CRT, - CLIB_HANDLE_KERNEL32, - CLIB_HANDLE_USER32, - CLIB_HANDLE_GDI32, - CLIB_HANDLE_MAX -}; - -static void *clib_def_handle[CLIB_HANDLE_MAX]; - -LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt, - const char *name) -{ - DWORD err = GetLastError(); -#if LJ_TARGET_XBOXONE - wchar_t wbuf[128]; - char buf[128*2]; - if (!FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM, - NULL, err, 0, wbuf, sizeof(wbuf)/sizeof(wchar_t), NULL) || - !WideCharToMultiByte(CP_ACP, 0, wbuf, 128, buf, 128*2, NULL, NULL)) -#else - char buf[128]; - if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM, - NULL, err, 0, buf, sizeof(buf), NULL)) -#endif - buf[0] = '\0'; - lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, buf)); -} - -static int clib_needext(const char *s) -{ - while (*s) { - if (*s == '/' || *s == '\\' || *s == '.') return 0; - s++; - } - return 1; -} - -static const char *clib_extname(lua_State *L, const char *name) -{ - if (clib_needext(name)) { - name = lj_strfmt_pushf(L, "%s.dll", name); - L->top--; - } - return name; -} - -static void *clib_loadlib(lua_State *L, const char *name, int global) -{ - DWORD oldwerr = GetLastError(); - void *h = (void *)LoadLibraryExA(clib_extname(L, name), NULL, 0); - if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name); - SetLastError(oldwerr); - UNUSED(global); - return h; -} - -static void clib_unloadlib(CLibrary *cl) -{ - if (cl->handle == CLIB_DEFHANDLE) { - MSize i; - for (i = CLIB_HANDLE_KERNEL32; i < CLIB_HANDLE_MAX; i++) { - void *h = clib_def_handle[i]; - if (h) { - clib_def_handle[i] = NULL; - FreeLibrary((HINSTANCE)h); - } - } - } else if (cl->handle) { - FreeLibrary((HINSTANCE)cl->handle); - } -} - -static void *clib_getsym(CLibrary *cl, const char *name) -{ - void *p = NULL; - if (cl->handle == CLIB_DEFHANDLE) { /* Search default libraries. */ - MSize i; - for (i = 0; i < CLIB_HANDLE_MAX; i++) { - HINSTANCE h = (HINSTANCE)clib_def_handle[i]; - if (!(void *)h) { /* Resolve default library handles (once). */ - switch (i) { - case CLIB_HANDLE_EXE: GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, NULL, &h); break; - case CLIB_HANDLE_DLL: - GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, - (const char *)clib_def_handle, &h); - break; - case CLIB_HANDLE_CRT: - GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, - (const char *)&_fmode, &h); - break; - case CLIB_HANDLE_KERNEL32: h = LoadLibraryExA("kernel32.dll", NULL, 0); break; - case CLIB_HANDLE_USER32: h = LoadLibraryExA("user32.dll", NULL, 0); break; - case CLIB_HANDLE_GDI32: h = LoadLibraryExA("gdi32.dll", NULL, 0); break; - } - if (!h) continue; - clib_def_handle[i] = (void *)h; - } - p = (void *)GetProcAddress(h, name); - if (p) break; - } - } else { - p = (void *)GetProcAddress((HINSTANCE)cl->handle, name); - } - return p; -} - -#else - -#define CLIB_DEFHANDLE NULL - -LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt, - const char *name) -{ - lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, "no support for this OS")); -} - -static void *clib_loadlib(lua_State *L, const char *name, int global) -{ - lj_err_callermsg(L, "no support for loading dynamic libraries for this OS"); - UNUSED(name); UNUSED(global); - return NULL; -} - -static void clib_unloadlib(CLibrary *cl) -{ - UNUSED(cl); -} - -static void *clib_getsym(CLibrary *cl, const char *name) -{ - UNUSED(cl); UNUSED(name); - return NULL; -} - -#endif /* -- C library indexing -------------------------------------------------- */ -#if LJ_TARGET_X86 && LJ_ABI_WIN -/* Compute argument size for fastcall/stdcall functions. */ -static CTSize clib_func_argsize(CTState *cts, CType *ct) -{ - CTSize n = 0; - while (ct->sib) { - CType *d; - ct = ctype_get(cts, ct->sib); - if (ctype_isfield(ct->info)) { - d = ctype_rawchild(cts, ct); - n += ((d->size + 3) & ~3); - } - } - return n; -} -#endif /* Get redirected or mangled external symbol. */ static const char *clib_extsym(CTState *cts, CType *ct, GCstr *name) @@ -344,31 +161,11 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name) setintV(tv, (int32_t)ct->size); } else { const char *sym = clib_extsym(cts, ct, name); -#if LJ_TARGET_WINDOWS - DWORD oldwerr = GetLastError(); -#endif void *p = clib_getsym(cl, sym); GCcdata *cd; lua_assert(ctype_isfunc(ct->info) || ctype_isextern(ct->info)); -#if LJ_TARGET_X86 && LJ_ABI_WIN - /* Retry with decorated name for fastcall/stdcall functions. */ - if (!p && ctype_isfunc(ct->info)) { - CTInfo cconv = ctype_cconv(ct->info); - if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) { - CTSize sz = clib_func_argsize(cts, ct); - const char *symd = lj_strfmt_pushf(L, - cconv == CTCC_FASTCALL ? "@%s@%d" : "_%s@%d", - sym, sz); - L->top--; - p = clib_getsym(cl, symd); - } - } -#endif if (!p) clib_error(L, "cannot resolve symbol " LUA_QS ": %s", sym); -#if LJ_TARGET_WINDOWS - SetLastError(oldwerr); -#endif cd = lj_cdata_new(cts, id, CTSIZE_PTR); *(void **)cdataptr(cd) = p; setcdataV(L, tv, cd); @@ -415,4 +212,3 @@ void lj_clib_default(lua_State *L, GCtab *mt) cl->handle = CLIB_DEFHANDLE; } -#endif diff --git a/src/lj_clib.h b/src/lj_clib.h index fcc9dac592..0cf636ca7e 100644 --- a/src/lj_clib.h +++ b/src/lj_clib.h @@ -8,7 +8,6 @@ #include "lj_obj.h" -#if LJ_HASFFI /* Namespace for C library indexing. */ #define CLNS_INDEX ((1u<stack[decl->pos]; if (ctype_isfunc(ct->info)) { /* Ok to modify in-place. */ -#if LJ_TARGET_X86 - if ((decl->fattr & CTFP_CCONV)) - ct->info = (ct->info & (CTMASK_NUM|CTF_VARARG|CTMASK_CID)) + - (decl->fattr & ~CTMASK_CID); -#endif } else { if ((decl->attr & CTFP_ALIGNED) && !(decl->mode & CPARSE_MODE_FIELD)) cp_push(decl, CTINFO(CT_ATTRIB, CTATTRIB(CTA_ALIGN)), @@ -1075,32 +1069,6 @@ static void cp_decl_gccattribute(CPState *cp, CPDecl *decl) if (vsize) CTF_INSERT(decl->attr, VSIZEP, lj_fls(vsize)); } break; -#if LJ_TARGET_X86 - case H_(5ad22db8,c689b848): case H_(439150fa,65ea78cb): /* regparm */ - CTF_INSERT(decl->fattr, REGPARM, cp_decl_sizeattr(cp)); - decl->fattr |= CTFP_CCONV; - break; - case H_(18fc0b98,7ff4c074): case H_(4e62abed,0a747424): /* cdecl */ - CTF_INSERT(decl->fattr, CCONV, CTCC_CDECL); - decl->fattr |= CTFP_CCONV; - break; - case H_(72b2e41b,494c5a44): case H_(f2356d59,f25fc9bd): /* thiscall */ - CTF_INSERT(decl->fattr, CCONV, CTCC_THISCALL); - decl->fattr |= CTFP_CCONV; - break; - case H_(0d0ffc42,ab746f88): case H_(21c54ba1,7f0ca7e3): /* fastcall */ - CTF_INSERT(decl->fattr, CCONV, CTCC_FASTCALL); - decl->fattr |= CTFP_CCONV; - break; - case H_(ef76b040,9412e06a): case H_(de56697b,c750e6e1): /* stdcall */ - CTF_INSERT(decl->fattr, CCONV, CTCC_STDCALL); - decl->fattr |= CTFP_CCONV; - break; - case H_(ea78b622,f234bd8e): case H_(252ffb06,8d50f34b): /* sseregparm */ - decl->fattr |= CTF_SSEREGPARM; - decl->fattr |= CTFP_CCONV; - break; -#endif default: /* Skip all other attributes. */ goto skip_attr; } @@ -1156,15 +1124,9 @@ static void cp_decl_attributes(CPState *cp, CPDecl *decl) case CTOK_ASM: cp_decl_asm(cp, decl); continue; case CTOK_DECLSPEC: cp_decl_msvcattribute(cp, decl); continue; case CTOK_CCDECL: -#if LJ_TARGET_X86 - CTF_INSERT(decl->fattr, CCONV, cp->ct->size); - decl->fattr |= CTFP_CCONV; -#endif break; case CTOK_PTRSZ: -#if LJ_64 CTF_INSERT(decl->attr, MSIZEP, cp->ct->size); -#endif break; default: return; } @@ -1216,19 +1178,6 @@ static CTSize cp_field_align(CPState *cp, CType *ct, CTInfo info) { CTSize align = ctype_align(info); UNUSED(cp); UNUSED(ct); -#if (LJ_TARGET_X86 && !LJ_ABI_WIN) || (LJ_TARGET_ARM && __APPLE__) - /* The SYSV i386 and iOS ABIs limit alignment of non-vector fields to 2^2. */ - if (align > 2 && !(info & CTFP_ALIGNED)) { - if (ctype_isarray(info) && !(info & CTF_VECTOR)) { - do { - ct = ctype_rawchild(cp->cts, ct); - info = ct->info; - } while (ctype_isarray(info) && !(info & CTF_VECTOR)); - } - if (ctype_isnum(info) || ctype_isenum(info)) - align = 2; - } -#endif return align; } @@ -1286,11 +1235,7 @@ static void cp_struct_layout(CPState *cp, CTypeID sid, CTInfo sattr) ct->info = CTINFO(CT_BITFIELD, (info & (CTF_QUAL|CTF_UNSIGNED|CTF_BOOL)) + (csz << (CTSHIFT_BITCSZ-3)) + (bsz << CTSHIFT_BITBSZ)); -#if LJ_BE - ct->info += ((csz - (bofs & (csz-1)) - bsz) << CTSHIFT_BITPOS); -#else ct->info += ((bofs & (csz-1)) << CTSHIFT_BITPOS); -#endif ct->size = ((bofs & ~(csz-1)) >> 3); /* Store container offset. */ } } @@ -1646,12 +1591,10 @@ static void cp_declarator(CPState *cp, CPDecl *decl) cp_decl_attributes(cp, decl); sz = CTSIZE_PTR; info = CTINFO(CT_PTR, CTALIGN_PTR); -#if LJ_64 if (ctype_msizeP(decl->attr) == 4) { sz = 4; info = CTINFO(CT_PTR, CTALIGN(2)); } -#endif info += (decl->attr & (CTF_QUAL|CTF_REF)); decl->attr &= ~(CTF_QUAL|(CTMASK_MSIZEP<ctypeid; @@ -716,12 +707,8 @@ static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz) static void crec_tailcall(jit_State *J, RecordFFData *rd, cTValue *tv) { TRef kfunc = lj_ir_kfunc(J, funcV(tv)); -#if LJ_FR2 J->base[-2] = kfunc; J->base[-1] = TREF_FRAME; -#else - J->base[-1] = kfunc | TREF_FRAME; -#endif rd->nres = -1; /* Pending tailcall. */ } @@ -780,17 +767,6 @@ void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd) idx = emitir(IRT(IR_BAND, IRT_INTP), idx, lj_ir_kintp(J, 1)); sz = lj_ctype_size(cts, (sid = ctype_cid(ct->info))); idx = crec_reassoc_ofs(J, idx, &ofs, sz); -#if LJ_TARGET_ARM || LJ_TARGET_PPC - /* Hoist base add to allow fusion of index/shift into operands. */ - if (LJ_LIKELY(J->flags & JIT_F_OPT_LOOP) && ofs -#if LJ_TARGET_ARM - && (sz == 1 || sz == 4) -#endif - ) { - ptr = emitir(IRT(IR_ADD, IRT_PTR), ptr, lj_ir_kintp(J, ofs)); - ofs = 0; - } -#endif idx = emitir(IRT(IR_MUL, IRT_INTP), idx, lj_ir_kintp(J, sz)); ptr = emitir(IRT(IR_ADD, IRT_PTR), idx, ptr); } @@ -1037,16 +1013,6 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, MSize i, n; TRef tr, *base; cTValue *o; -#if LJ_TARGET_X86 -#if LJ_ABI_WIN - TRef *arg0 = NULL, *arg1 = NULL; -#endif - int ngpr = 0; - if (ctype_cconv(ct->info) == CTCC_THISCALL) - ngpr = 1; - else if (ctype_cconv(ct->info) == CTCC_FASTCALL) - ngpr = 2; -#endif /* Skip initial attributes. */ fid = ct->sib; @@ -1088,35 +1054,6 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, } else if (LJ_SOFTFP && ctype_isfp(d->info) && d->size > 4) { lj_needsplit(J); } -#if LJ_TARGET_X86 - /* 64 bit args must not end up in registers for fastcall/thiscall. */ -#if LJ_ABI_WIN - if (!ctype_isfp(d->info)) { - /* Sigh, the Windows/x86 ABI allows reordering across 64 bit args. */ - if (tref_typerange(tr, IRT_I64, IRT_U64)) { - if (ngpr) { - arg0 = &args[n]; args[n++] = TREF_NIL; ngpr--; - if (ngpr) { - arg1 = &args[n]; args[n++] = TREF_NIL; ngpr--; - } - } - } else { - if (arg0) { *arg0 = tr; arg0 = NULL; n--; continue; } - if (arg1) { *arg1 = tr; arg1 = NULL; n--; continue; } - if (ngpr) ngpr--; - } - } -#else - if (!ctype_isfp(d->info) && ngpr) { - if (tref_typerange(tr, IRT_I64, IRT_U64)) { - /* No reordering for other x86 ABIs. Simply add alignment args. */ - do { args[n++] = TREF_NIL; } while (--ngpr); - } else { - ngpr--; - } - } -#endif -#endif args[n] = tr; } tr = args[0]; @@ -1176,9 +1113,6 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) lj_trace_err(J, LJ_TRERR_NYICALL); } if ((ct->info & CTF_VARARG) -#if LJ_TARGET_X86 - || ctype_cconv(ct->info) != CTCC_CDECL -#endif ) func = emitir(IRT(IR_CARG, IRT_NIL), func, lj_ir_kint(J, ctype_typeid(cts, ct))); @@ -1189,12 +1123,8 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) tr = TREF_NIL; } else { crec_snap_caller(J); -#if LJ_TARGET_X86ORX64 /* Note: only the x86/x64 backend supports U8 and only for EQ(tr, 0). */ lj_ir_set(J, IRTG(IR_NE, IRT_U8), tr, lj_ir_kint(J, 0)); -#else - lj_ir_set(J, IRTGI(IR_NE), tr, lj_ir_kint(J, 0)); -#endif J->postproc = LJ_POST_FIXGUARDSNAP; tr = TREF_TRUE; } @@ -1318,9 +1248,7 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm) return 0; /* NYI: integer division. */ tr = emitir(IRT(IR_SUB, IRT_INTP), sp[0], sp[1]); tr = emitir(IRT(IR_BSAR, IRT_INTP), tr, lj_ir_kint(J, lj_fls(sz))); -#if LJ_64 tr = emitconv(tr, IRT_NUM, IRT_INTP, 0); -#endif return tr; } else { /* Pointer comparison (unsigned). */ /* Assume true comparison. Fixup and emit pending guard later. */ @@ -1344,18 +1272,11 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm) IRType t = tref_type(tr); CTSize sz = lj_ctype_size(cts, ctype_cid(ctp->info)); CTypeID id; -#if LJ_64 if (t == IRT_NUM || t == IRT_FLOAT) tr = emitconv(tr, IRT_INTP, t, IRCONV_ANY); else if (!(t == IRT_I64 || t == IRT_U64)) tr = emitconv(tr, IRT_INTP, IRT_INT, ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT); -#else - if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) { - tr = emitconv(tr, IRT_INTP, t, - (t == IRT_NUM || t == IRT_FLOAT) ? IRCONV_ANY : 0); - } -#endif tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz)); tr = emitir(IRT(mm+(int)IR_ADD-(int)MM_add, IRT_PTR), sp[0], tr); id = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|ctype_cid(ctp->info)), @@ -1842,4 +1763,3 @@ void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd) #undef emitir #undef emitconv -#endif diff --git a/src/lj_crecord.h b/src/lj_crecord.h index c165def475..cd6ad9f506 100644 --- a/src/lj_crecord.h +++ b/src/lj_crecord.h @@ -10,7 +10,6 @@ #include "lj_jit.h" #include "lj_ffrecord.h" -#if LJ_HASJIT && LJ_HASFFI LJ_FUNC void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd); LJ_FUNC void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd); LJ_FUNC void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd); @@ -33,6 +32,5 @@ LJ_FUNC int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd); LJ_FUNC TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr); LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd); -#endif #endif diff --git a/src/lj_ctype.c b/src/lj_ctype.c index 0ea89c7486..06a75abff0 100644 --- a/src/lj_ctype.c +++ b/src/lj_ctype.c @@ -5,7 +5,6 @@ #include "lj_obj.h" -#if LJ_HASFFI #include "lj_gc.h" #include "lj_err.h" @@ -634,4 +633,3 @@ void lj_ctype_freestate(global_State *g) } } -#endif diff --git a/src/lj_ctype.h b/src/lj_ctype.h index 0c220a8886..245a1b6528 100644 --- a/src/lj_ctype.h +++ b/src/lj_ctype.h @@ -9,7 +9,6 @@ #include "lj_obj.h" #include "lj_gc.h" -#if LJ_HASFFI /* -- C type definitions -------------------------------------------------- */ @@ -247,13 +246,8 @@ typedef struct CTState { CTINFO(CT_ATTRIB, CTATTRIB(at))) /* Target-dependent sizes and alignments. */ -#if LJ_64 #define CTSIZE_PTR 8 #define CTALIGN_PTR CTALIGN(3) -#else -#define CTSIZE_PTR 4 -#define CTALIGN_PTR CTALIGN(2) -#endif #define CTINFO_REF(ref) \ CTINFO(CT_PTR, (CTF_CONST|CTF_REF|CTALIGN_PTR) + (ref)) @@ -263,12 +257,7 @@ typedef struct CTState { /* -- Predefined types ---------------------------------------------------- */ /* Target-dependent types. */ -#if LJ_TARGET_PPC -#define CTTYDEFP(_) \ - _(LINT32, 4, CT_NUM, CTF_LONG|CTALIGN(2)) -#else #define CTTYDEFP(_) -#endif /* Common types. */ #define CTTYDEF(_) \ @@ -307,21 +296,10 @@ CTTYDEF(CTTYIDDEF) }; /* Target-dependent type IDs. */ -#if LJ_64 #define CTID_INT_PSZ CTID_INT64 #define CTID_UINT_PSZ CTID_UINT64 -#else -#define CTID_INT_PSZ CTID_INT32 -#define CTID_UINT_PSZ CTID_UINT32 -#endif -#if LJ_ABI_WIN -#define CTID_WCHAR CTID_UINT16 -#elif LJ_TARGET_PPC -#define CTID_WCHAR CTID_LINT32 -#else #define CTID_WCHAR CTID_INT32 -#endif /* -- C tokens and keywords ----------------------------------------------- */ @@ -456,6 +434,5 @@ LJ_FUNC GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size); LJ_FUNC CTState *lj_ctype_init(lua_State *L); LJ_FUNC void lj_ctype_freestate(global_State *g); -#endif #endif diff --git a/src/lj_debug.c b/src/lj_debug.c index 959dc289c7..5f624d5b68 100644 --- a/src/lj_debug.c +++ b/src/lj_debug.c @@ -15,9 +15,7 @@ #include "lj_frame.h" #include "lj_bc.h" #include "lj_strfmt.h" -#if LJ_HASJIT #include "lj_jit.h" -#endif /* -- Frames -------------------------------------------------------------- */ @@ -97,13 +95,11 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) } pt = funcproto(fn); pos = proto_bcpos(pt, ins) - 1; -#if LJ_HASJIT if (pos > pt->sizebc) { /* Undo the effects of lj_trace_exit for JLOOP. */ GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins)); lua_assert(bc_isret(bc_op(ins[-1]))); pos = proto_bcpos(pt, mref(T->startpc, const BCIns)); } -#endif return pos; } @@ -541,110 +537,6 @@ LUA_API int lua_getstack(lua_State *L, int level, lua_Debug *ar) } } -#if LJ_HASPROFILE -/* Put the chunkname into a buffer. */ -static int debug_putchunkname(SBuf *sb, GCproto *pt, int pathstrip) -{ - GCstr *name = proto_chunkname(pt); - const char *p = strdata(name); - if (pt->firstline == ~(BCLine)0) { - lj_buf_putmem(sb, "[builtin:", 9); - lj_buf_putstr(sb, name); - lj_buf_putb(sb, ']'); - return 0; - } - if (*p == '=' || *p == '@') { - MSize len = name->len-1; - p++; - if (pathstrip) { - int i; - for (i = len-1; i >= 0; i--) - if (p[i] == '/' || p[i] == '\\') { - len -= i+1; - p = p+i+1; - break; - } - } - lj_buf_putmem(sb, p, len); - } else { - lj_buf_putmem(sb, "[string]", 8); - } - return 1; -} - -/* Put a compact stack dump into a buffer. */ -void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, int depth) -{ - int level = 0, dir = 1, pathstrip = 1; - MSize lastlen = 0; - if (depth < 0) { level = ~depth; depth = dir = -1; } /* Reverse frames. */ - while (level != depth) { /* Loop through all frame. */ - int size; - cTValue *frame = lj_debug_frame(L, level, &size); - if (frame) { - cTValue *nextframe = size ? frame+size : NULL; - GCfunc *fn = frame_func(frame); - const uint8_t *p = (const uint8_t *)fmt; - int c; - while ((c = *p++)) { - switch (c) { - case 'p': /* Preserve full path. */ - pathstrip = 0; - break; - case 'F': case 'f': { /* Dump function name. */ - const char *name; - const char *what = lj_debug_funcname(L, frame, &name); - if (what) { - if (c == 'F' && isluafunc(fn)) { /* Dump module:name for 'F'. */ - GCproto *pt = funcproto(fn); - if (pt->firstline != ~(BCLine)0) { /* Not a bytecode builtin. */ - debug_putchunkname(sb, pt, pathstrip); - lj_buf_putb(sb, ':'); - } - } - lj_buf_putmem(sb, name, (MSize)strlen(name)); - break; - } /* else: can't derive a name, dump module:line. */ - } - /* fallthrough */ - case 'l': /* Dump module:line. */ - if (isluafunc(fn)) { - GCproto *pt = funcproto(fn); - if (debug_putchunkname(sb, pt, pathstrip)) { - /* Regular Lua function. */ - BCLine line = c == 'l' ? debug_frameline(L, fn, nextframe) : - pt->firstline; - lj_buf_putb(sb, ':'); - lj_strfmt_putint(sb, line >= 0 ? line : pt->firstline); - } - } else if (isffunc(fn)) { /* Dump numbered builtins. */ - lj_buf_putmem(sb, "[builtin#", 9); - lj_strfmt_putint(sb, fn->c.ffid); - lj_buf_putb(sb, ']'); - } else { /* Dump C function address. */ - lj_buf_putb(sb, '@'); - lj_strfmt_putptr(sb, fn->c.f); - } - break; - case 'Z': /* Zap trailing separator. */ - lastlen = sbuflen(sb); - break; - default: - lj_buf_putb(sb, c); - break; - } - } - } else if (dir == 1) { - break; - } else { - level -= size; /* Reverse frame order: quickly skip missing level. */ - } - level += dir; - } - if (lastlen) - setsbufP(sb, sbufB(sb) + lastlen); /* Zap trailing separator. */ -} -#endif /* Number of frames for the leading and trailing part of a traceback. */ #define TRACEBACK_LEVELS1 12 diff --git a/src/lj_debug.h b/src/lj_debug.h index 5917c00bc6..217a07a80e 100644 --- a/src/lj_debug.h +++ b/src/lj_debug.h @@ -40,10 +40,6 @@ LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg, LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc); LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext); -#if LJ_HASPROFILE -LJ_FUNC void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, - int depth); -#endif /* Fixed internal variable names. */ #define VARNAMEDEF(_) \ diff --git a/src/lj_def.h b/src/lj_def.h index 2d8fff66f3..cbc40f82ce 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -8,38 +8,7 @@ #include "lua.h" -#if defined(_MSC_VER) -/* MSVC is stuck in the last century and doesn't have C99's stdint.h. */ -typedef __int8 int8_t; -typedef __int16 int16_t; -typedef __int32 int32_t; -typedef __int64 int64_t; -typedef unsigned __int8 uint8_t; -typedef unsigned __int16 uint16_t; -typedef unsigned __int32 uint32_t; -typedef unsigned __int64 uint64_t; -#ifdef _WIN64 -typedef __int64 intptr_t; -typedef unsigned __int64 uintptr_t; -#else -typedef __int32 intptr_t; -typedef unsigned __int32 uintptr_t; -#endif -#elif defined(__symbian__) -/* Cough. */ -typedef signed char int8_t; -typedef short int int16_t; -typedef int int32_t; -typedef long long int64_t; -typedef unsigned char uint8_t; -typedef unsigned short int uint16_t; -typedef unsigned int uint32_t; -typedef unsigned long long uint64_t; -typedef int intptr_t; -typedef unsigned int uintptr_t; -#else #include -#endif /* Needed everywhere. */ #include @@ -129,18 +98,13 @@ typedef uintptr_t BloomFilter; #define LJ_NOINLINE __attribute__((noinline)) #if defined(__ELF__) || defined(__MACH__) || defined(__psp2__) -#if !((defined(__sun__) && defined(__svr4__)) || defined(__CELLOS_LV2__)) #define LJ_NOAPI extern __attribute__((visibility("hidden"))) #endif -#endif /* Note: it's only beneficial to use fastcall on x86 and then only for up to ** two non-FP args. The amalgamated compile covers all LJ_FUNC cases. Only ** indirect calls and related tail-called C functions are marked as fastcall. */ -#if defined(__i386__) -#define LJ_FASTCALL __attribute__((fastcall)) -#endif #define LJ_LIKELY(x) __builtin_expect(!!(x), 1) #define LJ_UNLIKELY(x) __builtin_expect(!!(x), 0) @@ -156,33 +120,7 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x) #define lj_fls(x) ((uint32_t)(__builtin_clz(x)^31)) #endif -#if defined(__arm__) -static LJ_AINLINE uint32_t lj_bswap(uint32_t x) -{ -#if defined(__psp2__) - return __builtin_rev(x); -#else - uint32_t r; -#if __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6T2__ || __ARM_ARCH_6Z__ ||\ - __ARM_ARCH_6ZK__ || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ - __asm__("rev %0, %1" : "=r" (r) : "r" (x)); - return r; -#else -#ifdef __thumb__ - r = x ^ lj_ror(x, 16); -#else - __asm__("eor %0, %1, %1, ror #16" : "=r" (r) : "r" (x)); -#endif - return ((r & 0xff00ffffu) >> 8) ^ lj_ror(x, 8); -#endif -#endif -} - -static LJ_AINLINE uint64_t lj_bswap64(uint64_t x) -{ - return ((uint64_t)lj_bswap((uint32_t)x)<<32) | lj_bswap((uint32_t)(x>>32)); -} -#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) +#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) static LJ_AINLINE uint32_t lj_bswap(uint32_t x) { return (uint32_t)__builtin_bswap32((int32_t)x); @@ -198,17 +136,10 @@ static LJ_AINLINE uint32_t lj_bswap(uint32_t x) uint32_t r; __asm__("bswap %0" : "=r" (r) : "0" (x)); return r; } -#if defined(__i386__) -static LJ_AINLINE uint64_t lj_bswap64(uint64_t x) -{ - return ((uint64_t)lj_bswap((uint32_t)x)<<32) | lj_bswap((uint32_t)(x>>32)); -} -#else static LJ_AINLINE uint64_t lj_bswap64(uint64_t x) { uint64_t r; __asm__("bswap %0" : "=r" (r) : "0" (x)); return r; } -#endif #else static LJ_AINLINE uint32_t lj_bswap(uint32_t x) { @@ -244,67 +175,6 @@ static LJ_AINLINE uint32_t lj_getu32(const void *p) return ((const Unaligned32 *)p)->u; } -#elif defined(_MSC_VER) - -#define LJ_NORET __declspec(noreturn) -#define LJ_ALIGN(n) __declspec(align(n)) -#define LJ_INLINE __inline -#define LJ_AINLINE __forceinline -#define LJ_NOINLINE __declspec(noinline) -#if defined(_M_IX86) -#define LJ_FASTCALL __fastcall -#endif - -#ifdef _M_PPC -unsigned int _CountLeadingZeros(long); -#pragma intrinsic(_CountLeadingZeros) -static LJ_AINLINE uint32_t lj_fls(uint32_t x) -{ - return _CountLeadingZeros(x) ^ 31; -} -#else -unsigned char _BitScanForward(uint32_t *, unsigned long); -unsigned char _BitScanReverse(uint32_t *, unsigned long); -#pragma intrinsic(_BitScanForward) -#pragma intrinsic(_BitScanReverse) - -static LJ_AINLINE uint32_t lj_ffs(uint32_t x) -{ - uint32_t r; _BitScanForward(&r, x); return r; -} - -static LJ_AINLINE uint32_t lj_fls(uint32_t x) -{ - uint32_t r; _BitScanReverse(&r, x); return r; -} -#endif - -unsigned long _byteswap_ulong(unsigned long); -uint64_t _byteswap_uint64(uint64_t); -#define lj_bswap(x) (_byteswap_ulong((x))) -#define lj_bswap64(x) (_byteswap_uint64((x))) - -#if defined(_M_PPC) && defined(LUAJIT_NO_UNALIGNED) -/* -** Replacement for unaligned loads on Xbox 360. Disabled by default since it's -** usually more costly than the occasional stall when crossing a cache-line. -*/ -static LJ_AINLINE uint16_t lj_getu16(const void *v) -{ - const uint8_t *p = (const uint8_t *)v; - return (uint16_t)((p[0]<<8) | p[1]); -} -static LJ_AINLINE uint32_t lj_getu32(const void *v) -{ - const uint8_t *p = (const uint8_t *)v; - return (uint32_t)((p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3]); -} -#else -/* Unaligned loads are generally ok on x86/x64. */ -#define lj_getu16(p) (*(uint16_t *)(p)) -#define lj_getu32(p) (*(uint32_t *)(p)) -#endif - #else #error "missing defines for your compiler" #endif @@ -329,11 +199,7 @@ static LJ_AINLINE uint32_t lj_getu32(const void *v) #define LJ_DATADEF #define LJ_ASMF LJ_NOAPI #define LJ_FUNCA LJ_NOAPI -#if defined(ljamalg_c) -#define LJ_FUNC static -#else #define LJ_FUNC LJ_NOAPI -#endif #define LJ_FUNC_NORET LJ_FUNC LJ_NORET #define LJ_FUNCA_NORET LJ_FUNCA LJ_NORET #define LJ_ASMF_NORET LJ_ASMF LJ_NORET diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c index 5d6795f88e..a21230c6df 100644 --- a/src/lj_dispatch.c +++ b/src/lj_dispatch.c @@ -19,17 +19,10 @@ #include "lj_bc.h" #include "lj_ff.h" #include "lj_strfmt.h" -#if LJ_HASJIT #include "lj_jit.h" -#endif -#if LJ_HASFFI #include "lj_ccallback.h" -#endif #include "lj_trace.h" #include "lj_dispatch.h" -#if LJ_HASPROFILE -#include "lj_profile.h" -#endif #include "lj_vm.h" #include "luajit.h" @@ -38,23 +31,6 @@ LJ_STATIC_ASSERT(GG_NUM_ASMFF == FF_NUM_ASMFUNC); /* -- Dispatch table management ------------------------------------------- */ -#if LJ_TARGET_MIPS -#include -LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, - lua_State *co); -#if !LJ_HASJIT -#define lj_dispatch_stitch lj_dispatch_ins -#endif -#if !LJ_HASPROFILE -#define lj_dispatch_profile lj_dispatch_ins -#endif - -#define GOTFUNC(name) (ASMFunction)name, -static const ASMFunction dispatch_got[] = { - GOTDEF(GOTFUNC) -}; -#undef GOTFUNC -#endif /* Initialize instruction dispatch table and hot counters. */ void lj_dispatch_init(GG_State *GG) @@ -74,12 +50,8 @@ void lj_dispatch_init(GG_State *GG) GG->g.bc_cfunc_ext = GG->g.bc_cfunc_int = BCINS_AD(BC_FUNCC, LUA_MINSTACK, 0); for (i = 0; i < GG_NUM_ASMFF; i++) GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0); -#if LJ_TARGET_MIPS - memcpy(GG->got, dispatch_got, LJ_GOT__MAX*sizeof(ASMFunction *)); -#endif } -#if LJ_HASJIT /* Initialize hotcount table. */ void lj_dispatch_init_hotcount(global_State *g) { @@ -90,7 +62,6 @@ void lj_dispatch_init_hotcount(global_State *g) for (i = 0; i < HOTCOUNT_SIZE; i++) hotcount[i] = start; } -#endif /* Internal dispatch mode bits. */ #define DISPMODE_CALL 0x01 /* Override call dispatch. */ @@ -105,14 +76,9 @@ void lj_dispatch_update(global_State *g) { uint8_t oldmode = g->dispatchmode; uint8_t mode = 0; -#if LJ_HASJIT mode |= (G2J(g)->flags & JIT_F_ON) ? DISPMODE_JIT : 0; mode |= G2J(g)->state != LJ_TRACE_IDLE ? (DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0; -#endif -#if LJ_HASPROFILE - mode |= (g->hookmask & HOOK_PROFILE) ? (DISPMODE_PROF|DISPMODE_INS) : 0; -#endif mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0; mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0; mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0; @@ -196,17 +162,14 @@ void lj_dispatch_update(global_State *g) disp[BC_FUNCV] = f_funcv; } -#if LJ_HASJIT /* Reset hotcounts for JIT off to on transition. */ if ((mode & DISPMODE_JIT) && !(oldmode & DISPMODE_JIT)) lj_dispatch_init_hotcount(g); -#endif } } /* -- JIT mode setting ---------------------------------------------------- */ -#if LJ_HASJIT /* Set JIT mode for a single prototype. */ static void setptmode(global_State *g, GCproto *pt, int mode) { @@ -233,7 +196,6 @@ static void setptmode_all(global_State *g, GCproto *pt, int mode) } } } -#endif /* Public API function: control the JIT engine. */ int luaJIT_setmode(lua_State *L, int idx, int mode) @@ -245,22 +207,16 @@ int luaJIT_setmode(lua_State *L, int idx, int mode) if ((g->hookmask & HOOK_GC)) lj_err_caller(L, LJ_ERR_NOGCMM); switch (mm) { -#if LJ_HASJIT case LUAJIT_MODE_ENGINE: if ((mode & LUAJIT_MODE_FLUSH)) { lj_trace_flushall(L); } else { if (!(mode & LUAJIT_MODE_ON)) G2J(g)->flags &= ~(uint32_t)JIT_F_ON; -#if LJ_TARGET_X86ORX64 else if ((G2J(g)->flags & JIT_F_SSE2)) G2J(g)->flags |= (uint32_t)JIT_F_ON; else return 0; /* Don't turn on JIT compiler without SSE2 support. */ -#else - else - G2J(g)->flags |= (uint32_t)JIT_F_ON; -#endif lj_dispatch_update(g); } break; @@ -287,16 +243,6 @@ int luaJIT_setmode(lua_State *L, int idx, int mode) return 0; /* Failed. */ lj_trace_flush(G2J(g), idx); break; -#else - case LUAJIT_MODE_ENGINE: - case LUAJIT_MODE_FUNC: - case LUAJIT_MODE_ALLFUNC: - case LUAJIT_MODE_ALLSUBFUNC: - UNUSED(idx); - if ((mode & LUAJIT_MODE_ON)) - return 0; /* Failed. */ - break; -#endif case LUAJIT_MODE_WRAPCFUNC: if ((mode & LUAJIT_MODE_ON)) { if (idx != 0) { @@ -368,19 +314,11 @@ static void callhook(lua_State *L, int event, BCLine line) /* Top frame, nextframe = NULL. */ ar.i_ci = (int)((L->base-1) - tvref(L->stack)); lj_state_checkstack(L, 1+LUA_MINSTACK); -#if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF - lj_profile_hook_enter(g); -#else hook_enter(g); -#endif hookf(L, &ar); lua_assert(hook_active(g)); setgcref(g->cur_L, obj2gco(L)); -#if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF - lj_profile_hook_leave(g); -#else hook_leave(g); -#endif } } @@ -413,7 +351,6 @@ void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc) setcframe_pc(cf, pc); slots = cur_topslot(pt, pc, cframe_multres_n(cf)); L->top = L->base + slots; /* Fix top. */ -#if LJ_HASJIT { jit_State *J = G2J(g); if (J->state != LJ_TRACE_IDLE) { @@ -425,7 +362,6 @@ void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc) lua_assert(L->top - L->base == delta); } } -#endif if ((g->hookmask & LUA_MASKCOUNT) && g->hookcount == 0) { g->hookcount = g->hookcstart; callhook(L, LUA_HOOKCOUNT, -1); @@ -470,11 +406,8 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc) GCfunc *fn = curr_func(L); BCOp op; global_State *g = G(L); -#if LJ_HASJIT jit_State *J = G2J(g); -#endif int missing = call_init(L, fn); -#if LJ_HASJIT J->L = L; if ((uintptr_t)pc & 1) { /* Marker for hot call. */ #ifdef LUA_USE_ASSERT @@ -493,7 +426,6 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc) lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */ lua_assert(L->top - L->base == delta); } -#endif if ((g->hookmask & LUA_MASKCALL)) { int i; for (i = 0; i < missing; i++) /* Add missing parameters. */ @@ -503,21 +435,16 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc) while (missing-- > 0 && tvisnil(L->top - 1)) L->top--; } -#if LJ_HASJIT out: -#endif op = bc_op(pc[-1]); /* Get FUNC* op. */ -#if LJ_HASJIT /* Use the non-hotcounting variants if JIT is off or while recording. */ if ((!(J->flags & JIT_F_ON) || J->state != LJ_TRACE_IDLE) && (op == BC_FUNCF || op == BC_FUNCV)) op = (BCOp)((int)op+(int)BC_IFUNCF-(int)BC_FUNCF); -#endif ERRNO_RESTORE return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */ } -#if LJ_HASJIT /* Stitch a new trace. */ void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc) { @@ -532,26 +459,5 @@ void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc) setcframe_pc(cf, oldpc); ERRNO_RESTORE } -#endif -#if LJ_HASPROFILE -/* Profile dispatch. */ -void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc) -{ - ERRNO_SAVE - GCfunc *fn = curr_func(L); - GCproto *pt = funcproto(fn); - void *cf = cframe_raw(L->cframe); - const BCIns *oldpc = cframe_pc(cf); - global_State *g; - setcframe_pc(cf, pc); - L->top = L->base + cur_topslot(pt, pc, cframe_multres_n(cf)); - lj_profile_interpreter(L); - setcframe_pc(cf, oldpc); - g = G(L); - setgcref(g->cur_L, obj2gco(L)); - setvmstate(g, INTERP); - ERRNO_RESTORE -} -#endif diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h index 5bda51a213..47ad6e97fb 100644 --- a/src/lj_dispatch.h +++ b/src/lj_dispatch.h @@ -8,63 +8,8 @@ #include "lj_obj.h" #include "lj_bc.h" -#if LJ_HASJIT #include "lj_jit.h" -#endif - -#if LJ_TARGET_MIPS -/* Need our own global offset table for the dreaded MIPS calling conventions. */ - -#ifndef _LJ_VM_H -LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b); -#endif - -#if LJ_SOFTFP -#ifndef _LJ_IRCALL_H -extern double __adddf3(double a, double b); -extern double __subdf3(double a, double b); -extern double __muldf3(double a, double b); -extern double __divdf3(double a, double b); -#endif -#define SFGOTDEF(_) _(sqrt) _(__adddf3) _(__subdf3) _(__muldf3) _(__divdf3) -#else -#define SFGOTDEF(_) -#endif -#if LJ_HASJIT -#define JITGOTDEF(_) _(lj_trace_exit) _(lj_trace_hot) -#else -#define JITGOTDEF(_) -#endif -#if LJ_HASFFI -#define FFIGOTDEF(_) \ - _(lj_meta_equal_cd) _(lj_ccallback_enter) _(lj_ccallback_leave) -#else -#define FFIGOTDEF(_) -#endif -#define GOTDEF(_) \ - _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ - _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ - _(pow) _(fmod) _(ldexp) _(lj_vm_modi) \ - _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \ - _(lj_dispatch_profile) _(lj_err_throw) \ - _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ - _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \ - _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \ - _(lj_meta_for) _(lj_meta_istype) _(lj_meta_len) _(lj_meta_tget) \ - _(lj_meta_tset) _(lj_state_growstack) _(lj_strfmt_number) \ - _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \ - _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \ - _(lj_tab_setinth) _(lj_buf_putstr_reverse) _(lj_buf_putstr_lower) \ - _(lj_buf_putstr_upper) _(lj_buf_tostr) \ - JITGOTDEF(_) FFIGOTDEF(_) SFGOTDEF(_) -enum { -#define GOTENUM(name) LJ_GOT_##name, -GOTDEF(GOTENUM) -#undef GOTENUM - LJ_GOT__MAX -}; -#endif /* Type of hot counter. Must match the code in the assembler VM. */ /* 16 bits are sufficient. Only 0.0015% overhead with maximum slot penalty. */ @@ -89,13 +34,8 @@ typedef uint16_t HotCount; typedef struct GG_State { lua_State L; /* Main thread. */ global_State g; /* Global state. */ -#if LJ_TARGET_MIPS - ASMFunction got[LJ_GOT__MAX]; /* Global offset table. */ -#endif -#if LJ_HASJIT jit_State J; /* JIT state. */ HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters. */ -#endif ASMFunction dispatch[GG_LEN_DISP]; /* Instruction dispatch tables. */ BCIns bcff[GG_NUM_ASMFF]; /* Bytecode for ASM fast functions. */ } GG_State; @@ -121,33 +61,19 @@ typedef struct GG_State { /* Dispatch table management. */ LJ_FUNC void lj_dispatch_init(GG_State *GG); -#if LJ_HASJIT LJ_FUNC void lj_dispatch_init_hotcount(global_State *g); -#endif LJ_FUNC void lj_dispatch_update(global_State *g); /* Instruction dispatch callback for hooks or when recording. */ LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc); LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc); -#if LJ_HASJIT LJ_FUNCA void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc); -#endif -#if LJ_HASPROFILE -LJ_FUNCA void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc); -#endif #if LJ_HASFFI && !defined(_BUILDVM_H) /* Save/restore errno and GetLastError() around hooks, exits and recording. */ #include -#if LJ_TARGET_WINDOWS -#define WIN32_LEAN_AND_MEAN -#include -#define ERRNO_SAVE int olderr = errno; DWORD oldwerr = GetLastError(); -#define ERRNO_RESTORE errno = olderr; SetLastError(oldwerr); -#else #define ERRNO_SAVE int olderr = errno; #define ERRNO_RESTORE errno = olderr; -#endif #else #define ERRNO_SAVE #define ERRNO_RESTORE diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h deleted file mode 100644 index dee8bdccd1..0000000000 --- a/src/lj_emit_arm.h +++ /dev/null @@ -1,357 +0,0 @@ -/* -** ARM instruction emitter. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -*/ - -/* -- Constant encoding --------------------------------------------------- */ - -static uint8_t emit_invai[16] = { - /* AND */ (ARMI_AND^ARMI_BIC) >> 21, - /* EOR */ 0, - /* SUB */ (ARMI_SUB^ARMI_ADD) >> 21, - /* RSB */ 0, - /* ADD */ (ARMI_ADD^ARMI_SUB) >> 21, - /* ADC */ (ARMI_ADC^ARMI_SBC) >> 21, - /* SBC */ (ARMI_SBC^ARMI_ADC) >> 21, - /* RSC */ 0, - /* TST */ 0, - /* TEQ */ 0, - /* CMP */ (ARMI_CMP^ARMI_CMN) >> 21, - /* CMN */ (ARMI_CMN^ARMI_CMP) >> 21, - /* ORR */ 0, - /* MOV */ (ARMI_MOV^ARMI_MVN) >> 21, - /* BIC */ (ARMI_BIC^ARMI_AND) >> 21, - /* MVN */ (ARMI_MVN^ARMI_MOV) >> 21 -}; - -/* Encode constant in K12 format for data processing instructions. */ -static uint32_t emit_isk12(ARMIns ai, int32_t n) -{ - uint32_t invai, i, m = (uint32_t)n; - /* K12: unsigned 8 bit value, rotated in steps of two bits. */ - for (i = 0; i < 4096; i += 256, m = lj_rol(m, 2)) - if (m <= 255) return ARMI_K12|m|i; - /* Otherwise try negation/complement with the inverse instruction. */ - invai = emit_invai[((ai >> 21) & 15)]; - if (!invai) return 0; /* Failed. No inverse instruction. */ - m = ~(uint32_t)n; - if (invai == ((ARMI_SUB^ARMI_ADD) >> 21) || - invai == (ARMI_CMP^ARMI_CMN) >> 21) m++; - for (i = 0; i < 4096; i += 256, m = lj_rol(m, 2)) - if (m <= 255) return ARMI_K12|(invai<<21)|m|i; - return 0; /* Failed. */ -} - -/* -- Emit basic instructions --------------------------------------------- */ - -static void emit_dnm(ASMState *as, ARMIns ai, Reg rd, Reg rn, Reg rm) -{ - *--as->mcp = ai | ARMF_D(rd) | ARMF_N(rn) | ARMF_M(rm); -} - -static void emit_dm(ASMState *as, ARMIns ai, Reg rd, Reg rm) -{ - *--as->mcp = ai | ARMF_D(rd) | ARMF_M(rm); -} - -static void emit_dn(ASMState *as, ARMIns ai, Reg rd, Reg rn) -{ - *--as->mcp = ai | ARMF_D(rd) | ARMF_N(rn); -} - -static void emit_nm(ASMState *as, ARMIns ai, Reg rn, Reg rm) -{ - *--as->mcp = ai | ARMF_N(rn) | ARMF_M(rm); -} - -static void emit_d(ASMState *as, ARMIns ai, Reg rd) -{ - *--as->mcp = ai | ARMF_D(rd); -} - -static void emit_n(ASMState *as, ARMIns ai, Reg rn) -{ - *--as->mcp = ai | ARMF_N(rn); -} - -static void emit_m(ASMState *as, ARMIns ai, Reg rm) -{ - *--as->mcp = ai | ARMF_M(rm); -} - -static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) -{ - lua_assert(ofs >= -255 && ofs <= 255); - if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; - *--as->mcp = ai | ARMI_LS_P | ARMI_LSX_I | ARMF_D(rd) | ARMF_N(rn) | - ((ofs & 0xf0) << 4) | (ofs & 0x0f); -} - -static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) -{ - lua_assert(ofs >= -4095 && ofs <= 4095); - /* Combine LDR/STR pairs to LDRD/STRD. */ - if (*as->mcp == (ai|ARMI_LS_P|ARMI_LS_U|ARMF_D(rd^1)|ARMF_N(rn)|(ofs^4)) && - (ai & ~(ARMI_LDR^ARMI_STR)) == ARMI_STR && rd != rn && - (uint32_t)ofs <= 252 && !(ofs & 3) && !((rd ^ (ofs >>2)) & 1) && - as->mcp != as->mcloop) { - as->mcp++; - emit_lsox(as, ai == ARMI_LDR ? ARMI_LDRD : ARMI_STRD, rd&~1, rn, ofs&~4); - return; - } - if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; - *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd) | ARMF_N(rn) | ofs; -} - -#if !LJ_SOFTFP -static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) -{ - lua_assert(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0); - if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; - *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd & 15) | ARMF_N(rn) | (ofs >> 2); -} -#endif - -/* -- Emit loads/stores --------------------------------------------------- */ - -/* Prefer spills of BASE/L. */ -#define emit_canremat(ref) ((ref) < ASMREF_L) - -/* Try to find a one step delta relative to another constant. */ -static int emit_kdelta1(ASMState *as, Reg d, int32_t i) -{ - RegSet work = ~as->freeset & RSET_GPR; - while (work) { - Reg r = rset_picktop(work); - IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != d); - if (emit_canremat(ref)) { - int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); - uint32_t k = emit_isk12(ARMI_ADD, delta); - if (k) { - if (k == ARMI_K12) - emit_dm(as, ARMI_MOV, d, r); - else - emit_dn(as, ARMI_ADD^k, d, r); - return 1; - } - } - rset_clear(work, r); - } - return 0; /* Failed. */ -} - -/* Try to find a two step delta relative to another constant. */ -static int emit_kdelta2(ASMState *as, Reg d, int32_t i) -{ - RegSet work = ~as->freeset & RSET_GPR; - while (work) { - Reg r = rset_picktop(work); - IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != d); - if (emit_canremat(ref)) { - int32_t other = ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i; - if (other) { - int32_t delta = i - other; - uint32_t sh, inv = 0, k2, k; - if (delta < 0) { delta = -delta; inv = ARMI_ADD^ARMI_SUB; } - sh = lj_ffs(delta) & ~1; - k2 = emit_isk12(0, delta & (255 << sh)); - k = emit_isk12(0, delta & ~(255 << sh)); - if (k) { - emit_dn(as, ARMI_ADD^k2^inv, d, d); - emit_dn(as, ARMI_ADD^k^inv, d, r); - return 1; - } - } - } - rset_clear(work, r); - } - return 0; /* Failed. */ -} - -/* Load a 32 bit constant into a GPR. */ -static void emit_loadi(ASMState *as, Reg r, int32_t i) -{ - uint32_t k = emit_isk12(ARMI_MOV, i); - lua_assert(rset_test(as->freeset, r) || r == RID_TMP); - if (k) { - /* Standard K12 constant. */ - emit_d(as, ARMI_MOV^k, r); - } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) { - /* 16 bit loword constant for ARMv6T2. */ - emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r); - } else if (emit_kdelta1(as, r, i)) { - /* One step delta relative to another constant. */ - } else if ((as->flags & JIT_F_ARMV6T2)) { - /* 32 bit hiword/loword constant for ARMv6T2. */ - emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), r); - emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r); - } else if (emit_kdelta2(as, r, i)) { - /* Two step delta relative to another constant. */ - } else { - /* Otherwise construct the constant with up to 4 instructions. */ - /* NYI: use mvn+bic, use pc-relative loads. */ - for (;;) { - uint32_t sh = lj_ffs(i) & ~1; - int32_t m = i & (255 << sh); - i &= ~(255 << sh); - if (i == 0) { - emit_d(as, ARMI_MOV ^ emit_isk12(0, m), r); - break; - } - emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), r, r); - } - } -} - -#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) - -static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); - -/* Get/set from constant pointer. */ -static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) -{ - int32_t i = i32ptr(p); - emit_lso(as, ai, r, ra_allock(as, (i & ~4095), rset_exclude(RSET_GPR, r)), - (i & 4095)); -} - -#if !LJ_SOFTFP -/* Load a number constant into an FPR. */ -static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) -{ - cTValue *tv = ir_knum(ir); - int32_t i; - if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) { - uint32_t hi = tv->u32.hi; - uint32_t b = ((hi >> 22) & 0x1ff); - if (!(hi & 0xffff) && (b == 0x100 || b == 0x0ff)) { - *--as->mcp = ARMI_VMOVI_D | ARMF_D(r & 15) | - ((tv->u32.hi >> 12) & 0x00080000) | - ((tv->u32.hi >> 4) & 0x00070000) | - ((tv->u32.hi >> 16) & 0x0000000f); - return; - } - } - i = i32ptr(tv); - emit_vlso(as, ARMI_VLDR_D, r, - ra_allock(as, (i & ~1020), RSET_GPR), (i & 1020)); -} -#endif - -/* Get/set global_State fields. */ -#define emit_getgl(as, r, field) \ - emit_lsptr(as, ARMI_LDR, (r), (void *)&J2G(as->J)->field) -#define emit_setgl(as, r, field) \ - emit_lsptr(as, ARMI_STR, (r), (void *)&J2G(as->J)->field) - -/* Trace number is determined from pc of exit instruction. */ -#define emit_setvmstate(as, i) UNUSED(i) - -/* -- Emit control-flow instructions -------------------------------------- */ - -/* Label for internal jumps. */ -typedef MCode *MCLabel; - -/* Return label pointing to current PC. */ -#define emit_label(as) ((as)->mcp) - -static void emit_branch(ASMState *as, ARMIns ai, MCode *target) -{ - MCode *p = as->mcp; - ptrdiff_t delta = (target - p) - 1; - lua_assert(((delta + 0x00800000) >> 24) == 0); - *--p = ai | ((uint32_t)delta & 0x00ffffffu); - as->mcp = p; -} - -#define emit_jmp(as, target) emit_branch(as, ARMI_B, (target)) - -static void emit_call(ASMState *as, void *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = ((char *)target - (char *)p) - 8; - if ((((delta>>2) + 0x00800000) >> 24) == 0) { - if ((delta & 1)) - *p = ARMI_BLX | ((uint32_t)(delta>>2) & 0x00ffffffu) | ((delta&2) << 23); - else - *p = ARMI_BL | ((uint32_t)(delta>>2) & 0x00ffffffu); - } else { /* Target out of range: need indirect call. But don't use R0-R3. */ - Reg r = ra_allock(as, i32ptr(target), RSET_RANGE(RID_R4, RID_R12+1)); - *p = ARMI_BLXr | ARMF_M(r); - } -} - -/* -- Emit generic operations --------------------------------------------- */ - -/* Generic move between two regs. */ -static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) -{ -#if LJ_SOFTFP - lua_assert(!irt_isnum(ir->t)); UNUSED(ir); -#else - if (dst >= RID_MAX_GPR) { - emit_dm(as, irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S, - (dst & 15), (src & 15)); - return; - } -#endif - if (as->mcp != as->mcloop) { /* Swap early registers for loads/stores. */ - MCode ins = *as->mcp, swp = (src^dst); - if ((ins & 0x0c000000) == 0x04000000 && (ins & 0x02000010) != 0x02000010) { - if (!((ins ^ (dst << 16)) & 0x000f0000)) - *as->mcp = ins ^ (swp << 16); /* Swap N in load/store. */ - if (!(ins & 0x00100000) && !((ins ^ (dst << 12)) & 0x0000f000)) - *as->mcp = ins ^ (swp << 12); /* Swap D in store. */ - } - } - emit_dm(as, ARMI_MOV, dst, src); -} - -/* Generic load of register with base and (small) offset address. */ -static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ -#if LJ_SOFTFP - lua_assert(!irt_isnum(ir->t)); UNUSED(ir); -#else - if (r >= RID_MAX_GPR) - emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs); - else -#endif - emit_lso(as, ARMI_LDR, r, base, ofs); -} - -/* Generic store of register with base and (small) offset address. */ -static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ -#if LJ_SOFTFP - lua_assert(!irt_isnum(ir->t)); UNUSED(ir); -#else - if (r >= RID_MAX_GPR) - emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs); - else -#endif - emit_lso(as, ARMI_STR, r, base, ofs); -} - -/* Emit an arithmetic/logic operation with a constant operand. */ -static void emit_opk(ASMState *as, ARMIns ai, Reg dest, Reg src, - int32_t i, RegSet allow) -{ - uint32_t k = emit_isk12(ai, i); - if (k) - emit_dn(as, ai^k, dest, src); - else - emit_dnm(as, ai, dest, src, ra_allock(as, i, allow)); -} - -/* Add offset to pointer. */ -static void emit_addptr(ASMState *as, Reg r, int32_t ofs) -{ - if (ofs) - emit_opk(as, ARMI_ADD, r, r, ofs, rset_exclude(RSET_GPR, r)); -} - -#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) - diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h deleted file mode 100644 index cfa18c83c2..0000000000 --- a/src/lj_emit_arm64.h +++ /dev/null @@ -1,419 +0,0 @@ -/* -** ARM64 instruction emitter. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -** -** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. -** Sponsored by Cisco Systems, Inc. -*/ - -/* -- Constant encoding --------------------------------------------------- */ - -static uint64_t get_k64val(IRIns *ir) -{ - if (ir->o == IR_KINT64) { - return ir_kint64(ir)->u64; - } else if (ir->o == IR_KGC) { - return (uint64_t)ir_kgc(ir); - } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { - return (uint64_t)ir_kptr(ir); - } else { - lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); - return ir->i; /* Sign-extended. */ - } -} - -/* Encode constant in K12 format for data processing instructions. */ -static uint32_t emit_isk12(int64_t n) -{ - uint64_t k = (n < 0) ? -n : n; - uint32_t m = (n < 0) ? 0x40000000 : 0; - if (k < 0x1000) { - return A64I_K12|m|A64F_U12(k); - } else if ((k & 0xfff000) == k) { - return A64I_K12|m|0x400000|A64F_U12(k>>12); - } - return 0; -} - -#define emit_clz64(n) __builtin_clzll(n) -#define emit_ctz64(n) __builtin_ctzll(n) - -/* Encode constant in K13 format for logical data processing instructions. */ -static uint32_t emit_isk13(uint64_t n, int is64) -{ - int inv = 0, w = 128, lz, tz; - if (n & 1) { n = ~n; w = 64; inv = 1; } /* Avoid wrap-around of ones. */ - if (!n) return 0; /* Neither all-zero nor all-ones are allowed. */ - do { /* Find the repeat width. */ - if (is64 && (uint32_t)(n^(n>>32))) break; - n = (uint32_t)n; - if (!n) return 0; /* Ditto when passing n=0xffffffff and is64=0. */ - w = 32; if ((n^(n>>16)) & 0xffff) break; - n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break; - n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break; - n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break; - n = n & 0x3; w = 2; - } while (0); - lz = emit_clz64(n); - tz = emit_ctz64(n); - if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0; /* Non-contiguous? */ - if (inv) - return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10); - else - return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10); -} - -static uint32_t emit_isfpk64(uint64_t n) -{ - uint64_t etop9 = ((n >> 54) & 0x1ff); - if ((n << 16) == 0 && (etop9 == 0x100 || etop9 == 0x0ff)) { - return (uint32_t)(((n >> 48) & 0x7f) | ((n >> 56) & 0x80)); - } - return ~0u; -} - -/* -- Emit basic instructions --------------------------------------------- */ - -static void emit_dnma(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm, Reg ra) -{ - *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm) | A64F_A(ra); -} - -static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm) -{ - *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm); -} - -static void emit_dm(ASMState *as, A64Ins ai, Reg rd, Reg rm) -{ - *--as->mcp = ai | A64F_D(rd) | A64F_M(rm); -} - -static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn) -{ - *--as->mcp = ai | A64F_D(rd) | A64F_N(rn); -} - -static void emit_nm(ASMState *as, A64Ins ai, Reg rn, Reg rm) -{ - *--as->mcp = ai | A64F_N(rn) | A64F_M(rm); -} - -static void emit_d(ASMState *as, A64Ins ai, Reg rd) -{ - *--as->mcp = ai | A64F_D(rd); -} - -static void emit_n(ASMState *as, A64Ins ai, Reg rn) -{ - *--as->mcp = ai | A64F_N(rn); -} - -static int emit_checkofs(A64Ins ai, int64_t ofs) -{ - int scale = (ai >> 30) & 3; - if (ofs < 0 || (ofs & ((1<= -256 && ofs <= 255) ? -1 : 0; - } else { - return (ofs < (4096<> 30) & 3; - lua_assert(ot); - /* Combine LDR/STR pairs to LDP/STP. */ - if ((sc == 2 || sc == 3) && - (!(ai & 0x400000) || rd != rn) && - as->mcp != as->mcloop) { - uint32_t prev = *as->mcp & ~A64F_D(31); - int ofsm = ofs - (1<>sc)) || - prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) { - aip = (A64F_A(rd) | A64F_D(*as->mcp & 31)); - } else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) || - prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) { - aip = (A64F_D(rd) | A64F_A(*as->mcp & 31)); - ofsm = ofs; - } else { - goto nopair; - } - if (ofsm >= (-64<mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) | - (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000)); - return; - } - } -nopair: - if (ot == 1) - *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_U12(ofs >> sc); - else - *--as->mcp = (ai^A64I_LS_U) | A64F_D(rd) | A64F_N(rn) | A64F_S9(ofs & 0x1ff); -} - -/* -- Emit loads/stores --------------------------------------------------- */ - -/* Prefer rematerialization of BASE/L from global_State over spills. */ -#define emit_canremat(ref) ((ref) <= ASMREF_L) - -/* Try to find an N-step delta relative to other consts with N < lim. */ -static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) -{ - RegSet work = ~as->freeset & RSET_GPR; - if (lim <= 1) return 0; /* Can't beat that. */ - while (work) { - Reg r = rset_picktop(work); - IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != rd); - if (ref < REF_TRUE) { - uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) : - get_k64val(IR(ref)); - int64_t delta = (int64_t)(k - kx); - if (delta == 0) { - emit_dm(as, A64I_MOVx, rd, r); - return 1; - } else { - uint32_t k12 = emit_isk12(delta < 0 ? -delta : delta); - if (k12) { - emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r); - return 1; - } - /* Do other ops or multi-step deltas pay off? Probably not. - ** E.g. XOR rarely helps with pointer consts. - */ - } - } - rset_clear(work, r); - } - return 0; /* Failed. */ -} - -static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64) -{ - uint32_t k13 = emit_isk13(u64, is64); - if (k13) { /* Can the constant be represented as a bitmask immediate? */ - emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO); - } else { - int i, zeros = 0, ones = 0, neg; - if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */ - /* Count homogeneous 16 bit fragments. */ - for (i = 0; i < 4; i++) { - uint64_t frag = (u64 >> i*16) & 0xffff; - zeros += (frag == 0); - ones += (frag == 0xffff); - } - neg = ones > zeros; /* Use MOVN if it pays off. */ - if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) { - int shift = 0, lshift = 0; - uint64_t n64 = neg ? ~u64 : u64; - if (n64 != 0) { - /* Find first/last fragment to be filled. */ - shift = (63-emit_clz64(n64)) & ~15; - lshift = emit_ctz64(n64) & ~15; - } - /* MOVK requires the original value (u64). */ - while (shift > lshift) { - uint32_t u16 = (u64 >> shift) & 0xffff; - /* Skip fragments that are correctly filled by MOVN/MOVZ. */ - if (u16 != (neg ? 0xffff : 0)) - emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd); - shift -= 16; - } - /* But MOVN needs an inverted value (n64). */ - emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) | - A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd); - } - } -} - -/* Load a 32 bit constant into a GPR. */ -#define emit_loadi(as, rd, i) emit_loadk(as, rd, i, 0) - -/* Load a 64 bit constant into a GPR. */ -#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X) - -#define emit_loada(as, r, addr) emit_loadu64(as, (r), (uintptr_t)(addr)) - -#define glofs(as, k) \ - ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g)) -#define mcpofs(as, k) \ - ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1))) -#define checkmcpofs(as, k) \ - ((((mcpofs(as, k)>>2) + 0x00040000) >> 19) == 0) - -static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); - -/* Get/set from constant pointer. */ -static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p) -{ - /* First, check if ip + offset is in range. */ - if ((ai & 0x00400000) && checkmcpofs(as, p)) { - emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r); - } else { - Reg base = RID_GL; /* Next, try GL + offset. */ - int64_t ofs = glofs(as, p); - if (!emit_checkofs(ai, ofs)) { /* Else split up into base reg + offset. */ - int64_t i64 = i64ptr(p); - base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r)); - ofs = i64 & 0x7fffull; - } - emit_lso(as, ai, r, base, ofs); - } -} - -/* Load 64 bit IR constant into register. */ -static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) -{ - const uint64_t *k = &ir_k64(ir)->u64; - int64_t ofs; - if (r >= RID_MAX_GPR) { - uint32_t fpk = emit_isfpk64(*k); - if (fpk != ~0u) { - emit_d(as, A64I_FMOV_DI | A64F_FP8(fpk), (r & 31)); - return; - } - } - ofs = glofs(as, k); - if (emit_checkofs(A64I_LDRx, ofs)) { - emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx, - (r & 31), RID_GL, ofs); - } else { - if (r >= RID_MAX_GPR) { - emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP); - r = RID_TMP; - } - if (checkmcpofs(as, k)) - emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, k)>>2), r); - else - emit_loadu64(as, r, *k); - } -} - -/* Get/set global_State fields. */ -#define emit_getgl(as, r, field) \ - emit_lsptr(as, A64I_LDRx, (r), (void *)&J2G(as->J)->field) -#define emit_setgl(as, r, field) \ - emit_lsptr(as, A64I_STRx, (r), (void *)&J2G(as->J)->field) - -/* Trace number is determined from pc of exit instruction. */ -#define emit_setvmstate(as, i) UNUSED(i) - -/* -- Emit control-flow instructions -------------------------------------- */ - -/* Label for internal jumps. */ -typedef MCode *MCLabel; - -/* Return label pointing to current PC. */ -#define emit_label(as) ((as)->mcp) - -static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = target - p; - lua_assert(((delta + 0x40000) >> 19) == 0); - *p = A64I_BCC | A64F_S19(delta) | cond; -} - -static void emit_branch(ASMState *as, A64Ins ai, MCode *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = target - p; - lua_assert(((delta + 0x02000000) >> 26) == 0); - *p = ai | ((uint32_t)delta & 0x03ffffffu); -} - -static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = target - p; - lua_assert(bit < 63 && ((delta + 0x2000) >> 14) == 0); - if (bit > 31) ai |= A64I_X; - *p = ai | A64F_BIT(bit & 31) | A64F_S14((uint32_t)delta & 0x3fffu) | r; -} - -static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = target - p; - lua_assert(((delta + 0x40000) >> 19) == 0); - *p = ai | A64F_S19(delta) | r; -} - -#define emit_jmp(as, target) emit_branch(as, A64I_B, (target)) - -static void emit_call(ASMState *as, void *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = (char *)target - (char *)p; - if ((((delta>>2) + 0x02000000) >> 26) == 0) { - *p = A64I_BL | ((uint32_t)(delta>>2) & 0x03ffffffu); - } else { /* Target out of range: need indirect call. But don't use R0-R7. */ - Reg r = ra_allock(as, i64ptr(target), - RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); - *p = A64I_BLR | A64F_N(r); - } -} - -/* -- Emit generic operations --------------------------------------------- */ - -/* Generic move between two regs. */ -static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) -{ - if (dst >= RID_MAX_GPR) { - emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D : A64I_FMOV_S, - (dst & 31), (src & 31)); - return; - } - if (as->mcp != as->mcloop) { /* Swap early registers for loads/stores. */ - MCode ins = *as->mcp, swp = (src^dst); - if ((ins & 0xbf800000) == 0xb9000000) { - if (!((ins ^ (dst << 5)) & 0x000003e0)) - *as->mcp = ins ^ (swp << 5); /* Swap N in load/store. */ - if (!(ins & 0x00400000) && !((ins ^ dst) & 0x0000001f)) - *as->mcp = ins ^ swp; /* Swap D in store. */ - } - } - emit_dm(as, A64I_MOVx, dst, src); -} - -/* Generic load of register with base and (small) offset address. */ -static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ - if (r >= RID_MAX_GPR) - emit_lso(as, irt_isnum(ir->t) ? A64I_LDRd : A64I_LDRs, (r & 31), base, ofs); - else - emit_lso(as, irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw, r, base, ofs); -} - -/* Generic store of register with base and (small) offset address. */ -static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ - if (r >= RID_MAX_GPR) - emit_lso(as, irt_isnum(ir->t) ? A64I_STRd : A64I_STRs, (r & 31), base, ofs); - else - emit_lso(as, irt_is64(ir->t) ? A64I_STRx : A64I_STRw, r, base, ofs); -} - -/* Emit an arithmetic operation with a constant operand. */ -static void emit_opk(ASMState *as, A64Ins ai, Reg dest, Reg src, - int32_t i, RegSet allow) -{ - uint32_t k = emit_isk12(i); - if (k) - emit_dn(as, ai^k, dest, src); - else - emit_dnm(as, ai, dest, src, ra_allock(as, i, allow)); -} - -/* Add offset to pointer. */ -static void emit_addptr(ASMState *as, Reg r, int32_t ofs) -{ - if (ofs) - emit_opk(as, ofs < 0 ? A64I_SUBx : A64I_ADDx, r, r, - ofs < 0 ? -ofs : ofs, rset_exclude(RSET_GPR, r)); -} - -#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) - diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h deleted file mode 100644 index 8a9ee24dce..0000000000 --- a/src/lj_emit_mips.h +++ /dev/null @@ -1,293 +0,0 @@ -/* -** MIPS instruction emitter. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -*/ - -#if LJ_64 -static intptr_t get_k64val(IRIns *ir) -{ - if (ir->o == IR_KINT64) { - return (intptr_t)ir_kint64(ir)->u64; - } else if (ir->o == IR_KGC) { - return (intptr_t)ir_kgc(ir); - } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { - return (intptr_t)ir_kptr(ir); - } else { - lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); - return ir->i; /* Sign-extended. */ - } -} -#endif - -#if LJ_64 -#define get_kval(ir) get_k64val(ir) -#else -#define get_kval(ir) ((ir)->i) -#endif - -/* -- Emit basic instructions --------------------------------------------- */ - -static void emit_dst(ASMState *as, MIPSIns mi, Reg rd, Reg rs, Reg rt) -{ - *--as->mcp = mi | MIPSF_D(rd) | MIPSF_S(rs) | MIPSF_T(rt); -} - -static void emit_dta(ASMState *as, MIPSIns mi, Reg rd, Reg rt, uint32_t a) -{ - *--as->mcp = mi | MIPSF_D(rd) | MIPSF_T(rt) | MIPSF_A(a); -} - -#define emit_ds(as, mi, rd, rs) emit_dst(as, (mi), (rd), (rs), 0) -#define emit_tg(as, mi, rt, rg) emit_dst(as, (mi), (rg)&31, 0, (rt)) - -static void emit_tsi(ASMState *as, MIPSIns mi, Reg rt, Reg rs, int32_t i) -{ - *--as->mcp = mi | MIPSF_T(rt) | MIPSF_S(rs) | (i & 0xffff); -} - -#define emit_ti(as, mi, rt, i) emit_tsi(as, (mi), (rt), 0, (i)) -#define emit_hsi(as, mi, rh, rs, i) emit_tsi(as, (mi), (rh) & 31, (rs), (i)) - -static void emit_fgh(ASMState *as, MIPSIns mi, Reg rf, Reg rg, Reg rh) -{ - *--as->mcp = mi | MIPSF_F(rf&31) | MIPSF_G(rg&31) | MIPSF_H(rh&31); -} - -#define emit_fg(as, mi, rf, rg) emit_fgh(as, (mi), (rf), (rg), 0) - -static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) -{ - if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { - emit_dta(as, MIPSI_ROTR, dest, src, shift); - } else { - emit_dst(as, MIPSI_OR, dest, dest, tmp); - emit_dta(as, MIPSI_SLL, dest, src, (-shift)&31); - emit_dta(as, MIPSI_SRL, tmp, src, shift); - } -} - -#if LJ_64 -static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb, - uint32_t lsb) -{ - *--as->mcp = mi | MIPSF_T(rt) | MIPSF_S(rs) | MIPSF_M(msb) | MIPSF_L(lsb); -} -#endif - -/* -- Emit loads/stores --------------------------------------------------- */ - -/* Prefer rematerialization of BASE/L from global_State over spills. */ -#define emit_canremat(ref) ((ref) <= REF_BASE) - -/* Try to find a one step delta relative to another constant. */ -static int emit_kdelta1(ASMState *as, Reg t, intptr_t i) -{ - RegSet work = ~as->freeset & RSET_GPR; - while (work) { - Reg r = rset_picktop(work); - IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != t); - if (ref < ASMREF_L) { - intptr_t delta = (intptr_t)((uintptr_t)i - - (uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(IR(ref)))); - if (checki16(delta)) { - emit_tsi(as, MIPSI_AADDIU, t, r, delta); - return 1; - } - } - rset_clear(work, r); - } - return 0; /* Failed. */ -} - -/* Load a 32 bit constant into a GPR. */ -static void emit_loadi(ASMState *as, Reg r, int32_t i) -{ - if (checki16(i)) { - emit_ti(as, MIPSI_LI, r, i); - } else { - if ((i & 0xffff)) { - intptr_t jgl = (intptr_t)(void *)J2G(as->J); - if ((uintptr_t)(i-jgl) < 65536) { - emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768); - return; - } else if (emit_kdelta1(as, r, i)) { - return; - } else if ((i >> 16) == 0) { - emit_tsi(as, MIPSI_ORI, r, RID_ZERO, i); - return; - } - emit_tsi(as, MIPSI_ORI, r, r, i); - } - emit_ti(as, MIPSI_LUI, r, (i >> 16)); - } -} - -#if LJ_64 -/* Load a 64 bit constant into a GPR. */ -static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) -{ - if (checki32((int64_t)u64)) { - emit_loadi(as, r, (int32_t)u64); - } else { - uint64_t delta = u64 - (uint64_t)(void *)J2G(as->J); - if (delta < 65536) { - emit_tsi(as, MIPSI_DADDIU, r, RID_JGL, (int32_t)(delta-32768)); - } else if (emit_kdelta1(as, r, (intptr_t)u64)) { - return; - } else { - if ((u64 & 0xffff)) { - emit_tsi(as, MIPSI_ORI, r, r, u64 & 0xffff); - } - if (((u64 >> 16) & 0xffff)) { - emit_dta(as, MIPSI_DSLL, r, r, 16); - emit_tsi(as, MIPSI_ORI, r, r, (u64 >> 16) & 0xffff); - emit_dta(as, MIPSI_DSLL, r, r, 16); - } else { - emit_dta(as, MIPSI_DSLL32, r, r, 0); - } - emit_loadi(as, r, (int32_t)(u64 >> 32)); - } - /* TODO: There are probably more optimization opportunities. */ - } -} - -#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr))) -#else -#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) -#endif - -static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); -static void ra_allockreg(ASMState *as, intptr_t k, Reg r); - -/* Get/set from constant pointer. */ -static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow) -{ - intptr_t jgl = (intptr_t)(J2G(as->J)); - intptr_t i = (intptr_t)(p); - Reg base; - if ((uint32_t)(i-jgl) < 65536) { - i = i-jgl-32768; - base = RID_JGL; - } else { - base = ra_allock(as, i-(int16_t)i, allow); - } - emit_tsi(as, mi, r, base, i); -} - -#if LJ_64 -static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) -{ - const uint64_t *k = &ir_k64(ir)->u64; - Reg r64 = r; - if (rset_test(RSET_FPR, r)) { - r64 = RID_TMP; - emit_tg(as, MIPSI_DMTC1, r64, r); - } - if ((uint32_t)((intptr_t)k-(intptr_t)J2G(as->J)) < 65536) - emit_lsptr(as, MIPSI_LD, r64, (void *)k, 0); - else - emit_loadu64(as, r64, *k); -} -#else -#define emit_loadk64(as, r, ir) \ - emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR) -#endif - -/* Get/set global_State fields. */ -static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs) -{ - emit_tsi(as, mi, r, RID_JGL, ofs-32768); -} - -#define emit_getgl(as, r, field) \ - emit_lsglptr(as, MIPSI_AL, (r), (int32_t)offsetof(global_State, field)) -#define emit_setgl(as, r, field) \ - emit_lsglptr(as, MIPSI_AS, (r), (int32_t)offsetof(global_State, field)) - -/* Trace number is determined from per-trace exit stubs. */ -#define emit_setvmstate(as, i) UNUSED(i) - -/* -- Emit control-flow instructions -------------------------------------- */ - -/* Label for internal jumps. */ -typedef MCode *MCLabel; - -/* Return label pointing to current PC. */ -#define emit_label(as) ((as)->mcp) - -static void emit_branch(ASMState *as, MIPSIns mi, Reg rs, Reg rt, MCode *target) -{ - MCode *p = as->mcp; - ptrdiff_t delta = target - p; - lua_assert(((delta + 0x8000) >> 16) == 0); - *--p = mi | MIPSF_S(rs) | MIPSF_T(rt) | ((uint32_t)delta & 0xffffu); - as->mcp = p; -} - -static void emit_jmp(ASMState *as, MCode *target) -{ - *--as->mcp = MIPSI_NOP; - emit_branch(as, MIPSI_B, RID_ZERO, RID_ZERO, (target)); -} - -static void emit_call(ASMState *as, void *target, int needcfa) -{ - MCode *p = as->mcp; - *--p = MIPSI_NOP; - if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) { - *--p = (((uintptr_t)target & 1) ? MIPSI_JALX : MIPSI_JAL) | - (((uintptr_t)target >>2) & 0x03ffffffu); - } else { /* Target out of range: need indirect call. */ - *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR); - needcfa = 1; - } - as->mcp = p; - if (needcfa) ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR); -} - -/* -- Emit generic operations --------------------------------------------- */ - -#define emit_move(as, dst, src) \ - emit_ds(as, MIPSI_MOVE, (dst), (src)) - -/* Generic move between two regs. */ -static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) -{ - if (dst < RID_MAX_GPR) - emit_move(as, dst, src); - else - emit_fg(as, irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S, dst, src); -} - -/* Generic load of register with base and (small) offset address. */ -static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ - if (r < RID_MAX_GPR) - emit_tsi(as, irt_is64(ir->t) ? MIPSI_LD : MIPSI_LW, r, base, ofs); - else - emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1, - (r & 31), base, ofs); -} - -/* Generic store of register with base and (small) offset address. */ -static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ - if (r < RID_MAX_GPR) - emit_tsi(as, irt_is64(ir->t) ? MIPSI_SD : MIPSI_SW, r, base, ofs); - else - emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1, - (r&31), base, ofs); -} - -/* Add offset to pointer. */ -static void emit_addptr(ASMState *as, Reg r, int32_t ofs) -{ - if (ofs) { - lua_assert(checki16(ofs)); - emit_tsi(as, MIPSI_AADDIU, r, r, ofs); - } -} - -#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) - diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h deleted file mode 100644 index 21c3c2ace7..0000000000 --- a/src/lj_emit_ppc.h +++ /dev/null @@ -1,238 +0,0 @@ -/* -** PPC instruction emitter. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -*/ - -/* -- Emit basic instructions --------------------------------------------- */ - -static void emit_tab(ASMState *as, PPCIns pi, Reg rt, Reg ra, Reg rb) -{ - *--as->mcp = pi | PPCF_T(rt) | PPCF_A(ra) | PPCF_B(rb); -} - -#define emit_asb(as, pi, ra, rs, rb) emit_tab(as, (pi), (rs), (ra), (rb)) -#define emit_as(as, pi, ra, rs) emit_tab(as, (pi), (rs), (ra), 0) -#define emit_ab(as, pi, ra, rb) emit_tab(as, (pi), 0, (ra), (rb)) - -static void emit_tai(ASMState *as, PPCIns pi, Reg rt, Reg ra, int32_t i) -{ - *--as->mcp = pi | PPCF_T(rt) | PPCF_A(ra) | (i & 0xffff); -} - -#define emit_ti(as, pi, rt, i) emit_tai(as, (pi), (rt), 0, (i)) -#define emit_ai(as, pi, ra, i) emit_tai(as, (pi), 0, (ra), (i)) -#define emit_asi(as, pi, ra, rs, i) emit_tai(as, (pi), (rs), (ra), (i)) - -#define emit_fab(as, pi, rf, ra, rb) \ - emit_tab(as, (pi), (rf)&31, (ra)&31, (rb)&31) -#define emit_fb(as, pi, rf, rb) emit_tab(as, (pi), (rf)&31, 0, (rb)&31) -#define emit_fac(as, pi, rf, ra, rc) \ - emit_tab(as, (pi) | PPCF_C((rc) & 31), (rf)&31, (ra)&31, 0) -#define emit_facb(as, pi, rf, ra, rc, rb) \ - emit_tab(as, (pi) | PPCF_C((rc) & 31), (rf)&31, (ra)&31, (rb)&31) -#define emit_fai(as, pi, rf, ra, i) emit_tai(as, (pi), (rf)&31, (ra), (i)) - -static void emit_rot(ASMState *as, PPCIns pi, Reg ra, Reg rs, - int32_t n, int32_t b, int32_t e) -{ - *--as->mcp = pi | PPCF_T(rs) | PPCF_A(ra) | PPCF_B(n) | - PPCF_MB(b) | PPCF_ME(e); -} - -static void emit_slwi(ASMState *as, Reg ra, Reg rs, int32_t n) -{ - lua_assert(n >= 0 && n < 32); - emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31-n); -} - -static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n) -{ - lua_assert(n >= 0 && n < 32); - emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31); -} - -/* -- Emit loads/stores --------------------------------------------------- */ - -/* Prefer rematerialization of BASE/L from global_State over spills. */ -#define emit_canremat(ref) ((ref) <= REF_BASE) - -/* Try to find a one step delta relative to another constant. */ -static int emit_kdelta1(ASMState *as, Reg t, int32_t i) -{ - RegSet work = ~as->freeset & RSET_GPR; - while (work) { - Reg r = rset_picktop(work); - IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != t); - if (ref < ASMREF_L) { - int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); - if (checki16(delta)) { - emit_tai(as, PPCI_ADDI, t, r, delta); - return 1; - } - } - rset_clear(work, r); - } - return 0; /* Failed. */ -} - -/* Load a 32 bit constant into a GPR. */ -static void emit_loadi(ASMState *as, Reg r, int32_t i) -{ - if (checki16(i)) { - emit_ti(as, PPCI_LI, r, i); - } else { - if ((i & 0xffff)) { - int32_t jgl = i32ptr(J2G(as->J)); - if ((uint32_t)(i-jgl) < 65536) { - emit_tai(as, PPCI_ADDI, r, RID_JGL, i-jgl-32768); - return; - } else if (emit_kdelta1(as, r, i)) { - return; - } - emit_asi(as, PPCI_ORI, r, r, i); - } - emit_ti(as, PPCI_LIS, r, (i >> 16)); - } -} - -#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) - -static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); - -/* Get/set from constant pointer. */ -static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow) -{ - int32_t jgl = i32ptr(J2G(as->J)); - int32_t i = i32ptr(p); - Reg base; - if ((uint32_t)(i-jgl) < 65536) { - i = i-jgl-32768; - base = RID_JGL; - } else { - base = ra_allock(as, i-(int16_t)i, allow); - } - emit_tai(as, pi, r, base, i); -} - -#define emit_loadk64(as, r, ir) \ - emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR) - -/* Get/set global_State fields. */ -static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs) -{ - emit_tai(as, pi, r, RID_JGL, ofs-32768); -} - -#define emit_getgl(as, r, field) \ - emit_lsglptr(as, PPCI_LWZ, (r), (int32_t)offsetof(global_State, field)) -#define emit_setgl(as, r, field) \ - emit_lsglptr(as, PPCI_STW, (r), (int32_t)offsetof(global_State, field)) - -/* Trace number is determined from per-trace exit stubs. */ -#define emit_setvmstate(as, i) UNUSED(i) - -/* -- Emit control-flow instructions -------------------------------------- */ - -/* Label for internal jumps. */ -typedef MCode *MCLabel; - -/* Return label pointing to current PC. */ -#define emit_label(as) ((as)->mcp) - -static void emit_condbranch(ASMState *as, PPCIns pi, PPCCC cc, MCode *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = (char *)target - (char *)p; - lua_assert(((delta + 0x8000) >> 16) == 0); - pi ^= (delta & 0x8000) * (PPCF_Y/0x8000); - *p = pi | PPCF_CC(cc) | ((uint32_t)delta & 0xffffu); -} - -static void emit_jmp(ASMState *as, MCode *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = (char *)target - (char *)p; - *p = PPCI_B | (delta & 0x03fffffcu); -} - -static void emit_call(ASMState *as, void *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = (char *)target - (char *)p; - if ((((delta>>2) + 0x00800000) >> 24) == 0) { - *p = PPCI_BL | (delta & 0x03fffffcu); - } else { /* Target out of range: need indirect call. Don't use arg reg. */ - RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1); - Reg r = ra_allock(as, i32ptr(target), allow); - *p = PPCI_BCTRL; - p[-1] = PPCI_MTCTR | PPCF_T(r); - as->mcp = p-1; - } -} - -/* -- Emit generic operations --------------------------------------------- */ - -#define emit_mr(as, dst, src) \ - emit_asb(as, PPCI_MR, (dst), (src), (src)) - -/* Generic move between two regs. */ -static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) -{ - UNUSED(ir); - if (dst < RID_MAX_GPR) - emit_mr(as, dst, src); - else - emit_fb(as, PPCI_FMR, dst, src); -} - -/* Generic load of register with base and (small) offset address. */ -static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ - if (r < RID_MAX_GPR) - emit_tai(as, PPCI_LWZ, r, base, ofs); - else - emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, base, ofs); -} - -/* Generic store of register with base and (small) offset address. */ -static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ - if (r < RID_MAX_GPR) - emit_tai(as, PPCI_STW, r, base, ofs); - else - emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, base, ofs); -} - -/* Emit a compare (for equality) with a constant operand. */ -static void emit_cmpi(ASMState *as, Reg r, int32_t k) -{ - if (checki16(k)) { - emit_ai(as, PPCI_CMPWI, r, k); - } else if (checku16(k)) { - emit_ai(as, PPCI_CMPLWI, r, k); - } else { - emit_ai(as, PPCI_CMPLWI, RID_TMP, k); - emit_asi(as, PPCI_XORIS, RID_TMP, r, (k >> 16)); - } -} - -/* Add offset to pointer. */ -static void emit_addptr(ASMState *as, Reg r, int32_t ofs) -{ - if (ofs) { - emit_tai(as, PPCI_ADDI, r, r, ofs); - if (!checki16(ofs)) - emit_tai(as, PPCI_ADDIS, r, r, (ofs + 32768) >> 16); - } -} - -static void emit_spsub(ASMState *as, int32_t ofs) -{ - if (ofs) { - emit_tai(as, PPCI_STWU, RID_TMP, RID_SP, -ofs); - emit_tai(as, PPCI_ADDI, RID_TMP, RID_SP, - CFRAME_SIZE + (as->parent ? as->parent->spadjust : 0)); - } -} - diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index a6b8713e00..556e27f277 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h @@ -7,24 +7,13 @@ #define MODRM(mode, r1, r2) ((MCode)((mode)+(((r1)&7)<<3)+((r2)&7))) -#if LJ_64 #define REXRB(p, rr, rb) \ { MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \ if (rex != 0x40) *--(p) = rex; } #define FORCE_REX 0x200 #define REX_64 (FORCE_REX|0x080000) #define VEX_64 0x800000 -#else -#define REXRB(p, rr, rb) ((void)0) -#define FORCE_REX 0 -#define REX_64 0 -#define VEX_64 0 -#endif -#if LJ_GC64 #define REX_GC64 REX_64 -#else -#define REX_GC64 0 -#endif #define emit_i8(as, i) (*--as->mcp = (MCode)(i)) #define emit_i32(as, i) (*(int32_t *)(as->mcp-4) = (i), as->mcp -= 4) @@ -39,9 +28,7 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx, { int n = (int8_t)xo; if (n == -60) { /* VEX-encoded instruction */ -#if LJ_64 xo ^= (((rr>>1)&4)+((rx>>2)&2)+((rb>>3)&1))<<13; -#endif *(uint32_t *)(p+delta-5) = (uint32_t)xo; return p+delta-5; } @@ -54,7 +41,6 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx, #endif *(uint32_t *)(p+delta-5) = (uint32_t)xo; p += n + delta; -#if LJ_64 { uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1); if (rex != 0x40) { @@ -64,9 +50,6 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx, *--p = (MCode)rex; } } -#else - UNUSED(rr); UNUSED(rb); UNUSED(rx); -#endif return p; } @@ -123,14 +106,9 @@ static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs) *--p = MODRM(XM_SCALE1, RID_ESP, RID_ESP); } else { *(int32_t *)(p-4) = ofs; -#if LJ_64 p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP); p -= 5; rb = RID_ESP; -#else - p -= 4; - rb = RID_EBP; -#endif mode = XM_OFS0; } as->mcp = emit_opm(xo, mode, rr, rb, p, 0); @@ -203,10 +181,8 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb) *(int32_t *)p = as->mrm.ofs; if (as->mrm.idx != RID_NONE) goto mrmidx; -#if LJ_64 *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP); rb = RID_ESP; -#endif } else if (LJ_GC64 && rb == RID_RIP) { lua_assert(as->mrm.idx == RID_NONE); mode = XM_OFS0; @@ -289,7 +265,6 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) } } -#if LJ_GC64 #define dispofs(as, k) \ ((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch)) #define mcpofs(as, k) \ @@ -299,13 +274,7 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) /* mov r, addr */ #define emit_loada(as, r, addr) \ emit_loadu64(as, (r), (uintptr_t)(addr)) -#else -/* mov r, addr */ -#define emit_loada(as, r, addr) \ - emit_loadi(as, (r), ptr2addr((addr))) -#endif -#if LJ_64 /* mov r, imm64 or shorter 32 bit extended load. */ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) { @@ -315,7 +284,6 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) MCode *p = as->mcp; *(int32_t *)(p-4) = (int32_t)u64; as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4); -#if LJ_GC64 } else if (checki32(dispofs(as, u64))) { emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64)); } else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) { @@ -323,7 +291,6 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) ** RIP-relative addressing reachability for both as->mcp and as->mctop. */ emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64)); -#endif } else { /* Full-size 64 bit load. */ MCode *p = as->mcp; *(uint64_t *)(p-8) = u64; @@ -333,12 +300,10 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) as->mcp = p; } } -#endif /* op r, [addr] */ static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) { -#if LJ_GC64 if (checki32(dispofs(as, addr))) { emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr)); } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) { @@ -347,16 +312,11 @@ static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) emit_rmro(as, xo, rr, rr, 0); emit_loadu64(as, rr, (uintptr_t)addr); } else -#endif { MCode *p = as->mcp; *(int32_t *)(p-4) = ptr2addr(addr); -#if LJ_64 p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP); as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5); -#else - as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4); -#endif } } @@ -375,7 +335,6 @@ static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) } if (*k == 0) { emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r); -#if LJ_GC64 } else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) || (checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) { emit_rma(as, xo, r64, k); @@ -394,10 +353,6 @@ static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) as->mclim = as->mcbot + MCLIM_REDZONE; } emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i)); -#else - } else { - emit_rma(as, xo, r64, k); -#endif } } @@ -406,18 +361,6 @@ static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) /* Label for short jumps. */ typedef MCode *MCLabel; -#if LJ_32 && LJ_HASFFI -/* jmp short target */ -static void emit_sjmp(ASMState *as, MCLabel target) -{ - MCode *p = as->mcp; - ptrdiff_t delta = target - p; - lua_assert(delta == (int8_t)delta); - p[-1] = (MCode)(int8_t)delta; - p[-2] = XI_JMPs; - as->mcp = p - 2; -} -#endif /* jcc short target */ static void emit_sjcc(ASMState *as, int cc, MCLabel target) @@ -480,14 +423,12 @@ static void emit_jmp(ASMState *as, MCode *target) static void emit_call_(ASMState *as, MCode *target) { MCode *p = as->mcp; -#if LJ_64 if (target-p != (int32_t)(target-p)) { /* Assumes RID_RET is never an argument to calls and always clobbered. */ emit_rr(as, XO_GROUP5, XOg_CALL, RID_RET); emit_loadu64(as, RID_RET, (uint64_t)target); return; } -#endif *(int32_t *)(p-4) = jmprel(p, target); p[-5] = XI_CALL; as->mcp = p - 5; @@ -498,13 +439,8 @@ static void emit_call_(ASMState *as, MCode *target) /* -- Emit generic operations --------------------------------------------- */ /* Use 64 bit operations to handle 64 bit IR types. */ -#if LJ_64 #define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0)) #define VEX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? VEX_64 : 0)) -#else -#define REX_64IR(ir, r) (r) -#define VEX_64IR(ir, r) (r) -#endif /* Generic move between two regs. */ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) diff --git a/src/lj_err.c b/src/lj_err.c index 049294ea6f..5ffaebe40d 100644 --- a/src/lj_err.c +++ b/src/lj_err.c @@ -63,8 +63,6 @@ #if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND #define LJ_UNWIND_EXT 1 -#elif LJ_TARGET_WINDOWS -#define LJ_UNWIND_EXT 1 #endif /* -- Error messages ------------------------------------------------------ */ @@ -204,7 +202,6 @@ typedef struct _Unwind_Context _Unwind_Context; #define LJ_UEXCLASS_CHECK(cl) (((cl) ^ LJ_UEXCLASS) <= 0xff) #define LJ_UEXCLASS_ERRCODE(cl) ((int)((cl) & 0xff)) -#if !LJ_TARGET_ARM typedef struct _Unwind_Exception { @@ -265,7 +262,6 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, lj_vm_unwind_c_eh)); return _URC_INSTALL_CONTEXT; } -#if LJ_TARGET_X86ORX64 else if ((actions & _UA_HANDLER_FRAME)) { /* Workaround for ancient libgcc bug. Still present in RHEL 5.5. :-/ ** Real fix: http://gcc.gnu.org/viewcvs/trunk/gcc/unwind-dw2.c?r1=121165&r2=124837&pathrev=153877&diff_format=h @@ -274,7 +270,6 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, _Unwind_SetIP(ctx, (uintptr_t)lj_vm_unwind_rethrow); return _URC_INSTALL_CONTEXT; } -#endif #else /* This is not the proper way to escape from the unwinder. We get away with ** it on non-x64 because the interpreter restores all callee-saved regs. @@ -302,202 +297,6 @@ static void err_raise_ext(int errcode) } #endif -#else /* LJ_TARGET_ARM */ - -#define _US_VIRTUAL_UNWIND_FRAME 0 -#define _US_UNWIND_FRAME_STARTING 1 -#define _US_ACTION_MASK 3 -#define _US_FORCE_UNWIND 8 - -typedef struct _Unwind_Control_Block _Unwind_Control_Block; - -struct _Unwind_Control_Block { - uint64_t exclass; - uint32_t misc[20]; -}; - -extern int _Unwind_RaiseException(_Unwind_Control_Block *); -extern int __gnu_unwind_frame(_Unwind_Control_Block *, _Unwind_Context *); -extern int _Unwind_VRS_Set(_Unwind_Context *, int, uint32_t, int, void *); -extern int _Unwind_VRS_Get(_Unwind_Context *, int, uint32_t, int, void *); - -static inline uint32_t _Unwind_GetGR(_Unwind_Context *ctx, int r) -{ - uint32_t v; - _Unwind_VRS_Get(ctx, 0, r, 0, &v); - return v; -} - -static inline void _Unwind_SetGR(_Unwind_Context *ctx, int r, uint32_t v) -{ - _Unwind_VRS_Set(ctx, 0, r, 0, &v); -} - -extern void lj_vm_unwind_ext(void); - -/* ARM unwinder personality handler referenced from interpreter .ARM.extab. */ -LJ_FUNCA int lj_err_unwind_arm(int state, _Unwind_Control_Block *ucb, - _Unwind_Context *ctx) -{ - void *cf = (void *)_Unwind_GetGR(ctx, 13); - lua_State *L = cframe_L(cf); - int errcode; - - switch ((state & _US_ACTION_MASK)) { - case _US_VIRTUAL_UNWIND_FRAME: - if ((state & _US_FORCE_UNWIND)) break; - return _URC_HANDLER_FOUND; - case _US_UNWIND_FRAME_STARTING: - if (LJ_UEXCLASS_CHECK(ucb->exclass)) { - errcode = LJ_UEXCLASS_ERRCODE(ucb->exclass); - } else { - errcode = LUA_ERRRUN; - setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); - } - cf = err_unwind(L, cf, errcode); - if ((state & _US_FORCE_UNWIND) || cf == NULL) break; - _Unwind_SetGR(ctx, 15, (uint32_t)lj_vm_unwind_ext); - _Unwind_SetGR(ctx, 0, (uint32_t)ucb); - _Unwind_SetGR(ctx, 1, (uint32_t)errcode); - _Unwind_SetGR(ctx, 2, cframe_unwind_ff(cf) ? - (uint32_t)lj_vm_unwind_ff_eh : - (uint32_t)lj_vm_unwind_c_eh); - return _URC_INSTALL_CONTEXT; - default: - return _URC_FAILURE; - } - if (__gnu_unwind_frame(ucb, ctx) != _URC_OK) - return _URC_FAILURE; - return _URC_CONTINUE_UNWIND; -} - -#if LJ_UNWIND_EXT -static __thread _Unwind_Control_Block static_uex; - -static void err_raise_ext(int errcode) -{ - memset(&static_uex, 0, sizeof(static_uex)); - static_uex.exclass = LJ_UEXCLASS_MAKE(errcode); - _Unwind_RaiseException(&static_uex); -} -#endif - -#endif /* LJ_TARGET_ARM */ - -#elif LJ_ABI_WIN - -/* -** Someone in Redmond owes me several days of my life. A lot of this is -** undocumented or just plain wrong on MSDN. Some of it can be gathered -** from 3rd party docs or must be found by trial-and-error. They really -** don't want you to write your own language-specific exception handler -** or to interact gracefully with MSVC. :-( -** -** Apparently MSVC doesn't call C++ destructors for foreign exceptions -** unless you compile your C++ code with /EHa. Unfortunately this means -** catch (...) also catches things like access violations. The use of -** _set_se_translator doesn't really help, because it requires /EHa, too. -*/ - -#define WIN32_LEAN_AND_MEAN -#include - -#if LJ_TARGET_X64 -/* Taken from: http://www.nynaeve.net/?p=99 */ -typedef struct UndocumentedDispatcherContext { - ULONG64 ControlPc; - ULONG64 ImageBase; - PRUNTIME_FUNCTION FunctionEntry; - ULONG64 EstablisherFrame; - ULONG64 TargetIp; - PCONTEXT ContextRecord; - void (*LanguageHandler)(void); - PVOID HandlerData; - PUNWIND_HISTORY_TABLE HistoryTable; - ULONG ScopeIndex; - ULONG Fill0; -} UndocumentedDispatcherContext; -#else -typedef void *UndocumentedDispatcherContext; -#endif - -/* Another wild guess. */ -extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow); - -#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT) -/* Workaround for broken MinGW64 declaration. */ -VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx"); -#define RtlUnwindEx RtlUnwindEx_FIXED -#endif - -#define LJ_MSVC_EXCODE ((DWORD)0xe06d7363) -#define LJ_GCC_EXCODE ((DWORD)0x20474343) - -#define LJ_EXCODE ((DWORD)0xe24c4a00) -#define LJ_EXCODE_MAKE(c) (LJ_EXCODE | (DWORD)(c)) -#define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff) -#define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff)) - -/* Windows exception handler for interpreter frame. */ -LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec, - void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch) -{ -#if LJ_TARGET_X64 - void *cf = f; -#else - void *cf = (char *)f - CFRAME_OFS_SEH; -#endif - lua_State *L = cframe_L(cf); - int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ? - LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN; - if ((rec->ExceptionFlags & 6)) { /* EH_UNWINDING|EH_EXIT_UNWIND */ - /* Unwind internal frames. */ - err_unwind(L, cf, errcode); - } else { - void *cf2 = err_unwind(L, cf, 0); - if (cf2) { /* We catch it, so start unwinding the upper frames. */ - if (rec->ExceptionCode == LJ_MSVC_EXCODE || - rec->ExceptionCode == LJ_GCC_EXCODE) { -#if LJ_TARGET_WINDOWS - __DestructExceptionObject(rec, 1); -#endif - setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); - } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) { - /* Don't catch access violations etc. */ - return 1; /* ExceptionContinueSearch */ - } -#if LJ_TARGET_X64 - /* Unwind the stack and call all handlers for all lower C frames - ** (including ourselves) again with EH_UNWINDING set. Then set - ** rsp = cf, rax = errcode and jump to the specified target. - */ - RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? - lj_vm_unwind_ff_eh : - lj_vm_unwind_c_eh), - rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable); - /* RtlUnwindEx should never return. */ -#else - UNUSED(ctx); - UNUSED(dispatch); - /* Call all handlers for all lower C frames (including ourselves) again - ** with EH_UNWINDING set. Then call the specified function, passing cf - ** and errcode. - */ - lj_vm_rtlunwind(cf, (void *)rec, - (cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? - (void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode); - /* lj_vm_rtlunwind does not return. */ -#endif - } - } - return 1; /* ExceptionContinueSearch */ -} - -/* Raise Windows exception. */ -static void err_raise_ext(int errcode) -{ - RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL); -} #endif @@ -712,7 +511,6 @@ LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg) frame = NULL; } else { pframe = frame_prevd(frame); -#if LJ_HASFFI /* Remove frame for FFI metamethods. */ if (frame_func(frame)->c.ffid >= FF_ffi_meta___index && frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) { @@ -720,7 +518,6 @@ LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg) L->top = frame; setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame)); } -#endif } } lj_debug_addloc(L, msg, pframe, frame); diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h index 060a9f8975..1fea8a097d 100644 --- a/src/lj_errmsg.h +++ b/src/lj_errmsg.h @@ -57,9 +57,6 @@ ERRDEF(NOENV, "no calling environment") ERRDEF(CYIELD, "attempt to yield across C-call boundary") ERRDEF(BADLU, "bad light userdata pointer") ERRDEF(NOGCMM, "bad action while in __gc metamethod") -#if LJ_TARGET_WINDOWS -ERRDEF(BADFPU, "bad FPU precision (use D3DCREATE_FPU_PRESERVE with DirectX)") -#endif /* Standard library function errors. */ ERRDEF(ASSERT, "assertion failed!") @@ -99,18 +96,8 @@ ERRDEF(STRCAPU, "unfinished capture") ERRDEF(STRFMT, "invalid option " LUA_QS " to " LUA_QL("format")) ERRDEF(STRGSRV, "invalid replacement value (a %s)") ERRDEF(BADMODN, "name conflict for module " LUA_QS) -#if LJ_HASJIT ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?") -#if LJ_TARGET_X86ORX64 ERRDEF(NOJIT, "JIT compiler disabled, CPU does not support SSE2") -#else -ERRDEF(NOJIT, "JIT compiler disabled") -#endif -#elif defined(LJ_ARCH_NOJIT) -ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)") -#else -ERRDEF(NOJIT, "JIT compiler permanently disabled by build option") -#endif ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS) /* Lexer/parser errors. */ @@ -150,7 +137,6 @@ ERRDEF(XGSCOPE, " jumps into the scope of local " LUA_QS) ERRDEF(BCFMT, "cannot load incompatible bytecode") ERRDEF(BCBAD, "cannot load malformed bytecode") -#if LJ_HASFFI /* FFI errors. */ ERRDEF(FFI_INVTYPE, "invalid C type") ERRDEF(FFI_INVSIZE, "size of C type is unknown or too large") @@ -174,14 +160,9 @@ ERRDEF(FFI_BADMM, LUA_QS " has no " LUA_QS " metamethod") ERRDEF(FFI_WRCONST, "attempt to write to constant location") ERRDEF(FFI_NODECL, "missing declaration for symbol " LUA_QS) ERRDEF(FFI_BADCBACK, "bad callback") -#if LJ_OS_NOJIT -ERRDEF(FFI_CBACKOV, "no support for callbacks on this OS") -#else ERRDEF(FFI_CBACKOV, "too many callbacks") -#endif ERRDEF(FFI_NYIPACKBIT, "NYI: packed bit fields") ERRDEF(FFI_NYICALL, "NYI: cannot call this C function (yet)") -#endif #undef ERRDEF diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index dfdee2dbfe..28b99e16ea 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -8,7 +8,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_err.h" #include "lj_str.h" @@ -118,13 +117,9 @@ static void recff_stitch(jit_State *J) /* Ditto for the IR. */ memmove(&J->base[1], &J->base[-1-LJ_FR2], sizeof(TRef)*nslot); -#if LJ_FR2 J->base[2] = TREF_FRAME; J->base[-1] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont))); J->base[0] = lj_ir_k64(J, IR_KNUM, u64ptr(pc)) | TREF_CONT; -#else - J->base[0] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; -#endif J->ktrace = tref_ref((J->base[-1-LJ_FR2] = lj_ir_ktrace(J))); J->base += 2 + LJ_FR2; J->baseslot += 2 + LJ_FR2; @@ -350,11 +345,9 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd) } tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); } -#if LJ_HASFFI } else if (tref_iscdata(tr)) { lj_crecord_tonumber(J, rd); return; -#endif } else { tr = TREF_NIL; } @@ -449,10 +442,8 @@ static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd) static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd) { if (J->maxslot >= 1) { -#if LJ_FR2 /* Shift function arguments up. */ memmove(J->base + 1, J->base, sizeof(TRef) * J->maxslot); -#endif lj_record_call(J, 0, J->maxslot - 1); rd->nres = -1; /* Pending call. */ } /* else: Interpreter will throw. */ @@ -478,10 +469,8 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd) copyTV(J->L, &argv1, &rd->argv[1]); copyTV(J->L, &rd->argv[0], &argv1); copyTV(J->L, &rd->argv[1], &argv0); -#if LJ_FR2 /* Shift function arguments up. */ memmove(J->base + 2, J->base + 1, sizeof(TRef) * (J->maxslot-1)); -#endif /* Need to protect lj_record_call because it may throw. */ errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp); /* Always undo Lua stack swap to avoid confusing the interpreter. */ @@ -571,11 +560,7 @@ static void LJ_FASTCALL recff_math_atan2(jit_State *J, RecordFFData *rd) static void LJ_FASTCALL recff_math_ldexp(jit_State *J, RecordFFData *rd) { TRef tr = lj_ir_tonum(J, J->base[0]); -#if LJ_TARGET_X86ORX64 TRef tr2 = lj_ir_tonum(J, J->base[1]); -#else - TRef tr2 = lj_opt_narrow_toint(J, J->base[1]); -#endif J->base[0] = emitir(IRTN(IR_LDEXP), tr, tr2); UNUSED(rd); } @@ -675,9 +660,7 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd) static void LJ_FASTCALL recff_bit_tobit(jit_State *J, RecordFFData *rd) { TRef tr = J->base[0]; -#if LJ_HASFFI if (tref_iscdata(tr)) { recff_bit64_tobit(J, rd); return; } -#endif J->base[0] = lj_opt_narrow_tobit(J, tr); UNUSED(rd); } @@ -685,20 +668,16 @@ static void LJ_FASTCALL recff_bit_tobit(jit_State *J, RecordFFData *rd) /* Record unary bit.bnot, bit.bswap. */ static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd) { -#if LJ_HASFFI if (recff_bit64_unary(J, rd)) return; -#endif J->base[0] = emitir(IRTI(rd->data), lj_opt_narrow_tobit(J, J->base[0]), 0); } /* Record N-ary bit.band, bit.bor, bit.bxor. */ static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) { -#if LJ_HASFFI if (recff_bit64_nary(J, rd)) return; -#endif { TRef tr = lj_opt_narrow_tobit(J, J->base[0]); uint32_t ot = IRTI(rd->data); @@ -712,10 +691,8 @@ static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) /* Record bit shifts. */ static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) { -#if LJ_HASFFI if (recff_bit64_shift(J, rd)) return; -#endif { TRef tr = lj_opt_narrow_tobit(J, J->base[0]); TRef tsh = lj_opt_narrow_tobit(J, J->base[1]); @@ -735,13 +712,9 @@ static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) static void LJ_FASTCALL recff_bit_tohex(jit_State *J, RecordFFData *rd) { -#if LJ_HASFFI TRef hdr = recff_bufhdr(J); TRef tr = recff_bit64_tohex(J, rd, hdr); J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); -#else - recff_nyiu(J, rd); /* Don't bother working around this NYI. */ -#endif } /* -- String library fast functions --------------------------------------- */ @@ -993,15 +966,10 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT)); } else { -#if LJ_HASFFI tra = emitir(IRT(IR_CONV, IRT_U64), tra, (IRT_INT|(IRT_U64<<5)|IRCONV_SEXT)); tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra); lj_needsplit(J); -#else - recff_nyiu(J, rd); /* Don't bother working around this NYI. */ - return; -#endif } break; case STRFMT_UINT: @@ -1114,13 +1082,8 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id) { TRef tr, ud, fp; if (id) { /* io.func() */ -#if LJ_GC64 /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */ ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id])); -#else - tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); - ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0); -#endif } else { /* fp:method() */ ud = J->base[0]; if (!tref_isudata(ud)) @@ -1223,4 +1186,3 @@ void lj_ffrecord_func(jit_State *J) #undef IR #undef emitir -#endif diff --git a/src/lj_ffrecord.h b/src/lj_ffrecord.h index 3b407450d5..83a8abf803 100644 --- a/src/lj_ffrecord.h +++ b/src/lj_ffrecord.h @@ -9,7 +9,6 @@ #include "lj_obj.h" #include "lj_jit.h" -#if LJ_HASJIT /* Data used by handlers to record a fast function. */ typedef struct RecordFFData { TValue *argv; /* Runtime argument values. */ @@ -19,6 +18,5 @@ typedef struct RecordFFData { LJ_FUNC int32_t lj_ffrecord_select_mode(jit_State *J, TRef tr, TValue *tv); LJ_FUNC void lj_ffrecord_func(jit_State *J); -#endif #endif diff --git a/src/lj_frame.h b/src/lj_frame.h index 19c49a4aef..04350576e1 100644 --- a/src/lj_frame.h +++ b/src/lj_frame.h @@ -30,7 +30,6 @@ enum { #define FRAME_TYPEP (FRAME_TYPE|FRAME_P) /* Macros to access and modify Lua frames. */ -#if LJ_FR2 /* Two-slot frame info, required for 64 bit PC/GCRef: ** ** base-2 base-1 | base base+1 ... @@ -49,28 +48,6 @@ enum { #define setframe_gc(f, p, tp) (setgcVraw((f)-1, (p), (tp))) #define setframe_ftsz(f, sz) ((f)->ftsz = (sz)) #define setframe_pc(f, pc) ((f)->ftsz = (int64_t)(intptr_t)(pc)) -#else -/* One-slot frame info, sufficient for 32 bit PC/GCRef: -** -** base-1 | base base+1 ... -** lo hi | -** [func | PC/delta/ft] | [slots ...] -** ^-- frame | ^-- base ^-- top -** -** Continuation frames: -** -** base-2 base-1 | base base+1 ... -** lo hi lo hi | -** [cont | PC] [func | PC/delta/ft] | [slots ...] -** ^-- frame | ^-- base ^-- top -*/ -#define frame_gc(f) (gcref((f)->fr.func)) -#define frame_ftsz(f) ((ptrdiff_t)(f)->fr.tp.ftsz) -#define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns)) -#define setframe_gc(f, p, tp) (setgcref((f)->fr.func, (p)), UNUSED(tp)) -#define setframe_ftsz(f, sz) ((f)->fr.tp.ftsz = (int32_t)(sz)) -#define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc))) -#endif #define frame_type(f) (frame_ftsz(f) & FRAME_TYPE) #define frame_typep(f) (frame_ftsz(f) & FRAME_TYPEP) @@ -86,22 +63,9 @@ enum { enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ -#if LJ_FR2 #define frame_contpc(f) (frame_pc((f)-2)) #define frame_contv(f) (((f)-3)->u64) -#else -#define frame_contpc(f) (frame_pc((f)-1)) -#define frame_contv(f) (((f)-1)->u32.lo) -#endif -#if LJ_FR2 #define frame_contf(f) ((ASMFunction)(uintptr_t)((f)-3)->u64) -#elif LJ_64 -#define frame_contf(f) \ - ((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin + \ - (intptr_t)(int32_t)((f)-1)->u32.lo)) -#else -#define frame_contf(f) ((ASMFunction)gcrefp(((f)-1)->gcr, void)) -#endif #define frame_iscont_fficb(f) \ (LJ_HASFFI && frame_contv(f) == LJ_CONT_FFI_CALLBACK) @@ -115,61 +79,12 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ /* Macros to access and modify the C stack frame chain. */ /* These definitions must match with the arch-specific *.dasc files. */ -#if LJ_TARGET_X86 -#if LJ_ABI_WIN -#define CFRAME_OFS_ERRF (19*4) -#define CFRAME_OFS_NRES (18*4) -#define CFRAME_OFS_PREV (17*4) -#define CFRAME_OFS_L (16*4) -#define CFRAME_OFS_SEH (9*4) -#define CFRAME_OFS_PC (6*4) -#define CFRAME_OFS_MULTRES (5*4) -#define CFRAME_SIZE (16*4) -#define CFRAME_SHIFT_MULTRES 0 -#else -#define CFRAME_OFS_ERRF (15*4) -#define CFRAME_OFS_NRES (14*4) -#define CFRAME_OFS_PREV (13*4) -#define CFRAME_OFS_L (12*4) -#define CFRAME_OFS_PC (6*4) -#define CFRAME_OFS_MULTRES (5*4) -#define CFRAME_SIZE (12*4) -#define CFRAME_SHIFT_MULTRES 0 -#endif -#elif LJ_TARGET_X64 -#if LJ_ABI_WIN -#define CFRAME_OFS_PREV (13*8) -#if LJ_GC64 -#define CFRAME_OFS_PC (12*8) -#define CFRAME_OFS_L (11*8) -#define CFRAME_OFS_ERRF (21*4) -#define CFRAME_OFS_NRES (20*4) -#define CFRAME_OFS_MULTRES (8*4) -#else -#define CFRAME_OFS_PC (25*4) -#define CFRAME_OFS_L (24*4) -#define CFRAME_OFS_ERRF (23*4) -#define CFRAME_OFS_NRES (22*4) -#define CFRAME_OFS_MULTRES (21*4) -#endif -#define CFRAME_SIZE (10*8) -#define CFRAME_SIZE_JIT (CFRAME_SIZE + 9*16 + 4*8) -#define CFRAME_SHIFT_MULTRES 0 -#else #define CFRAME_OFS_PREV (4*8) -#if LJ_GC64 #define CFRAME_OFS_PC (3*8) #define CFRAME_OFS_L (2*8) #define CFRAME_OFS_ERRF (3*4) #define CFRAME_OFS_NRES (2*4) #define CFRAME_OFS_MULTRES (0*4) -#else -#define CFRAME_OFS_PC (7*4) -#define CFRAME_OFS_L (6*4) -#define CFRAME_OFS_ERRF (5*4) -#define CFRAME_OFS_NRES (4*4) -#define CFRAME_OFS_MULTRES (1*4) -#endif #if LJ_NO_UNWIND #define CFRAME_SIZE (12*8) #else @@ -177,96 +92,6 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ #endif #define CFRAME_SIZE_JIT (CFRAME_SIZE + 16) #define CFRAME_SHIFT_MULTRES 0 -#endif -#elif LJ_TARGET_ARM -#define CFRAME_OFS_ERRF 24 -#define CFRAME_OFS_NRES 20 -#define CFRAME_OFS_PREV 16 -#define CFRAME_OFS_L 12 -#define CFRAME_OFS_PC 8 -#define CFRAME_OFS_MULTRES 4 -#if LJ_ARCH_HASFPU -#define CFRAME_SIZE 128 -#else -#define CFRAME_SIZE 64 -#endif -#define CFRAME_SHIFT_MULTRES 3 -#elif LJ_TARGET_ARM64 -#define CFRAME_OFS_ERRF 196 -#define CFRAME_OFS_NRES 200 -#define CFRAME_OFS_PREV 160 -#define CFRAME_OFS_L 176 -#define CFRAME_OFS_PC 168 -#define CFRAME_OFS_MULTRES 192 -#define CFRAME_SIZE 208 -#define CFRAME_SHIFT_MULTRES 3 -#elif LJ_TARGET_PPC -#if LJ_TARGET_XBOX360 -#define CFRAME_OFS_ERRF 424 -#define CFRAME_OFS_NRES 420 -#define CFRAME_OFS_PREV 400 -#define CFRAME_OFS_L 416 -#define CFRAME_OFS_PC 412 -#define CFRAME_OFS_MULTRES 408 -#define CFRAME_SIZE 384 -#define CFRAME_SHIFT_MULTRES 3 -#elif LJ_ARCH_PPC32ON64 -#define CFRAME_OFS_ERRF 472 -#define CFRAME_OFS_NRES 468 -#define CFRAME_OFS_PREV 448 -#define CFRAME_OFS_L 464 -#define CFRAME_OFS_PC 460 -#define CFRAME_OFS_MULTRES 456 -#define CFRAME_SIZE 400 -#define CFRAME_SHIFT_MULTRES 3 -#else -#define CFRAME_OFS_ERRF 48 -#define CFRAME_OFS_NRES 44 -#define CFRAME_OFS_PREV 40 -#define CFRAME_OFS_L 36 -#define CFRAME_OFS_PC 32 -#define CFRAME_OFS_MULTRES 28 -#define CFRAME_SIZE 272 -#define CFRAME_SHIFT_MULTRES 3 -#endif -#elif LJ_TARGET_MIPS32 -#if LJ_ARCH_HASFPU -#define CFRAME_OFS_ERRF 124 -#define CFRAME_OFS_NRES 120 -#define CFRAME_OFS_PREV 116 -#define CFRAME_OFS_L 112 -#define CFRAME_SIZE 112 -#else -#define CFRAME_OFS_ERRF 76 -#define CFRAME_OFS_NRES 72 -#define CFRAME_OFS_PREV 68 -#define CFRAME_OFS_L 64 -#define CFRAME_SIZE 64 -#endif -#define CFRAME_OFS_PC 20 -#define CFRAME_OFS_MULTRES 16 -#define CFRAME_SHIFT_MULTRES 3 -#elif LJ_TARGET_MIPS64 -#if LJ_ARCH_HASFPU -#define CFRAME_OFS_ERRF 188 -#define CFRAME_OFS_NRES 184 -#define CFRAME_OFS_PREV 176 -#define CFRAME_OFS_L 168 -#define CFRAME_OFS_PC 160 -#define CFRAME_SIZE 192 -#else -#define CFRAME_OFS_ERRF 124 -#define CFRAME_OFS_NRES 120 -#define CFRAME_OFS_PREV 112 -#define CFRAME_OFS_L 104 -#define CFRAME_OFS_PC 96 -#define CFRAME_SIZE 128 -#endif -#define CFRAME_OFS_MULTRES 0 -#define CFRAME_SHIFT_MULTRES 3 -#else -#error "Missing CFRAME_* definitions for this architecture" -#endif #ifndef CFRAME_SIZE_JIT #define CFRAME_SIZE_JIT CFRAME_SIZE diff --git a/src/lj_gc.c b/src/lj_gc.c index 2aaf5b2c4f..8651c9df35 100644 --- a/src/lj_gc.c +++ b/src/lj_gc.c @@ -20,10 +20,8 @@ #include "lj_meta.h" #include "lj_state.h" #include "lj_frame.h" -#if LJ_HASFFI #include "lj_ctype.h" #include "lj_cdata.h" -#endif #include "lj_trace.h" #include "lj_vm.h" @@ -171,12 +169,10 @@ static int gc_traverse_tab(global_State *g, GCtab *t) else if (c == 'v') weak |= LJ_GC_WEAKVAL; } if (weak) { /* Weak tables are cleared in the atomic phase. */ -#if LJ_HASFFI CTState *cts = ctype_ctsG(g); if (cts && cts->finalizer == t) { weak = (int)(~0u & ~LJ_GC_WEAKVAL); } else -#endif { t->marked = (uint8_t)((t->marked & ~LJ_GC_WEAK) | weak); setgcrefr(t->gclist, g->gc.weak); @@ -223,7 +219,6 @@ static void gc_traverse_func(global_State *g, GCfunc *fn) } } -#if LJ_HASJIT /* Mark a trace. */ static void gc_marktrace(global_State *g, TraceNo traceno) { @@ -256,9 +251,6 @@ static void gc_traverse_trace(global_State *g, GCtrace *T) /* The current trace is a GC root while not anchored in the prototype (yet). */ #define gc_traverse_curtrace(g) gc_traverse_trace(g, &G2J(g)->cur) -#else -#define gc_traverse_curtrace(g) UNUSED(g) -#endif /* Traverse a prototype. */ static void gc_traverse_proto(global_State *g, GCproto *pt) @@ -267,9 +259,7 @@ static void gc_traverse_proto(global_State *g, GCproto *pt) gc_mark_str(proto_chunkname(pt)); for (i = -(ptrdiff_t)pt->sizekgc; i < 0; i++) /* Mark collectable consts. */ gc_markobj(g, proto_kgc(pt, i)); -#if LJ_HASJIT if (pt->trace) gc_marktrace(g, pt->trace); -#endif } /* Traverse the frame structure of a stack. */ @@ -335,15 +325,10 @@ static size_t propagatemark(global_State *g) gc_traverse_thread(g, th); return sizeof(lua_State) + sizeof(TValue) * th->stacksize; } else { -#if LJ_HASJIT GCtrace *T = gco2trace(o); gc_traverse_trace(g, T); return ((sizeof(GCtrace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) + T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(SnapEntry); -#else - lua_assert(0); - return 0; -#endif } } @@ -368,16 +353,8 @@ static const GCFreeFunc gc_freefunc[] = { (GCFreeFunc)lj_state_free, (GCFreeFunc)lj_func_freeproto, (GCFreeFunc)lj_func_free, -#if LJ_HASJIT (GCFreeFunc)lj_trace_free, -#else - (GCFreeFunc)0, -#endif -#if LJ_HASFFI (GCFreeFunc)lj_cdata_free, -#else - (GCFreeFunc)0, -#endif (GCFreeFunc)lj_tab_free, (GCFreeFunc)lj_udata_free }; @@ -491,7 +468,6 @@ static void gc_finalize(lua_State *L) setgcrefnull(g->gc.mmudata); else setgcrefr(gcref(g->gc.mmudata)->gch.nextgc, o->gch.nextgc); -#if LJ_HASFFI if (o->gch.gct == ~LJ_TCDATA) { TValue tmp, *tv; /* Add cdata back to the GC list and make it white. */ @@ -510,7 +486,6 @@ static void gc_finalize(lua_State *L) } return; } -#endif /* Add userdata back to the main userdata list and make it white. */ setgcrefr(o->gch.nextgc, mainthread(g)->nextgc); setgcref(mainthread(g)->nextgc, o); @@ -528,7 +503,6 @@ void lj_gc_finalize_udata(lua_State *L) gc_finalize(L); } -#if LJ_HASFFI /* Finalize all cdata objects from finalizer table. */ void lj_gc_finalize_cdata(lua_State *L) { @@ -551,7 +525,6 @@ void lj_gc_finalize_cdata(lua_State *L) } } } -#endif /* Free all remaining GC objects. */ void lj_gc_freeall(global_State *g) @@ -642,9 +615,7 @@ static size_t gc_onestep(lua_State *L) lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */ if (gcref(g->gc.mmudata)) { /* Need any finalizations? */ g->gc.state = GCSfinalize; -#if LJ_HASFFI g->gc.nocdatafin = 1; -#endif } else { /* Otherwise skip this phase to help the JIT. */ g->gc.state = GCSpause; /* End of GC cycle. */ g->gc.debt = 0; @@ -661,9 +632,7 @@ static size_t gc_onestep(lua_State *L) g->gc.estimate -= GCFINALIZECOST; return GCFINALIZECOST; } -#if LJ_HASFFI if (!g->gc.nocdatafin) lj_tab_rehash(L, ctype_ctsG(g)->finalizer); -#endif g->gc.state = GCSpause; /* End of GC cycle. */ g->gc.debt = 0; return 0; @@ -712,7 +681,6 @@ void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L) lj_gc_step(L); } -#if LJ_HASJIT /* Perform multiple GC steps. Called from JIT-compiled code. */ int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps) { @@ -724,7 +692,6 @@ int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps) /* Return 1 to force a trace exit. */ return (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize); } -#endif /* Perform a full GC cycle. */ void lj_gc_fullgc(lua_State *L) @@ -799,14 +766,12 @@ void lj_gc_closeuv(global_State *g, GCupval *uv) } } -#if LJ_HASJIT /* Mark a trace if it's saved during the propagation phase. */ void lj_gc_barriertrace(global_State *g, uint32_t traceno) { if (g->gc.state == GCSpropagate || g->gc.state == GCSatomic) gc_marktrace(g, traceno); } -#endif /* -- Allocator ----------------------------------------------------------- */ diff --git a/src/lj_gc.h b/src/lj_gc.h index 669bbe9240..40e7415945 100644 --- a/src/lj_gc.h +++ b/src/lj_gc.h @@ -48,17 +48,11 @@ enum { /* Collector. */ LJ_FUNC size_t lj_gc_separateudata(global_State *g, int all); LJ_FUNC void lj_gc_finalize_udata(lua_State *L); -#if LJ_HASFFI LJ_FUNC void lj_gc_finalize_cdata(lua_State *L); -#else -#define lj_gc_finalize_cdata(L) UNUSED(L) -#endif LJ_FUNC void lj_gc_freeall(global_State *g); LJ_FUNCA int LJ_FASTCALL lj_gc_step(lua_State *L); LJ_FUNCA void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L); -#if LJ_HASJIT LJ_FUNC int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps); -#endif LJ_FUNC void lj_gc_fullgc(lua_State *L); /* GC check: drive collector forward if the GC threshold has been reached. */ @@ -73,9 +67,7 @@ LJ_FUNC void lj_gc_fullgc(lua_State *L); LJ_FUNC void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v); LJ_FUNCA void LJ_FASTCALL lj_gc_barrieruv(global_State *g, TValue *tv); LJ_FUNC void lj_gc_closeuv(global_State *g, GCupval *uv); -#if LJ_HASJIT LJ_FUNC void lj_gc_barriertrace(global_State *g, uint32_t traceno); -#endif /* Move the GC propagation frontier back for tables (make it gray again). */ static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t) diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c index c219ffac0a..246fa69d78 100644 --- a/src/lj_gdbjit.c +++ b/src/lj_gdbjit.c @@ -8,7 +8,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_gc.h" #include "lj_err.h" @@ -22,796 +21,3 @@ /* This is not compiled in by default. ** Enable with -DLUAJIT_USE_GDBJIT in the Makefile and recompile everything. */ -#ifdef LUAJIT_USE_GDBJIT - -/* The GDB JIT API allows JIT compilers to pass debug information about -** JIT-compiled code back to GDB. You need at least GDB 7.0 or higher -** to see it in action. -** -** This is a passive API, so it works even when not running under GDB -** or when attaching to an already running process. Alas, this implies -** enabling it always has a non-negligible overhead -- do not use in -** release mode! -** -** The LuaJIT GDB JIT client is rather minimal at the moment. It gives -** each trace a symbol name and adds a source location and frame unwind -** information. Obviously LuaJIT itself and any embedding C application -** should be compiled with debug symbols, too (see the Makefile). -** -** Traces are named TRACE_1, TRACE_2, ... these correspond to the trace -** numbers from -jv or -jdump. Use "break TRACE_1" or "tbreak TRACE_1" etc. -** to set breakpoints on specific traces (even ahead of their creation). -** -** The source location for each trace allows listing the corresponding -** source lines with the GDB command "list" (but only if the Lua source -** has been loaded from a file). Currently this is always set to the -** location where the trace has been started. -** -** Frame unwind information can be inspected with the GDB command -** "info frame". This also allows proper backtraces across JIT-compiled -** code with the GDB command "bt". -** -** You probably want to add the following settings to a .gdbinit file -** (or add them to ~/.gdbinit): -** set disassembly-flavor intel -** set breakpoint pending on -** -** Here's a sample GDB session: -** ------------------------------------------------------------------------ - -$ cat >x.lua -for outer=1,100 do - for inner=1,100 do end -end -^D - -$ luajit -jv x.lua -[TRACE 1 x.lua:2] -[TRACE 2 (1/3) x.lua:1 -> 1] - -$ gdb --quiet --args luajit x.lua -(gdb) tbreak TRACE_1 -Function "TRACE_1" not defined. -Temporary breakpoint 1 (TRACE_1) pending. -(gdb) run -Starting program: luajit x.lua - -Temporary breakpoint 1, TRACE_1 () at x.lua:2 -2 for inner=1,100 do end -(gdb) list -1 for outer=1,100 do -2 for inner=1,100 do end -3 end -(gdb) bt -#0 TRACE_1 () at x.lua:2 -#1 0x08053690 in lua_pcall [...] -[...] -#7 0x0806ff90 in main [...] -(gdb) disass TRACE_1 -Dump of assembler code for function TRACE_1: -0xf7fd9fba : mov DWORD PTR ds:0xf7e0e2a0,0x1 -0xf7fd9fc4 : movsd xmm7,QWORD PTR [edx+0x20] -[...] -0xf7fd9ff8 : jmp 0xf7fd2014 -End of assembler dump. -(gdb) tbreak TRACE_2 -Function "TRACE_2" not defined. -Temporary breakpoint 2 (TRACE_2) pending. -(gdb) cont -Continuing. - -Temporary breakpoint 2, TRACE_2 () at x.lua:1 -1 for outer=1,100 do -(gdb) info frame -Stack level 0, frame at 0xffffd7c0: - eip = 0xf7fd9f60 in TRACE_2 (x.lua:1); saved eip 0x8053690 - called by frame at 0xffffd7e0 - source language unknown. - Arglist at 0xffffd78c, args: - Locals at 0xffffd78c, Previous frame's sp is 0xffffd7c0 - Saved registers: - ebx at 0xffffd7ac, ebp at 0xffffd7b8, esi at 0xffffd7b0, edi at 0xffffd7b4, - eip at 0xffffd7bc -(gdb) - -** ------------------------------------------------------------------------ -*/ - -/* -- GDB JIT API --------------------------------------------------------- */ - -/* GDB JIT actions. */ -enum { - GDBJIT_NOACTION = 0, - GDBJIT_REGISTER, - GDBJIT_UNREGISTER -}; - -/* GDB JIT entry. */ -typedef struct GDBJITentry { - struct GDBJITentry *next_entry; - struct GDBJITentry *prev_entry; - const char *symfile_addr; - uint64_t symfile_size; -} GDBJITentry; - -/* GDB JIT descriptor. */ -typedef struct GDBJITdesc { - uint32_t version; - uint32_t action_flag; - GDBJITentry *relevant_entry; - GDBJITentry *first_entry; -} GDBJITdesc; - -GDBJITdesc __jit_debug_descriptor = { - 1, GDBJIT_NOACTION, NULL, NULL -}; - -/* GDB sets a breakpoint at this function. */ -void LJ_NOINLINE __jit_debug_register_code() -{ - __asm__ __volatile__(""); -}; - -/* -- In-memory ELF object definitions ------------------------------------ */ - -/* ELF definitions. */ -typedef struct ELFheader { - uint8_t emagic[4]; - uint8_t eclass; - uint8_t eendian; - uint8_t eversion; - uint8_t eosabi; - uint8_t eabiversion; - uint8_t epad[7]; - uint16_t type; - uint16_t machine; - uint32_t version; - uintptr_t entry; - uintptr_t phofs; - uintptr_t shofs; - uint32_t flags; - uint16_t ehsize; - uint16_t phentsize; - uint16_t phnum; - uint16_t shentsize; - uint16_t shnum; - uint16_t shstridx; -} ELFheader; - -typedef struct ELFsectheader { - uint32_t name; - uint32_t type; - uintptr_t flags; - uintptr_t addr; - uintptr_t ofs; - uintptr_t size; - uint32_t link; - uint32_t info; - uintptr_t align; - uintptr_t entsize; -} ELFsectheader; - -#define ELFSECT_IDX_ABS 0xfff1 - -enum { - ELFSECT_TYPE_PROGBITS = 1, - ELFSECT_TYPE_SYMTAB = 2, - ELFSECT_TYPE_STRTAB = 3, - ELFSECT_TYPE_NOBITS = 8 -}; - -#define ELFSECT_FLAGS_WRITE 1 -#define ELFSECT_FLAGS_ALLOC 2 -#define ELFSECT_FLAGS_EXEC 4 - -typedef struct ELFsymbol { -#if LJ_64 - uint32_t name; - uint8_t info; - uint8_t other; - uint16_t sectidx; - uintptr_t value; - uint64_t size; -#else - uint32_t name; - uintptr_t value; - uint32_t size; - uint8_t info; - uint8_t other; - uint16_t sectidx; -#endif -} ELFsymbol; - -enum { - ELFSYM_TYPE_FUNC = 2, - ELFSYM_TYPE_FILE = 4, - ELFSYM_BIND_LOCAL = 0 << 4, - ELFSYM_BIND_GLOBAL = 1 << 4, -}; - -/* DWARF definitions. */ -#define DW_CIE_VERSION 1 - -enum { - DW_CFA_nop = 0x0, - DW_CFA_offset_extended = 0x5, - DW_CFA_def_cfa = 0xc, - DW_CFA_def_cfa_offset = 0xe, - DW_CFA_offset_extended_sf = 0x11, - DW_CFA_advance_loc = 0x40, - DW_CFA_offset = 0x80 -}; - -enum { - DW_EH_PE_udata4 = 3, - DW_EH_PE_textrel = 0x20 -}; - -enum { - DW_TAG_compile_unit = 0x11 -}; - -enum { - DW_children_no = 0, - DW_children_yes = 1 -}; - -enum { - DW_AT_name = 0x03, - DW_AT_stmt_list = 0x10, - DW_AT_low_pc = 0x11, - DW_AT_high_pc = 0x12 -}; - -enum { - DW_FORM_addr = 0x01, - DW_FORM_data4 = 0x06, - DW_FORM_string = 0x08 -}; - -enum { - DW_LNS_extended_op = 0, - DW_LNS_copy = 1, - DW_LNS_advance_pc = 2, - DW_LNS_advance_line = 3 -}; - -enum { - DW_LNE_end_sequence = 1, - DW_LNE_set_address = 2 -}; - -enum { -#if LJ_TARGET_X86 - DW_REG_AX, DW_REG_CX, DW_REG_DX, DW_REG_BX, - DW_REG_SP, DW_REG_BP, DW_REG_SI, DW_REG_DI, - DW_REG_RA, -#elif LJ_TARGET_X64 - /* Yes, the order is strange, but correct. */ - DW_REG_AX, DW_REG_DX, DW_REG_CX, DW_REG_BX, - DW_REG_SI, DW_REG_DI, DW_REG_BP, DW_REG_SP, - DW_REG_8, DW_REG_9, DW_REG_10, DW_REG_11, - DW_REG_12, DW_REG_13, DW_REG_14, DW_REG_15, - DW_REG_RA, -#elif LJ_TARGET_ARM - DW_REG_SP = 13, - DW_REG_RA = 14, -#elif LJ_TARGET_ARM64 - DW_REG_SP = 31, - DW_REG_RA = 30, -#elif LJ_TARGET_PPC - DW_REG_SP = 1, - DW_REG_RA = 65, - DW_REG_CR = 70, -#elif LJ_TARGET_MIPS - DW_REG_SP = 29, - DW_REG_RA = 31, -#else -#error "Unsupported target architecture" -#endif -}; - -/* Minimal list of sections for the in-memory ELF object. */ -enum { - GDBJIT_SECT_NULL, - GDBJIT_SECT_text, - GDBJIT_SECT_eh_frame, - GDBJIT_SECT_shstrtab, - GDBJIT_SECT_strtab, - GDBJIT_SECT_symtab, - GDBJIT_SECT_debug_info, - GDBJIT_SECT_debug_abbrev, - GDBJIT_SECT_debug_line, - GDBJIT_SECT__MAX -}; - -enum { - GDBJIT_SYM_UNDEF, - GDBJIT_SYM_FILE, - GDBJIT_SYM_FUNC, - GDBJIT_SYM__MAX -}; - -/* In-memory ELF object. */ -typedef struct GDBJITobj { - ELFheader hdr; /* ELF header. */ - ELFsectheader sect[GDBJIT_SECT__MAX]; /* ELF sections. */ - ELFsymbol sym[GDBJIT_SYM__MAX]; /* ELF symbol table. */ - uint8_t space[4096]; /* Space for various section data. */ -} GDBJITobj; - -/* Combined structure for GDB JIT entry and ELF object. */ -typedef struct GDBJITentryobj { - GDBJITentry entry; - size_t sz; - GDBJITobj obj; -} GDBJITentryobj; - -/* Template for in-memory ELF header. */ -static const ELFheader elfhdr_template = { - .emagic = { 0x7f, 'E', 'L', 'F' }, - .eclass = LJ_64 ? 2 : 1, - .eendian = LJ_ENDIAN_SELECT(1, 2), - .eversion = 1, -#if LJ_TARGET_LINUX - .eosabi = 0, /* Nope, it's not 3. */ -#elif defined(__FreeBSD__) - .eosabi = 9, -#elif defined(__NetBSD__) - .eosabi = 2, -#elif defined(__OpenBSD__) - .eosabi = 12, -#elif defined(__DragonFly__) - .eosabi = 0, -#elif (defined(__sun__) && defined(__svr4__)) - .eosabi = 6, -#else - .eosabi = 0, -#endif - .eabiversion = 0, - .epad = { 0, 0, 0, 0, 0, 0, 0 }, - .type = 1, -#if LJ_TARGET_X86 - .machine = 3, -#elif LJ_TARGET_X64 - .machine = 62, -#elif LJ_TARGET_ARM - .machine = 40, -#elif LJ_TARGET_ARM64 - .machine = 183, -#elif LJ_TARGET_PPC - .machine = 20, -#elif LJ_TARGET_MIPS - .machine = 8, -#else -#error "Unsupported target architecture" -#endif - .version = 1, - .entry = 0, - .phofs = 0, - .shofs = offsetof(GDBJITobj, sect), - .flags = 0, - .ehsize = sizeof(ELFheader), - .phentsize = 0, - .phnum = 0, - .shentsize = sizeof(ELFsectheader), - .shnum = GDBJIT_SECT__MAX, - .shstridx = GDBJIT_SECT_shstrtab -}; - -/* -- In-memory ELF object generation ------------------------------------- */ - -/* Context for generating the ELF object for the GDB JIT API. */ -typedef struct GDBJITctx { - uint8_t *p; /* Pointer to next address in obj.space. */ - uint8_t *startp; /* Pointer to start address in obj.space. */ - GCtrace *T; /* Generate symbols for this trace. */ - uintptr_t mcaddr; /* Machine code address. */ - MSize szmcode; /* Size of machine code. */ - MSize spadjp; /* Stack adjustment for parent trace or interpreter. */ - MSize spadj; /* Stack adjustment for trace itself. */ - BCLine lineno; /* Starting line number. */ - const char *filename; /* Starting file name. */ - size_t objsize; /* Final size of ELF object. */ - GDBJITobj obj; /* In-memory ELF object. */ -} GDBJITctx; - -/* Add a zero-terminated string. */ -static uint32_t gdbjit_strz(GDBJITctx *ctx, const char *str) -{ - uint8_t *p = ctx->p; - uint32_t ofs = (uint32_t)(p - ctx->startp); - do { - *p++ = (uint8_t)*str; - } while (*str++); - ctx->p = p; - return ofs; -} - -/* Append a decimal number. */ -static void gdbjit_catnum(GDBJITctx *ctx, uint32_t n) -{ - if (n >= 10) { uint32_t m = n / 10; n = n % 10; gdbjit_catnum(ctx, m); } - *ctx->p++ = '0' + n; -} - -/* Add a SLEB128 value. */ -static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v) -{ - uint8_t *p = ctx->p; - for (; (uint32_t)(v+0x40) >= 0x80; v >>= 7) - *p++ = (uint8_t)((v & 0x7f) | 0x80); - *p++ = (uint8_t)(v & 0x7f); - ctx->p = p; -} - -/* Shortcuts to generate DWARF structures. */ -#define DB(x) (*p++ = (x)) -#define DI8(x) (*(int8_t *)p = (x), p++) -#define DU16(x) (*(uint16_t *)p = (x), p += 2) -#define DU32(x) (*(uint32_t *)p = (x), p += 4) -#define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t)) -#define DUV(x) (p = (uint8_t *)lj_strfmt_wuleb128((char *)p, (x))) -#define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p) -#define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p) -#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop -#define DSECT(name, stmt) \ - { uint32_t *szp_##name = (uint32_t *)p; p += 4; stmt \ - *szp_##name = (uint32_t)((p-(uint8_t *)szp_##name)-4); } \ - -/* Initialize ELF section headers. */ -static void LJ_FASTCALL gdbjit_secthdr(GDBJITctx *ctx) -{ - ELFsectheader *sect; - - *ctx->p++ = '\0'; /* Empty string at start of string table. */ - -#define SECTDEF(id, tp, al) \ - sect = &ctx->obj.sect[GDBJIT_SECT_##id]; \ - sect->name = gdbjit_strz(ctx, "." #id); \ - sect->type = ELFSECT_TYPE_##tp; \ - sect->align = (al) - - SECTDEF(text, NOBITS, 16); - sect->flags = ELFSECT_FLAGS_ALLOC|ELFSECT_FLAGS_EXEC; - sect->addr = ctx->mcaddr; - sect->ofs = 0; - sect->size = ctx->szmcode; - - SECTDEF(eh_frame, PROGBITS, sizeof(uintptr_t)); - sect->flags = ELFSECT_FLAGS_ALLOC; - - SECTDEF(shstrtab, STRTAB, 1); - SECTDEF(strtab, STRTAB, 1); - - SECTDEF(symtab, SYMTAB, sizeof(uintptr_t)); - sect->ofs = offsetof(GDBJITobj, sym); - sect->size = sizeof(ctx->obj.sym); - sect->link = GDBJIT_SECT_strtab; - sect->entsize = sizeof(ELFsymbol); - sect->info = GDBJIT_SYM_FUNC; - - SECTDEF(debug_info, PROGBITS, 1); - SECTDEF(debug_abbrev, PROGBITS, 1); - SECTDEF(debug_line, PROGBITS, 1); - -#undef SECTDEF -} - -/* Initialize symbol table. */ -static void LJ_FASTCALL gdbjit_symtab(GDBJITctx *ctx) -{ - ELFsymbol *sym; - - *ctx->p++ = '\0'; /* Empty string at start of string table. */ - - sym = &ctx->obj.sym[GDBJIT_SYM_FILE]; - sym->name = gdbjit_strz(ctx, "JIT mcode"); - sym->sectidx = ELFSECT_IDX_ABS; - sym->info = ELFSYM_TYPE_FILE|ELFSYM_BIND_LOCAL; - - sym = &ctx->obj.sym[GDBJIT_SYM_FUNC]; - sym->name = gdbjit_strz(ctx, "TRACE_"); ctx->p--; - gdbjit_catnum(ctx, ctx->T->traceno); *ctx->p++ = '\0'; - sym->sectidx = GDBJIT_SECT_text; - sym->value = 0; - sym->size = ctx->szmcode; - sym->info = ELFSYM_TYPE_FUNC|ELFSYM_BIND_GLOBAL; -} - -/* Initialize .eh_frame section. */ -static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx) -{ - uint8_t *p = ctx->p; - uint8_t *framep = p; - - /* Emit DWARF EH CIE. */ - DSECT(CIE, - DU32(0); /* Offset to CIE itself. */ - DB(DW_CIE_VERSION); - DSTR("zR"); /* Augmentation. */ - DUV(1); /* Code alignment factor. */ - DSV(-(int32_t)sizeof(uintptr_t)); /* Data alignment factor. */ - DB(DW_REG_RA); /* Return address register. */ - DB(1); DB(DW_EH_PE_textrel|DW_EH_PE_udata4); /* Augmentation data. */ - DB(DW_CFA_def_cfa); DUV(DW_REG_SP); DUV(sizeof(uintptr_t)); -#if LJ_TARGET_PPC - DB(DW_CFA_offset_extended_sf); DB(DW_REG_RA); DSV(-1); -#else - DB(DW_CFA_offset|DW_REG_RA); DUV(1); -#endif - DALIGNNOP(sizeof(uintptr_t)); - ) - - /* Emit DWARF EH FDE. */ - DSECT(FDE, - DU32((uint32_t)(p-framep)); /* Offset to CIE. */ - DU32(0); /* Machine code offset relative to .text. */ - DU32(ctx->szmcode); /* Machine code length. */ - DB(0); /* Augmentation data. */ - /* Registers saved in CFRAME. */ -#if LJ_TARGET_X86 - DB(DW_CFA_offset|DW_REG_BP); DUV(2); - DB(DW_CFA_offset|DW_REG_DI); DUV(3); - DB(DW_CFA_offset|DW_REG_SI); DUV(4); - DB(DW_CFA_offset|DW_REG_BX); DUV(5); -#elif LJ_TARGET_X64 - DB(DW_CFA_offset|DW_REG_BP); DUV(2); - DB(DW_CFA_offset|DW_REG_BX); DUV(3); - DB(DW_CFA_offset|DW_REG_15); DUV(4); - DB(DW_CFA_offset|DW_REG_14); DUV(5); - /* Extra registers saved for JIT-compiled code. */ - DB(DW_CFA_offset|DW_REG_13); DUV(LJ_GC64 ? 10 : 9); - DB(DW_CFA_offset|DW_REG_12); DUV(LJ_GC64 ? 11 : 10); -#elif LJ_TARGET_ARM - { - int i; - for (i = 11; i >= 4; i--) { DB(DW_CFA_offset|i); DUV(2+(11-i)); } - } -#elif LJ_TARGET_ARM64 - { - int i; - DB(DW_CFA_offset|31); DUV(2); - for (i = 28; i >= 19; i--) { DB(DW_CFA_offset|i); DUV(3+(28-i)); } - for (i = 15; i >= 8; i--) { DB(DW_CFA_offset|32|i); DUV(28-i); } - } -#elif LJ_TARGET_PPC - { - int i; - DB(DW_CFA_offset_extended); DB(DW_REG_CR); DUV(55); - for (i = 14; i <= 31; i++) { - DB(DW_CFA_offset|i); DUV(37+(31-i)); - DB(DW_CFA_offset|32|i); DUV(2+2*(31-i)); - } - } -#elif LJ_TARGET_MIPS - { - int i; - DB(DW_CFA_offset|30); DUV(2); - for (i = 23; i >= 16; i--) { DB(DW_CFA_offset|i); DUV(26-i); } - for (i = 30; i >= 20; i -= 2) { DB(DW_CFA_offset|32|i); DUV(42-i); } - } -#else -#error "Unsupported target architecture" -#endif - if (ctx->spadjp != ctx->spadj) { /* Parent/interpreter stack frame size. */ - DB(DW_CFA_def_cfa_offset); DUV(ctx->spadjp); - DB(DW_CFA_advance_loc|1); /* Only an approximation. */ - } - DB(DW_CFA_def_cfa_offset); DUV(ctx->spadj); /* Trace stack frame size. */ - DALIGNNOP(sizeof(uintptr_t)); - ) - - ctx->p = p; -} - -/* Initialize .debug_info section. */ -static void LJ_FASTCALL gdbjit_debuginfo(GDBJITctx *ctx) -{ - uint8_t *p = ctx->p; - - DSECT(info, - DU16(2); /* DWARF version. */ - DU32(0); /* Abbrev offset. */ - DB(sizeof(uintptr_t)); /* Pointer size. */ - - DUV(1); /* Abbrev #1: DW_TAG_compile_unit. */ - DSTR(ctx->filename); /* DW_AT_name. */ - DADDR(ctx->mcaddr); /* DW_AT_low_pc. */ - DADDR(ctx->mcaddr + ctx->szmcode); /* DW_AT_high_pc. */ - DU32(0); /* DW_AT_stmt_list. */ - ) - - ctx->p = p; -} - -/* Initialize .debug_abbrev section. */ -static void LJ_FASTCALL gdbjit_debugabbrev(GDBJITctx *ctx) -{ - uint8_t *p = ctx->p; - - /* Abbrev #1: DW_TAG_compile_unit. */ - DUV(1); DUV(DW_TAG_compile_unit); - DB(DW_children_no); - DUV(DW_AT_name); DUV(DW_FORM_string); - DUV(DW_AT_low_pc); DUV(DW_FORM_addr); - DUV(DW_AT_high_pc); DUV(DW_FORM_addr); - DUV(DW_AT_stmt_list); DUV(DW_FORM_data4); - DB(0); DB(0); - - ctx->p = p; -} - -#define DLNE(op, s) (DB(DW_LNS_extended_op), DUV(1+(s)), DB((op))) - -/* Initialize .debug_line section. */ -static void LJ_FASTCALL gdbjit_debugline(GDBJITctx *ctx) -{ - uint8_t *p = ctx->p; - - DSECT(line, - DU16(2); /* DWARF version. */ - DSECT(header, - DB(1); /* Minimum instruction length. */ - DB(1); /* is_stmt. */ - DI8(0); /* Line base for special opcodes. */ - DB(2); /* Line range for special opcodes. */ - DB(3+1); /* Opcode base at DW_LNS_advance_line+1. */ - DB(0); DB(1); DB(1); /* Standard opcode lengths. */ - /* Directory table. */ - DB(0); - /* File name table. */ - DSTR(ctx->filename); DUV(0); DUV(0); DUV(0); - DB(0); - ) - - DLNE(DW_LNE_set_address, sizeof(uintptr_t)); DADDR(ctx->mcaddr); - if (ctx->lineno) { - DB(DW_LNS_advance_line); DSV(ctx->lineno-1); - } - DB(DW_LNS_copy); - DB(DW_LNS_advance_pc); DUV(ctx->szmcode); - DLNE(DW_LNE_end_sequence, 0); - ) - - ctx->p = p; -} - -#undef DLNE - -/* Undef shortcuts. */ -#undef DB -#undef DI8 -#undef DU16 -#undef DU32 -#undef DADDR -#undef DUV -#undef DSV -#undef DSTR -#undef DALIGNNOP -#undef DSECT - -/* Type of a section initializer callback. */ -typedef void (LJ_FASTCALL *GDBJITinitf)(GDBJITctx *ctx); - -/* Call section initializer and set the section offset and size. */ -static void gdbjit_initsect(GDBJITctx *ctx, int sect, GDBJITinitf initf) -{ - ctx->startp = ctx->p; - ctx->obj.sect[sect].ofs = (uintptr_t)((char *)ctx->p - (char *)&ctx->obj); - initf(ctx); - ctx->obj.sect[sect].size = (uintptr_t)(ctx->p - ctx->startp); -} - -#define SECTALIGN(p, a) \ - ((p) = (uint8_t *)(((uintptr_t)(p) + ((a)-1)) & ~(uintptr_t)((a)-1))) - -/* Build in-memory ELF object. */ -static void gdbjit_buildobj(GDBJITctx *ctx) -{ - GDBJITobj *obj = &ctx->obj; - /* Fill in ELF header and clear structures. */ - memcpy(&obj->hdr, &elfhdr_template, sizeof(ELFheader)); - memset(&obj->sect, 0, sizeof(ELFsectheader)*GDBJIT_SECT__MAX); - memset(&obj->sym, 0, sizeof(ELFsymbol)*GDBJIT_SYM__MAX); - /* Initialize sections. */ - ctx->p = obj->space; - gdbjit_initsect(ctx, GDBJIT_SECT_shstrtab, gdbjit_secthdr); - gdbjit_initsect(ctx, GDBJIT_SECT_strtab, gdbjit_symtab); - gdbjit_initsect(ctx, GDBJIT_SECT_debug_info, gdbjit_debuginfo); - gdbjit_initsect(ctx, GDBJIT_SECT_debug_abbrev, gdbjit_debugabbrev); - gdbjit_initsect(ctx, GDBJIT_SECT_debug_line, gdbjit_debugline); - SECTALIGN(ctx->p, sizeof(uintptr_t)); - gdbjit_initsect(ctx, GDBJIT_SECT_eh_frame, gdbjit_ehframe); - ctx->objsize = (size_t)((char *)ctx->p - (char *)obj); - lua_assert(ctx->objsize < sizeof(GDBJITobj)); -} - -#undef SECTALIGN - -/* -- Interface to GDB JIT API -------------------------------------------- */ - -static int gdbjit_lock; - -static void gdbjit_lock_acquire() -{ - while (__sync_lock_test_and_set(&gdbjit_lock, 1)) { - /* Just spin; futexes or pthreads aren't worth the portability cost. */ - } -} - -static void gdbjit_lock_release() -{ - __sync_lock_release(&gdbjit_lock); -} - -/* Add new entry to GDB JIT symbol chain. */ -static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) -{ - /* Allocate memory for GDB JIT entry and ELF object. */ - MSize sz = (MSize)(sizeof(GDBJITentryobj) - sizeof(GDBJITobj) + ctx->objsize); - GDBJITentryobj *eo = lj_mem_newt(L, sz, GDBJITentryobj); - memcpy(&eo->obj, &ctx->obj, ctx->objsize); /* Copy ELF object. */ - eo->sz = sz; - ctx->T->gdbjit_entry = (void *)eo; - /* Link new entry to chain and register it. */ - eo->entry.prev_entry = NULL; - gdbjit_lock_acquire(); - eo->entry.next_entry = __jit_debug_descriptor.first_entry; - if (eo->entry.next_entry) - eo->entry.next_entry->prev_entry = &eo->entry; - eo->entry.symfile_addr = (const char *)&eo->obj; - eo->entry.symfile_size = ctx->objsize; - __jit_debug_descriptor.first_entry = &eo->entry; - __jit_debug_descriptor.relevant_entry = &eo->entry; - __jit_debug_descriptor.action_flag = GDBJIT_REGISTER; - __jit_debug_register_code(); - gdbjit_lock_release(); -} - -/* Add debug info for newly compiled trace and notify GDB. */ -void lj_gdbjit_addtrace(jit_State *J, GCtrace *T) -{ - GDBJITctx ctx; - GCproto *pt = &gcref(T->startpt)->pt; - TraceNo parent = T->ir[REF_BASE].op1; - const BCIns *startpc = mref(T->startpc, const BCIns); - ctx.T = T; - ctx.mcaddr = (uintptr_t)T->mcode; - ctx.szmcode = T->szmcode; - ctx.spadjp = CFRAME_SIZE_JIT + - (MSize)(parent ? traceref(J, parent)->spadjust : 0); - ctx.spadj = CFRAME_SIZE_JIT + T->spadjust; - lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc); - ctx.lineno = lj_debug_line(pt, proto_bcpos(pt, startpc)); - ctx.filename = proto_chunknamestr(pt); - if (*ctx.filename == '@' || *ctx.filename == '=') - ctx.filename++; - else - ctx.filename = "(string)"; - gdbjit_buildobj(&ctx); - gdbjit_newentry(J->L, &ctx); -} - -/* Delete debug info for trace and notify GDB. */ -void lj_gdbjit_deltrace(jit_State *J, GCtrace *T) -{ - GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry; - if (eo) { - gdbjit_lock_acquire(); - if (eo->entry.prev_entry) - eo->entry.prev_entry->next_entry = eo->entry.next_entry; - else - __jit_debug_descriptor.first_entry = eo->entry.next_entry; - if (eo->entry.next_entry) - eo->entry.next_entry->prev_entry = eo->entry.prev_entry; - __jit_debug_descriptor.relevant_entry = &eo->entry; - __jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER; - __jit_debug_register_code(); - gdbjit_lock_release(); - lj_mem_free(J2G(J), eo, eo->sz); - } -} - -#endif -#endif diff --git a/src/lj_gdbjit.h b/src/lj_gdbjit.h index bbaa1568c4..d2c8eb8f6e 100644 --- a/src/lj_gdbjit.h +++ b/src/lj_gdbjit.h @@ -9,14 +9,7 @@ #include "lj_obj.h" #include "lj_jit.h" -#if LJ_HASJIT && defined(LUAJIT_USE_GDBJIT) - -LJ_FUNC void lj_gdbjit_addtrace(jit_State *J, GCtrace *T); -LJ_FUNC void lj_gdbjit_deltrace(jit_State *J, GCtrace *T); - -#else #define lj_gdbjit_addtrace(J, T) UNUSED(T) #define lj_gdbjit_deltrace(J, T) UNUSED(T) -#endif #endif diff --git a/src/lj_ir.c b/src/lj_ir.c index 5baece67e6..f512368601 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c @@ -12,7 +12,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_gc.h" #include "lj_buf.h" @@ -23,11 +22,9 @@ #include "lj_ircall.h" #include "lj_iropt.h" #include "lj_trace.h" -#if LJ_HASFFI #include "lj_ctype.h" #include "lj_cdata.h" #include "lj_carith.h" -#endif #include "lj_vm.h" #include "lj_strscan.h" #include "lj_strfmt.h" @@ -187,11 +184,7 @@ static LJ_AINLINE IRRef ir_nextk64(jit_State *J) return ref; } -#if LJ_GC64 #define ir_nextkgc ir_nextk64 -#else -#define ir_nextkgc ir_nextk -#endif /* Intern int32_t constant. */ TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k) @@ -312,17 +305,10 @@ TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr) { IRIns *ir, *cir = J->cur.ir; IRRef ref; -#if LJ_64 && !LJ_GC64 - lua_assert((void *)(uintptr_t)u32ptr(ptr) == ptr); -#endif for (ref = J->chain[op]; ref; ref = cir[ref].prev) if (ir_kptr(&cir[ref]) == ptr) goto found; -#if LJ_GC64 ref = ir_nextk64(J); -#else - ref = ir_nextk(J); -#endif ir = IR(ref); ir->op12 = 0; setmref(ir[LJ_GC64].ptr, ptr); @@ -389,14 +375,12 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir) case IR_KPTR: case IR_KKPTR: setlightudV(tv, ir_kptr(ir)); break; case IR_KNULL: setlightudV(tv, NULL); break; case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break; -#if LJ_HASFFI case IR_KINT64: { GCcdata *cd = lj_cdata_new_(L, CTID_INT64, 8); *(uint64_t *)cdataptr(cd) = ir_kint64(ir)->u64; setcdataV(L, tv, cd); break; } -#endif default: lua_assert(0); break; } } @@ -491,4 +475,3 @@ void lj_ir_rollback(jit_State *J, IRRef ref) #undef fins #undef emitir -#endif diff --git a/src/lj_ir.h b/src/lj_ir.h index 34c2785394..bcdb28c8f2 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -376,18 +376,10 @@ typedef struct IRType1 { uint8_t irt; } IRType1; #define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA)) #define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64)) -#if LJ_GC64 #define IRT_IS64 \ ((1u<> irt_type(t)) & 1) #define irt_is64orfp(t) (((IRT_IS64|(1u<>irt_type(t)) & 1) @@ -402,10 +394,6 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv) return IRT_INT; else if (tvisnum(tv)) return IRT_NUM; -#if LJ_64 && !LJ_GC64 - else if (tvislightud(tv)) - return IRT_LIGHTUD; -#endif else return (IRType)~itype(tv); } diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 973c36e6ec..2f95b06645 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h @@ -51,38 +51,17 @@ typedef struct CCallInfo { #define CCI_XARGS(ci) (((ci)->flags >> CCI_XARGS_SHIFT) & 3) #define CCI_XA (1u << CCI_XARGS_SHIFT) -#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) -#define CCI_XNARGS(ci) (CCI_NARGS((ci)) + CCI_XARGS((ci))) -#else #define CCI_XNARGS(ci) CCI_NARGS((ci)) -#endif /* Helpers for conditional function definitions. */ #define IRCALLCOND_ANY(x) x -#if LJ_TARGET_X86ORX64 #define IRCALLCOND_FPMATH(x) NULL -#else -#define IRCALLCOND_FPMATH(x) x -#endif -#if LJ_SOFTFP -#define IRCALLCOND_SOFTFP(x) x -#if LJ_HASFFI -#define IRCALLCOND_SOFTFP_FFI(x) x -#else -#define IRCALLCOND_SOFTFP_FFI(x) NULL -#endif -#else #define IRCALLCOND_SOFTFP(x) NULL #define IRCALLCOND_SOFTFP_FFI(x) NULL -#endif -#if LJ_SOFTFP && LJ_TARGET_MIPS32 -#define IRCALLCOND_SOFTFP_MIPS(x) x -#else #define IRCALLCOND_SOFTFP_MIPS(x) NULL -#endif #define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS32) @@ -92,33 +71,14 @@ typedef struct CCallInfo { #define IRCALLCOND_FP64_FFI(x) NULL #endif -#if LJ_HASFFI #define IRCALLCOND_FFI(x) x -#if LJ_32 -#define IRCALLCOND_FFI32(x) x -#else #define IRCALLCOND_FFI32(x) NULL -#endif -#else -#define IRCALLCOND_FFI(x) NULL -#define IRCALLCOND_FFI32(x) NULL -#endif -#if LJ_SOFTFP -#define XA_FP CCI_XA -#define XA2_FP (CCI_XA+CCI_XA) -#else #define XA_FP 0 #define XA2_FP 0 -#endif -#if LJ_32 -#define XA_64 CCI_XA -#define XA2_64 (CCI_XA+CCI_XA) -#else #define XA_64 0 #define XA2_64 0 -#endif /* Function definitions for CALL* instructions. */ #define IRCALLDEF(_) \ @@ -240,79 +200,6 @@ LJ_FUNC TRef lj_ir_call(jit_State *J, IRCallID id, ...); LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; /* Soft-float declarations. */ -#if LJ_SOFTFP -#if LJ_TARGET_ARM -#define softfp_add __aeabi_dadd -#define softfp_sub __aeabi_dsub -#define softfp_mul __aeabi_dmul -#define softfp_div __aeabi_ddiv -#define softfp_cmp __aeabi_cdcmple -#define softfp_i2d __aeabi_i2d -#define softfp_d2i __aeabi_d2iz -#define softfp_ui2d __aeabi_ui2d -#define softfp_f2d __aeabi_f2d -#define softfp_d2ui __aeabi_d2uiz -#define softfp_d2f __aeabi_d2f -#define softfp_i2f __aeabi_i2f -#define softfp_ui2f __aeabi_ui2f -#define softfp_f2i __aeabi_f2iz -#define softfp_f2ui __aeabi_f2uiz -#define fp64_l2d __aeabi_l2d -#define fp64_ul2d __aeabi_ul2d -#define fp64_l2f __aeabi_l2f -#define fp64_ul2f __aeabi_ul2f -#if LJ_TARGET_IOS -#define fp64_d2l __fixdfdi -#define fp64_d2ul __fixunsdfdi -#define fp64_f2l __fixsfdi -#define fp64_f2ul __fixunssfdi -#else -#define fp64_d2l __aeabi_d2lz -#define fp64_d2ul __aeabi_d2ulz -#define fp64_f2l __aeabi_f2lz -#define fp64_f2ul __aeabi_f2ulz -#endif -#elif LJ_TARGET_MIPS -#define softfp_add __adddf3 -#define softfp_sub __subdf3 -#define softfp_mul __muldf3 -#define softfp_div __divdf3 -#define softfp_cmp __ledf2 -#define softfp_i2d __floatsidf -#define softfp_d2i __fixdfsi -#define softfp_ui2d __floatunsidf -#define softfp_f2d __extendsfdf2 -#define softfp_d2ui __fixunsdfsi -#define softfp_d2f __truncdfsf2 -#define softfp_i2f __floatsisf -#define softfp_ui2f __floatunsisf -#define softfp_f2i __fixsfsi -#define softfp_f2ui __fixunssfsi -#else -#error "Missing soft-float definitions for target architecture" -#endif -extern double softfp_add(double a, double b); -extern double softfp_sub(double a, double b); -extern double softfp_mul(double a, double b); -extern double softfp_div(double a, double b); -extern void softfp_cmp(double a, double b); -extern double softfp_i2d(int32_t a); -extern int32_t softfp_d2i(double a); -#if LJ_HASFFI -extern double softfp_ui2d(uint32_t a); -extern double softfp_f2d(float a); -extern uint32_t softfp_d2ui(double a); -extern float softfp_d2f(double a); -extern float softfp_i2f(int32_t a); -extern float softfp_ui2f(uint32_t a); -extern int32_t softfp_f2i(float a); -extern uint32_t softfp_f2ui(float a); -#endif -#if LJ_TARGET_MIPS -extern double lj_vm_sfmin(double a, double b); -extern double lj_vm_sfmax(double a, double b); -#endif -#endif #if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP) #ifdef __GNUC__ diff --git a/src/lj_iropt.h b/src/lj_iropt.h index 73aef0ef38..0684ab2a75 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h @@ -11,7 +11,6 @@ #include "lj_obj.h" #include "lj_jit.h" -#if LJ_HASJIT /* IR emitter. */ LJ_FUNC void LJ_FASTCALL lj_ir_growtop(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_ir_emit(jit_State *J); @@ -50,11 +49,7 @@ LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t); LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot); LJ_FUNC TRef lj_ir_ktrace(jit_State *J); -#if LJ_64 #define lj_ir_kintp(J, k) lj_ir_kint64(J, (uint64_t)(k)) -#else -#define lj_ir_kintp(J, k) lj_ir_kint(J, (int32_t)(k)) -#endif static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n) { @@ -137,9 +132,7 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef key); LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_toint(jit_State *J, TRef tr); LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr); -#if LJ_HASFFI LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef key); -#endif LJ_FUNC TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc, IROp op); LJ_FUNC TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc); @@ -150,13 +143,8 @@ LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase); /* Optimization passes. */ LJ_FUNC void lj_opt_dce(jit_State *J); LJ_FUNC int lj_opt_loop(jit_State *J); -#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) -LJ_FUNC void lj_opt_split(jit_State *J); -#else #define lj_opt_split(J) UNUSED(J) -#endif LJ_FUNC void lj_opt_sink(jit_State *J); -#endif #endif diff --git a/src/lj_jit.h b/src/lj_jit.h index 92054e3df6..b3408e9bb2 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -13,7 +13,6 @@ #define JIT_F_ON 0x00000001 /* CPU-specific JIT engine flags. */ -#if LJ_TARGET_X86ORX64 #define JIT_F_SSE2 0x00000010 #define JIT_F_SSE3 0x00000020 #define JIT_F_SSE4_1 0x00000040 @@ -24,41 +23,6 @@ /* Names for the CPU-specific flags. Must match the order above. */ #define JIT_F_CPU_FIRST JIT_F_SSE2 #define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM\4BMI2" -#elif LJ_TARGET_ARM -#define JIT_F_ARMV6_ 0x00000010 -#define JIT_F_ARMV6T2_ 0x00000020 -#define JIT_F_ARMV7 0x00000040 -#define JIT_F_VFPV2 0x00000080 -#define JIT_F_VFPV3 0x00000100 - -#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7) -#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7) -#define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3) - -/* Names for the CPU-specific flags. Must match the order above. */ -#define JIT_F_CPU_FIRST JIT_F_ARMV6_ -#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5VFPv2\5VFPv3" -#elif LJ_TARGET_PPC -#define JIT_F_SQRT 0x00000010 -#define JIT_F_ROUND 0x00000020 - -/* Names for the CPU-specific flags. Must match the order above. */ -#define JIT_F_CPU_FIRST JIT_F_SQRT -#define JIT_F_CPUSTRING "\4SQRT\5ROUND" -#elif LJ_TARGET_MIPS -#define JIT_F_MIPSXXR2 0x00000010 - -/* Names for the CPU-specific flags. Must match the order above. */ -#define JIT_F_CPU_FIRST JIT_F_MIPSXXR2 -#if LJ_TARGET_MIPS32 -#define JIT_F_CPUSTRING "\010MIPS32R2" -#else -#define JIT_F_CPUSTRING "\010MIPS64R2" -#endif -#else -#define JIT_F_CPU_FIRST 0 -#define JIT_F_CPUSTRING "" -#endif /* Optimization flags. */ #define JIT_F_OPT_MASK 0x0fff0000 @@ -87,13 +51,8 @@ JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE) #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 -#if LJ_TARGET_WINDOWS || LJ_64 /* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */ #define JIT_P_sizemcode_DEFAULT 64 -#else -/* Could go as low as 4K, but the mmap() overhead would be rather high. */ -#define JIT_P_sizemcode_DEFAULT 32 -#endif /* Optimization parameters and their defaults. Length is a char in octal! */ #define JIT_PARAMDEF(_) \ @@ -152,11 +111,7 @@ typedef enum { } PostProc; /* Machine code type. */ -#if LJ_TARGET_X86ORX64 typedef uint8_t MCode; -#else -typedef uint32_t MCode; -#endif /* Stack snapshot header. */ typedef struct SnapShot { @@ -183,9 +138,6 @@ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) #define SNAP_TR(slot, tr) \ (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) -#if !LJ_FR2 -#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) -#endif #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) #define snap_ref(sn) ((sn) & 0xffff) #define snap_slot(sn) ((BCReg)((sn) >> 24)) @@ -194,13 +146,9 @@ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn) { -#if LJ_FR2 uint64_t pcbase; memcpy(&pcbase, sn, sizeof(uint64_t)); return (const BCIns *)(pcbase >> 8); -#else - return (const BCIns *)(uintptr_t)*sn; -#endif } /* Snapshot and exit numbers. */ @@ -230,9 +178,7 @@ typedef struct GCtrace { uint8_t topslot; /* Top stack slot already checked to be allocated. */ uint8_t linktype; /* Type of link. */ IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ -#if LJ_GC64 uint32_t unused_gc64; -#endif GCRef gclist; IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ @@ -255,9 +201,6 @@ typedef struct GCtrace { TraceNo1 nextside; /* Next side trace of same root trace. */ uint8_t sinktags; /* Trace has SINK tags. */ uint8_t unused1; -#ifdef LUAJIT_USE_GDBJIT - void *gdbjit_entry; /* GDB JIT entry. */ -#endif } GCtrace; #define gco2trace(o) check_exp((o)->gch.gct == ~LJ_TTRACE, (GCtrace *)(o)) @@ -325,41 +268,15 @@ enum { }; enum { -#if LJ_TARGET_X86ORX64 LJ_K64_TOBIT, /* 2^52 + 2^51 */ LJ_K64_2P64, /* 2^64 */ LJ_K64_M2P64, /* -2^64 */ -#if LJ_32 - LJ_K64_M2P64_31, /* -2^64 or -2^31 */ -#else LJ_K64_M2P64_31 = LJ_K64_M2P64, -#endif -#endif -#if LJ_TARGET_MIPS - LJ_K64_2P31, /* 2^31 */ -#if LJ_64 - LJ_K64_2P63, /* 2^63 */ - LJ_K64_M2P64, /* -2^64 */ -#endif -#endif LJ_K64__MAX, }; enum { -#if LJ_TARGET_X86ORX64 LJ_K32_M2P64_31, /* -2^64 or -2^31 */ -#endif -#if LJ_TARGET_PPC - LJ_K32_2P52_2P31, /* 2^52 + 2^31 */ - LJ_K32_2P52, /* 2^52 */ -#endif -#if LJ_TARGET_PPC || LJ_TARGET_MIPS - LJ_K32_2P31, /* 2^31 */ -#endif -#if LJ_TARGET_MIPS64 - LJ_K32_2P63, /* 2^63 */ - LJ_K32_M2P64, /* -2^64 */ -#endif LJ_K32__MAX }; @@ -368,13 +285,8 @@ enum { ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) /* Set/reset flag to activate the SPLIT pass for the current trace. */ -#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) -#define lj_needsplit(J) (J->needsplit = 1) -#define lj_resetsplit(J) (J->needsplit = 0) -#else #define lj_needsplit(J) UNUSED(J) #define lj_resetsplit(J) UNUSED(J) -#endif /* Fold state is used to fold instructions on-the-fly. */ typedef struct FoldState { @@ -431,9 +343,6 @@ typedef struct jit_State { MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ PostProc postproc; /* Required post-processing after execution. */ -#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) - uint8_t needsplit; /* Need SPLIT pass. */ -#endif uint8_t retryrec; /* Retry recording. */ GCRef *trace; /* Array of traces. */ @@ -477,15 +386,7 @@ typedef struct jit_State { TValue errinfo; /* Additional info element for trace errors. */ -#if LJ_HASPROFILE - GCproto *prev_pt; /* Previous prototype. */ - BCLine prev_line; /* Previous line. */ - int prof_mode; /* Profiling mode: 0, 'f', 'l'. */ -#endif } -#if LJ_TARGET_ARM -LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */ -#endif jit_State; /* Trivial PRNG e.g. used for penalty randomization. */ diff --git a/src/lj_lex.c b/src/lj_lex.c index 2d2f8194cf..3e27edc390 100644 --- a/src/lj_lex.c +++ b/src/lj_lex.c @@ -14,12 +14,10 @@ #include "lj_err.h" #include "lj_buf.h" #include "lj_str.h" -#if LJ_HASFFI #include "lj_tab.h" #include "lj_ctype.h" #include "lj_cdata.h" #include "lualib.h" -#endif #include "lj_state.h" #include "lj_lex.h" #include "lj_parse.h" @@ -106,7 +104,6 @@ static void lex_number(LexState *ls, TValue *tv) setitype(tv, LJ_TISNUM); } else if (fmt == STRSCAN_NUM) { /* Already in correct format. */ -#if LJ_HASFFI } else if (fmt != STRSCAN_ERROR) { lua_State *L = ls->L; GCcdata *cd; @@ -125,7 +122,6 @@ static void lex_number(LexState *ls, TValue *tv) *(uint64_t *)cdataptr(cd) = tv->u64; } lj_parse_keepcdata(ls, tv, cd); -#endif } else { lua_assert(fmt == STRSCAN_ERROR); lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER); diff --git a/src/lj_lib.h b/src/lj_lib.h index 37ec9d7800..8f6823e1cd 100644 --- a/src/lj_lib.h +++ b/src/lj_lib.h @@ -47,25 +47,10 @@ LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg); LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst); /* Avoid including lj_frame.h. */ -#if LJ_GC64 #define lj_lib_upvalue(L, n) \ (&gcval(L->base-2)->fn.c.upvalue[(n)-1]) -#elif LJ_FR2 -#define lj_lib_upvalue(L, n) \ - (&gcref((L->base-2)->gcr)->fn.c.upvalue[(n)-1]) -#else -#define lj_lib_upvalue(L, n) \ - (&gcref((L->base-1)->fr.func)->fn.c.upvalue[(n)-1]) -#endif -#if LJ_TARGET_WINDOWS -#define lj_lib_checkfpu(L) \ - do { setnumV(L->top++, (lua_Number)1437217655); \ - if (lua_tointeger(L, -1) != 1437217655) lj_err_caller(L, LJ_ERR_BADFPU); \ - L->top--; } while (0) -#else #define lj_lib_checkfpu(L) UNUSED(L) -#endif LJ_FUNC GCfunc *lj_lib_pushcc(lua_State *L, lua_CFunction f, int id, int n); #define lj_lib_pushcf(L, fn, id) (lj_lib_pushcc(L, (fn), (id), 0)) diff --git a/src/lj_mcode.c b/src/lj_mcode.c index 77035bf72a..4d6e492189 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c @@ -7,30 +7,22 @@ #define LUA_CORE #include "lj_obj.h" -#if LJ_HASJIT #include "lj_gc.h" #include "lj_err.h" #include "lj_jit.h" #include "lj_mcode.h" #include "lj_trace.h" #include "lj_dispatch.h" -#endif -#if LJ_HASJIT || LJ_HASFFI #include "lj_vm.h" -#endif /* -- OS-specific functions ----------------------------------------------- */ -#if LJ_HASJIT || LJ_HASFFI /* Define this if you want to run LuaJIT with Valgrind. */ #ifdef LUAJIT_USE_VALGRIND #include #endif -#if LJ_TARGET_IOS -void sys_icache_invalidate(void *start, size_t len); -#endif /* Synchronize data/instruction cache. */ void lj_mcode_sync(void *start, void *end) @@ -38,54 +30,11 @@ void lj_mcode_sync(void *start, void *end) #ifdef LUAJIT_USE_VALGRIND VALGRIND_DISCARD_TRANSLATIONS(start, (char *)end-(char *)start); #endif -#if LJ_TARGET_X86ORX64 UNUSED(start); UNUSED(end); -#elif LJ_TARGET_IOS - sys_icache_invalidate(start, (char *)end-(char *)start); -#elif LJ_TARGET_PPC - lj_vm_cachesync(start, end); -#elif defined(__GNUC__) - __clear_cache(start, end); -#else -#error "Missing builtin to flush instruction cache" -#endif } -#endif - -#if LJ_HASJIT - -#if LJ_TARGET_WINDOWS -#define WIN32_LEAN_AND_MEAN -#include -#define MCPROT_RW PAGE_READWRITE -#define MCPROT_RX PAGE_EXECUTE_READ -#define MCPROT_RWX PAGE_EXECUTE_READWRITE - -static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot) -{ - void *p = VirtualAlloc((void *)hint, sz, - MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); - if (!p && !hint) - lj_trace_err(J, LJ_TRERR_MCODEAL); - return p; -} - -static void mcode_free(jit_State *J, void *p, size_t sz) -{ - UNUSED(J); UNUSED(sz); - VirtualFree(p, 0, MEM_RELEASE); -} - -static int mcode_setprot(void *p, size_t sz, DWORD prot) -{ - DWORD oprot; - return !VirtualProtect(p, sz, prot, &oprot); -} - -#elif LJ_TARGET_POSIX #include @@ -118,30 +67,6 @@ static int mcode_setprot(void *p, size_t sz, int prot) return mprotect(p, sz, prot); } -#elif LJ_64 - -#error "Missing OS support for explicit placement of executable memory" - -#else - -/* Fallback allocator. This will fail if memory is not executable by default. */ -#define LUAJIT_UNPROTECT_MCODE -#define MCPROT_RW 0 -#define MCPROT_RX 0 -#define MCPROT_RWX 0 - -static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot) -{ - UNUSED(hint); UNUSED(prot); - return lj_mem_new(J->L, sz); -} - -static void mcode_free(jit_State *J, void *p, size_t sz) -{ - lj_mem_free(J2G(J), p, sz); -} - -#endif /* -- MCode area protection ----------------------------------------------- */ @@ -204,11 +129,7 @@ static void mcode_protect(jit_State *J, int prot) /* -- MCode area allocation ----------------------------------------------- */ -#if LJ_64 #define mcode_validptr(p) (p) -#else -#define mcode_validptr(p) ((p) && (uintptr_t)(p) < 0xffff0000) -#endif #ifdef LJ_TARGET_JUMPRANGE @@ -219,13 +140,7 @@ static void *mcode_alloc(jit_State *J, size_t sz) ** Try addresses within a distance of target-range/2+1MB..target+range/2-1MB. ** Use half the jump range so every address in the range can reach any other. */ -#if LJ_TARGET_MIPS - /* Use the middle of the 256MB-aligned region. */ - uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & - ~(uintptr_t)0x0fffffffu) + 0x08000000u; -#else uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff; -#endif const uintptr_t range = (1u << (LJ_TARGET_JUMPRANGE-1)) - (1u << 21); /* First try a contiguous area below the last one. */ uintptr_t hint = J->mcarea ? (uintptr_t)J->mcarea - sz : 0; @@ -255,17 +170,7 @@ static void *mcode_alloc(jit_State *J, size_t sz) /* All memory addresses are reachable by relative jumps. */ static void *mcode_alloc(jit_State *J, size_t sz) { -#ifdef __OpenBSD__ - /* Allow better executable memory allocation for OpenBSD W^X mode. */ - void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN); - if (p && mcode_setprot(p, sz, MCPROT_GEN)) { - mcode_free(J, p, sz); - return NULL; - } - return p; -#else return mcode_alloc_at(J, 0, sz, MCPROT_GEN); -#endif } #endif @@ -384,4 +289,3 @@ void lj_mcode_limiterr(jit_State *J, size_t need) lj_trace_err(J, LJ_TRERR_MCODELM); /* Retry with new area. */ } -#endif diff --git a/src/lj_mcode.h b/src/lj_mcode.h index f0847e931e..63e507719c 100644 --- a/src/lj_mcode.h +++ b/src/lj_mcode.h @@ -8,11 +8,8 @@ #include "lj_obj.h" -#if LJ_HASJIT || LJ_HASFFI LJ_FUNC void lj_mcode_sync(void *start, void *end); -#endif -#if LJ_HASJIT #include "lj_jit.h" @@ -25,6 +22,5 @@ LJ_FUNC_NORET void lj_mcode_limiterr(jit_State *J, size_t need); #define lj_mcode_commitbot(J, m) (J->mcbot = (m)) -#endif #endif diff --git a/src/lj_meta.c b/src/lj_meta.c index 0bd4d8429b..70c3b99407 100644 --- a/src/lj_meta.c +++ b/src/lj_meta.c @@ -73,7 +73,6 @@ cTValue *lj_meta_lookup(lua_State *L, cTValue *o, MMS mm) return niltv(L); } -#if LJ_HASFFI /* Tailcall from C function. */ int lj_meta_tailcall(lua_State *L, cTValue *tv) { @@ -100,7 +99,6 @@ int lj_meta_tailcall(lua_State *L, cTValue *tv) */ return 0; } -#endif /* Setup call to metamethod to be run by Assembler VM. */ static TValue *mmcall(lua_State *L, ASMFunction cont, cTValue *mo, @@ -346,7 +344,6 @@ TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne) return (TValue *)(intptr_t)ne; } -#if LJ_HASFFI TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins) { ASMFunction cont = (bc_op(ins) & 1) ? lj_cont_condf : lj_cont_condt; @@ -373,7 +370,6 @@ TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins) else return (TValue *)(intptr_t)(bc_op(ins) & 1); } -#endif /* Helper for ordered comparisons. String compare, __lt/__le metamethods. */ TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op) diff --git a/src/lj_meta.h b/src/lj_meta.h index 73b4572473..94e7fdf139 100644 --- a/src/lj_meta.h +++ b/src/lj_meta.h @@ -12,9 +12,7 @@ LJ_FUNC void lj_meta_init(lua_State *L); LJ_FUNC cTValue *lj_meta_cache(GCtab *mt, MMS mm, GCstr *name); LJ_FUNC cTValue *lj_meta_lookup(lua_State *L, cTValue *o, MMS mm); -#if LJ_HASFFI LJ_FUNC int lj_meta_tailcall(lua_State *L, cTValue *tv); -#endif #define lj_meta_fastg(g, mt, mm) \ ((mt) == NULL ? NULL : ((mt)->nomm & (1u<<(mm))) ? NULL : \ diff --git a/src/lj_obj.h b/src/lj_obj.h index 52372c3e7e..1760fb5d9a 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h @@ -17,49 +17,29 @@ /* Memory and GC object sizes. */ typedef uint32_t MSize; -#if LJ_GC64 typedef uint64_t GCSize; -#else -typedef uint32_t GCSize; -#endif /* Memory reference */ typedef struct MRef { -#if LJ_GC64 uint64_t ptr64; /* True 64 bit pointer. */ -#else - uint32_t ptr32; /* Pseudo 32 bit pointer. */ -#endif } MRef; -#if LJ_GC64 #define mref(r, t) ((t *)(void *)(r).ptr64) #define setmref(r, p) ((r).ptr64 = (uint64_t)(void *)(p)) #define setmrefr(r, v) ((r).ptr64 = (v).ptr64) -#else -#define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32) - -#define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p)) -#define setmrefr(r, v) ((r).ptr32 = (v).ptr32) -#endif /* -- GC object references (32 bit address space) ------------------------- */ /* GCobj reference */ typedef struct GCRef { -#if LJ_GC64 uint64_t gcptr64; /* True 64 bit pointer. */ -#else - uint32_t gcptr32; /* Pseudo 32 bit pointer. */ -#endif } GCRef; /* Common GC header for all collectable objects. */ #define GCHeader GCRef nextgc; uint8_t marked; uint8_t gct /* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */ -#if LJ_GC64 #define gcref(r) ((GCobj *)(r).gcptr64) #define gcrefp(r, t) ((t *)(void *)(r).gcptr64) #define gcrefu(r) ((r).gcptr64) @@ -71,17 +51,6 @@ typedef struct GCRef { #define setgcrefp(r, p) ((r).gcptr64 = (uint64_t)(p)) #define setgcrefnull(r) ((r).gcptr64 = 0) #define setgcrefr(r, v) ((r).gcptr64 = (v).gcptr64) -#else -#define gcref(r) ((GCobj *)(uintptr_t)(r).gcptr32) -#define gcrefp(r, t) ((t *)(void *)(uintptr_t)(r).gcptr32) -#define gcrefu(r) ((r).gcptr32) -#define gcrefeq(r1, r2) ((r1).gcptr32 == (r2).gcptr32) - -#define setgcref(r, gc) ((r).gcptr32 = (uint32_t)(uintptr_t)&(gc)->gch) -#define setgcrefp(r, p) ((r).gcptr32 = (uint32_t)(uintptr_t)(p)) -#define setgcrefnull(r) ((r).gcptr32 = 0) -#define setgcrefr(r, v) ((r).gcptr32 = (v).gcptr32) -#endif #define gcnext(gc) (gcref((gc)->gch.nextgc)) @@ -172,7 +141,6 @@ typedef union { typedef LJ_ALIGN(8) union TValue { uint64_t u64; /* 64 bit pattern overlaps number. */ lua_Number n; /* Number object overlaps split tag/value object. */ -#if LJ_GC64 GCRef gcr; /* GCobj reference with tag. */ int64_t it64; struct { @@ -181,27 +149,7 @@ typedef LJ_ALIGN(8) union TValue { , uint32_t it; /* Internal object tag. Must overlap MSW of number. */ ) }; -#else - struct { - LJ_ENDIAN_LOHI( - union { - GCRef gcr; /* GCobj reference (if any). */ - int32_t i; /* Integer value. */ - }; - , uint32_t it; /* Internal object tag. Must overlap MSW of number. */ - ) - }; -#endif -#if LJ_FR2 int64_t ftsz; /* Frame type and size of previous frame, or PC. */ -#else - struct { - LJ_ENDIAN_LOHI( - GCRef func; /* Function for next frame (or dummy L). */ - , FrameLink tp; /* Link to previous frame. */ - ) - } fr; -#endif struct { LJ_ENDIAN_LOHI( uint32_t lo; /* Lower 32 bits of number. */ @@ -271,19 +219,13 @@ typedef const TValue cTValue; #define LJ_TNUMX (~13u) /* Integers have itype == LJ_TISNUM doubles have itype < LJ_TISNUM */ -#if LJ_64 && !LJ_GC64 -#define LJ_TISNUM 0xfffeffffu -#else #define LJ_TISNUM LJ_TNUMX -#endif #define LJ_TISTRUECOND LJ_TFALSE #define LJ_TISPRI LJ_TTRUE #define LJ_TISGCV (LJ_TSTR+1) #define LJ_TISTABUD LJ_TTAB -#if LJ_GC64 #define LJ_GCVMASK (((uint64_t)1 << 47) - 1) -#endif /* -- String object ------------------------------------------------------- */ @@ -358,9 +300,7 @@ typedef struct GCproto { uint8_t numparams; /* Number of parameters. */ uint8_t framesize; /* Fixed frame size. */ MSize sizebc; /* Number of bytecode instructions. */ -#if LJ_GC64 uint32_t unused_gc64; -#endif GCRef gclist; MRef k; /* Split constant array (points to the middle). */ MRef uv; /* Upvalue list. local slot|0x8000 or parent uv idx. */ @@ -472,9 +412,6 @@ typedef struct Node { TValue val; /* Value object. Must be first field. */ TValue key; /* Key object. */ MRef next; /* Hash chain. */ -#if !LJ_GC64 - MRef freetop; /* Top of free elements (stored in t->node[0]). */ -#endif } Node; LJ_STATIC_ASSERT(offsetof(Node, val) == 0); @@ -489,22 +426,15 @@ typedef struct GCtab { MRef node; /* Hash part. */ uint32_t asize; /* Size of array part (keys [0, asize-1]). */ uint32_t hmask; /* Hash part mask (size of hash part - 1). */ -#if LJ_GC64 MRef freetop; /* Top of free elements. */ -#endif } GCtab; #define sizetabcolo(n) ((n)*sizeof(TValue) + sizeof(GCtab)) #define tabref(r) (&gcref((r))->tab) #define noderef(r) (mref((r), Node)) #define nextnode(n) (mref((n)->next, Node)) -#if LJ_GC64 #define getfreetop(t, n) (noderef((t)->freetop)) #define setfreetop(t, n, v) (setmref((t)->freetop, (v))) -#else -#define getfreetop(t, n) (noderef((n)->freetop)) -#define setfreetop(t, n, v) (setmref((n)->freetop, (v))) -#endif /* -- State objects ------------------------------------------------------- */ @@ -523,19 +453,9 @@ enum { #define setvmstate(g, st) ((g)->vmstate = ~LJ_VMST_##st) /* Metamethods. ORDER MM */ -#ifdef LJ_HASFFI #define MMDEF_FFI(_) _(new) -#else -#define MMDEF_FFI(_) -#endif -#if LJ_52 || LJ_HASFFI #define MMDEF_PAIRS(_) _(pairs) _(ipairs) -#else -#define MMDEF_PAIRS(_) -#define MM_pairs 255 -#define MM_ipairs 255 -#endif #define MMDEF(_) \ _(index) _(newindex) _(gc) _(mode) _(eq) _(len) \ @@ -666,13 +586,7 @@ struct lua_State { #define registry(L) (&G(L)->registrytv) /* Macros to access the currently executing (Lua) function. */ -#if LJ_GC64 #define curr_func(L) (&gcval(L->base-2)->fn) -#elif LJ_FR2 -#define curr_func(L) (&gcref((L->base-2)->gcr)->fn) -#else -#define curr_func(L) (&gcref((L->base-1)->fr.func)->fn) -#endif #define curr_funcisL(L) (isluafunc(curr_func(L))) #define curr_proto(L) (funcproto(curr_func(L))) #define curr_topL(L) (L->base + curr_proto(L)->framesize) @@ -736,21 +650,12 @@ typedef union GCobj { #endif /* Macros to test types. */ -#if LJ_GC64 #define itype(o) ((uint32_t)((o)->it64 >> 47)) #define tvisnil(o) ((o)->it64 == -1) -#else -#define itype(o) ((o)->it) -#define tvisnil(o) (itype(o) == LJ_TNIL) -#endif #define tvisfalse(o) (itype(o) == LJ_TFALSE) #define tvistrue(o) (itype(o) == LJ_TTRUE) #define tvisbool(o) (tvisfalse(o) || tvistrue(o)) -#if LJ_64 && !LJ_GC64 -#define tvislightud(o) (((int32_t)itype(o) >> 15) == -2) -#else #define tvislightud(o) (itype(o) == LJ_TLIGHTUD) -#endif #define tvisstr(o) (itype(o) == LJ_TSTR) #define tvisfunc(o) (itype(o) == LJ_TFUNC) #define tvisthread(o) (itype(o) == LJ_TTHREAD) @@ -769,37 +674,20 @@ typedef union GCobj { /* Special macros to test numbers for NaN, +0, -0, +1 and raw equality. */ #define tvisnan(o) ((o)->n != (o)->n) -#if LJ_64 #define tviszero(o) (((o)->u64 << 1) == 0) -#else -#define tviszero(o) (((o)->u32.lo | ((o)->u32.hi << 1)) == 0) -#endif #define tvispzero(o) ((o)->u64 == 0) #define tvismzero(o) ((o)->u64 == U64x(80000000,00000000)) #define tvispone(o) ((o)->u64 == U64x(3ff00000,00000000)) #define rawnumequal(o1, o2) ((o1)->u64 == (o2)->u64) /* Macros to convert type ids. */ -#if LJ_64 && !LJ_GC64 -#define itypemap(o) \ - (tvisnumber(o) ? ~LJ_TNUMX : tvislightud(o) ? ~LJ_TLIGHTUD : ~itype(o)) -#else #define itypemap(o) (tvisnumber(o) ? ~LJ_TNUMX : ~itype(o)) -#endif /* Macros to get tagged values. */ -#if LJ_GC64 #define gcval(o) ((GCobj *)(gcrefu((o)->gcr) & LJ_GCVMASK)) -#else -#define gcval(o) (gcref((o)->gcr)) -#endif #define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - itype(o))) -#if LJ_64 #define lightudV(o) \ check_exp(tvislightud(o), (void *)((o)->u64 & U64x(00007fff,ffffffff))) -#else -#define lightudV(o) check_exp(tvislightud(o), gcrefp((o)->gcr, void)) -#endif #define gcV(o) check_exp(tvisgcv(o), gcval(o)) #define strV(o) check_exp(tvisstr(o), &gcval(o)->str) #define funcV(o) check_exp(tvisfunc(o), &gcval(o)->fn) @@ -812,48 +700,21 @@ typedef union GCobj { #define intV(o) check_exp(tvisint(o), (int32_t)(o)->i) /* Macros to set tagged values. */ -#if LJ_GC64 #define setitype(o, i) ((o)->it = ((i) << 15)) #define setnilV(o) ((o)->it64 = -1) #define setpriV(o, x) ((o)->it64 = (int64_t)~((uint64_t)~(x)<<47)) #define setboolV(o, x) ((o)->it64 = (int64_t)~((uint64_t)((x)+1)<<47)) -#else -#define setitype(o, i) ((o)->it = (i)) -#define setnilV(o) ((o)->it = LJ_TNIL) -#define setboolV(o, x) ((o)->it = LJ_TFALSE-(uint32_t)(x)) -#define setpriV(o, i) (setitype((o), (i))) -#endif static LJ_AINLINE void setlightudV(TValue *o, void *p) { -#if LJ_GC64 o->u64 = (uint64_t)p | (((uint64_t)LJ_TLIGHTUD) << 47); -#elif LJ_64 - o->u64 = (uint64_t)p | (((uint64_t)0xffff) << 48); -#else - setgcrefp(o->gcr, p); setitype(o, LJ_TLIGHTUD); -#endif } -#if LJ_64 #define checklightudptr(L, p) \ (((uint64_t)(p) >> 47) ? (lj_err_msg(L, LJ_ERR_BADLU), NULL) : (p)) -#else -#define checklightudptr(L, p) (p) -#endif -#if LJ_FR2 #define contptr(f) ((void *)(f)) #define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)contptr(f)) -#elif LJ_64 -#define contptr(f) \ - ((void *)(uintptr_t)(uint32_t)((intptr_t)(f) - (intptr_t)lj_vm_asm_begin)) -#define setcont(o, f) \ - ((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin) -#else -#define contptr(f) ((void *)(f)) -#define setcont(o, f) setlightudV((o), contptr(f)) -#endif #define tvchecklive(L, o) \ UNUSED(L), lua_assert(!tvisgcv(o) || \ @@ -861,11 +722,7 @@ static LJ_AINLINE void setlightudV(TValue *o, void *p) static LJ_AINLINE void setgcVraw(TValue *o, GCobj *v, uint32_t itype) { -#if LJ_GC64 setgcreft(o->gcr, v, itype); -#else - setgcref(o->gcr, v); setitype(o, itype); -#endif } static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t it) @@ -908,11 +765,7 @@ static LJ_AINLINE void setint64V(TValue *o, int64_t i) setnumV(o, (lua_Number)i); } -#if LJ_64 #define setintptrV(o, i) setint64V((o), (i)) -#else -#define setintptrV(o, i) setintV((o), (i)) -#endif /* Copy tagged values. */ static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2) @@ -922,30 +775,18 @@ static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2) /* -- Number to integer conversion ---------------------------------------- */ -#if LJ_SOFTFP -LJ_ASMF int32_t lj_vm_tobit(double x); -#endif static LJ_AINLINE int32_t lj_num2bit(lua_Number n) { -#if LJ_SOFTFP - return lj_vm_tobit(n); -#else TValue o; o.n = n + 6755399441055744.0; /* 2^52 + 2^51 */ return (int32_t)o.u32.lo; -#endif } #define lj_num2int(n) ((int32_t)(n)) static LJ_AINLINE uint64_t lj_num2u64(lua_Number n) { -#ifdef _MSC_VER - if (n >= 9223372036854775808.0) /* They think it's a feature. */ - return (uint64_t)(int64_t)(n - 18446744073709551616.0); - else -#endif return (uint64_t)n; } diff --git a/src/lj_opt_dce.c b/src/lj_opt_dce.c index 2417f3242a..702bcfb44f 100644 --- a/src/lj_opt_dce.c +++ b/src/lj_opt_dce.c @@ -8,7 +8,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_ir.h" #include "lj_jit.h" @@ -75,4 +74,3 @@ void lj_opt_dce(jit_State *J) #undef IR -#endif diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 408811f2cd..f4de37759f 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -12,7 +12,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_buf.h" #include "lj_str.h" @@ -22,10 +21,8 @@ #include "lj_ircall.h" #include "lj_iropt.h" #include "lj_trace.h" -#if LJ_HASFFI #include "lj_ctype.h" #include "lj_carith.h" -#endif #include "lj_vm.h" #include "lj_strscan.h" #include "lj_strfmt.h" @@ -197,12 +194,8 @@ LJFOLDF(kfold_numabsneg) LJFOLD(LDEXP KNUM KINT) LJFOLDF(kfold_ldexp) { -#if LJ_TARGET_X86ORX64 UNUSED(J); return NEXTFOLD; -#else - return lj_ir_knum(J, ldexp(knumleft, fright->i)); -#endif } LJFOLD(FPMATH KNUM any) @@ -347,7 +340,6 @@ LJFOLDF(kfold_intcomp0) static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) { switch (op) { -#if LJ_HASFFI case IR_ADD: k1 += k2; break; case IR_SUB: k1 -= k2; break; case IR_MUL: k1 *= k2; break; @@ -359,7 +351,6 @@ static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) case IR_BSAR: k1 >>= (k2 & 63); break; case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break; case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break; -#endif default: UNUSED(k2); lua_assert(0); break; } return k1; @@ -382,7 +373,6 @@ LJFOLD(MOD KINT64 KINT64) LJFOLD(POW KINT64 KINT64) LJFOLDF(kfold_int64arith2) { -#if LJ_HASFFI uint64_t k1 = ir_k64(fleft)->u64, k2 = ir_k64(fright)->u64; if (irt_isi64(fins->t)) { k1 = fins->o == IR_DIV ? lj_carith_divi64((int64_t)k1, (int64_t)k2) : @@ -394,9 +384,6 @@ LJFOLDF(kfold_int64arith2) lj_carith_powu64(k1, k2); } return INT64FOLD(k1); -#else - UNUSED(J); lua_assert(0); return FAILFOLD; -#endif } LJFOLD(BSHL KINT64 KINT) @@ -406,33 +393,21 @@ LJFOLD(BROL KINT64 KINT) LJFOLD(BROR KINT64 KINT) LJFOLDF(kfold_int64shift) { -#if LJ_HASFFI uint64_t k = ir_k64(fleft)->u64; int32_t sh = (fright->i & 63); return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL)); -#else - UNUSED(J); lua_assert(0); return FAILFOLD; -#endif } LJFOLD(BNOT KINT64) LJFOLDF(kfold_bnot64) { -#if LJ_HASFFI return INT64FOLD(~ir_k64(fleft)->u64); -#else - UNUSED(J); lua_assert(0); return FAILFOLD; -#endif } LJFOLD(BSWAP KINT64) LJFOLDF(kfold_bswap64) { -#if LJ_HASFFI return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64)); -#else - UNUSED(J); lua_assert(0); return FAILFOLD; -#endif } LJFOLD(LT KINT64 KINT64) @@ -445,7 +420,6 @@ LJFOLD(ULE KINT64 KINT64) LJFOLD(UGT KINT64 KINT64) LJFOLDF(kfold_int64comp) { -#if LJ_HASFFI uint64_t a = ir_k64(fleft)->u64, b = ir_k64(fright)->u64; switch ((IROp)fins->o) { case IR_LT: return CONDFOLD(a < b); @@ -458,21 +432,14 @@ LJFOLDF(kfold_int64comp) case IR_UGT: return CONDFOLD((uint64_t)a > (uint64_t)b); default: lua_assert(0); return FAILFOLD; } -#else - UNUSED(J); lua_assert(0); return FAILFOLD; -#endif } LJFOLD(UGE any KINT64) LJFOLDF(kfold_int64comp0) { -#if LJ_HASFFI if (ir_k64(fright)->u64 == 0) return DROPFOLD; return NEXTFOLD; -#else - UNUSED(J); lua_assert(0); return FAILFOLD; -#endif } /* -- Constant folding for strings ---------------------------------------- */ @@ -713,12 +680,7 @@ LJFOLD(ADD KGC KINT64) LJFOLDF(kfold_add_kgc) { GCobj *o = ir_kgc(fleft); -#if LJ_64 ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64; -#else - ptrdiff_t ofs = fright->i; -#endif -#if LJ_HASFFI if (irt_iscdata(fleft->t)) { CType *ct = ctype_raw(ctype_ctsG(J2G(J)), gco2cd(o)->ctypeid); if (ctype_isnum(ct->info) || ctype_isenum(ct->info) || @@ -726,7 +688,6 @@ LJFOLDF(kfold_add_kgc) ctype_iscomplex(ct->info) || ctype_isvector(ct->info)) return lj_ir_kkptr(J, (char *)o + ofs); } -#endif return lj_ir_kptr(J, (char *)o + ofs); } @@ -737,11 +698,7 @@ LJFOLD(ADD KKPTR KINT64) LJFOLDF(kfold_add_kptr) { void *p = ir_kptr(fleft); -#if LJ_64 ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64; -#else - ptrdiff_t ofs = fright->i; -#endif return lj_ir_kptr_(J, fleft->o, (char *)p + ofs); } @@ -842,14 +799,7 @@ LJFOLDF(kfold_conv_knum_int_num) LJFOLD(CONV KNUM IRCONV_U32_NUM) LJFOLDF(kfold_conv_knum_u32_num) { -#ifdef _MSC_VER - { /* Workaround for MSVC bug. */ - volatile uint32_t u = (uint32_t)knumleft; - return INTFOLD((int32_t)u); - } -#else return INTFOLD((int32_t)(uint32_t)knumleft); -#endif } LJFOLD(CONV KNUM IRCONV_I64_NUM) @@ -1089,11 +1039,9 @@ LJFOLDF(simplify_numpow_kx) lua_Number n = knumleft; if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */ fins->o = IR_CONV; -#if LJ_TARGET_X86ORX64 fins->op1 = fins->op2; fins->op2 = IRCONV_NUM_INT; fins->op2 = (IRRef1)lj_opt_fold(J); -#endif fins->op1 = (IRRef1)lj_ir_knum_one(J); fins->o = IR_LDEXP; return RETRYFOLD; @@ -1135,14 +1083,7 @@ LJFOLDF(simplify_conv_i64_num) fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT); return RETRYFOLD; } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { -#if LJ_TARGET_X64 return fleft->op1; -#else - /* Reduce to a zero-extension. */ - fins->op1 = fleft->op1; - fins->op2 = (IRT_I64<<5)|IRT_U32; - return RETRYFOLD; -#endif } return NEXTFOLD; } @@ -1229,14 +1170,8 @@ LJFOLDF(simplify_conv_sext) lua_assert(irt_isint(J->scev.t)); if (lo && IR(lo)->i + ofs >= 0) { ok_reduce: -#if LJ_TARGET_X64 /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */ return LEFTFOLD; -#else - /* Reduce to a (cheaper) zero-extension. */ - fins->op2 &= ~IRCONV_SEXT; - return RETRYFOLD; -#endif } } return NEXTFOLD; @@ -1403,13 +1338,9 @@ LJFOLDF(simplify_intmul_k32) LJFOLD(MUL any KINT64) LJFOLDF(simplify_intmul_k64) { -#if LJ_HASFFI if (ir_kint64(fright)->u64 < 0x80000000u) return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); return NEXTFOLD; -#else - UNUSED(J); lua_assert(0); return FAILFOLD; -#endif } LJFOLD(MOD any KINT) @@ -1717,7 +1648,6 @@ LJFOLD(BOR BOR KINT64) LJFOLD(BXOR BXOR KINT64) LJFOLDF(reassoc_intarith_k64) { -#if LJ_HASFFI IRIns *irk = IR(fleft->op2); if (irk->o == IR_KINT64) { uint64_t k = kfold_int64arith(ir_k64(irk)->u64, @@ -1728,9 +1658,6 @@ LJFOLDF(reassoc_intarith_k64) return RETRYFOLD; /* (i o k1) o k2 ==> i o (k1 o k2) */ } return NEXTFOLD; -#else - UNUSED(J); lua_assert(0); return FAILFOLD; -#endif } LJFOLD(MIN MIN any) @@ -2511,4 +2438,3 @@ TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim) #undef knumright #undef emitir -#endif diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c index 04c6d06ddf..697089ab2e 100644 --- a/src/lj_opt_loop.c +++ b/src/lj_opt_loop.c @@ -8,7 +8,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_err.h" #include "lj_buf.h" @@ -446,4 +445,3 @@ int lj_opt_loop(jit_State *J) #undef emitir #undef emitir_raw -#endif diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index cc177d39e8..4355438e1e 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c @@ -11,7 +11,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_tab.h" #include "lj_ir.h" @@ -932,4 +931,3 @@ int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref) #undef fleft #undef fright -#endif diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index cd96ca4b4f..f87e8332e3 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c @@ -9,7 +9,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_bc.h" #include "lj_ir.h" @@ -495,7 +494,6 @@ TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr) return narrow_stripov(J, tr, IR_SUBOV, (IRT_INT<<5)|IRT_INT|IRCONV_TOBIT); } -#if LJ_HASFFI /* Narrow C array index (overflow undefined). */ TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr) { @@ -507,7 +505,6 @@ TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr) LJ_64 ? ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT) : ((IRT_INTP<<5)|IRT_INT|IRCONV_TOBIT)); } -#endif /* -- Narrowing of arithmetic operators ----------------------------------- */ @@ -651,4 +648,3 @@ IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv) #undef emitir #undef emitir_raw -#endif diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c index 929ccb6187..ea6c404f56 100644 --- a/src/lj_opt_sink.c +++ b/src/lj_opt_sink.c @@ -8,7 +8,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_ir.h" #include "lj_jit.h" @@ -93,7 +92,6 @@ static void sink_mark_ins(jit_State *J) irt_setmark(IR(ir->op2)->t); /* Mark stored value. */ break; } -#if LJ_HASFFI case IR_CNEWI: if (irt_isphi(ir->t) && (!sink_checkphi(J, ir, ir->op2) || @@ -101,13 +99,10 @@ static void sink_mark_ins(jit_State *J) !sink_checkphi(J, ir, (ir+1)->op2)))) irt_setmark(ir->t); /* Mark ineligible allocation. */ /* fallthrough */ -#endif case IR_USTORE: irt_setmark(IR(ir->op2)->t); /* Mark stored value. */ break; -#if LJ_HASFFI case IR_CALLXS: -#endif case IR_CALLS: irt_setmark(IR(ir->op1)->t); /* Mark (potentially) stored values. */ break; @@ -186,9 +181,7 @@ static void sink_sweep_ins(jit_State *J) ir->prev = REGSP_INIT; } break; -#if LJ_HASFFI case IR_CNEW: case IR_CNEWI: -#endif case IR_TNEW: case IR_TDUP: if (!irt_ismarked(ir->t)) { ir->t.irt &= ~IRT_GUARD; @@ -247,4 +240,3 @@ void lj_opt_sink(jit_State *J) #undef IR -#endif diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c index fc9352042e..1e065bc2fc 100644 --- a/src/lj_opt_split.c +++ b/src/lj_opt_split.c @@ -8,863 +8,3 @@ #include "lj_obj.h" -#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) - -#include "lj_err.h" -#include "lj_buf.h" -#include "lj_ir.h" -#include "lj_jit.h" -#include "lj_ircall.h" -#include "lj_iropt.h" -#include "lj_dispatch.h" -#include "lj_vm.h" - -/* SPLIT pass: -** -** This pass splits up 64 bit IR instructions into multiple 32 bit IR -** instructions. It's only active for soft-float targets or for 32 bit CPUs -** which lack native 64 bit integer operations (the FFI is currently the -** only emitter for 64 bit integer instructions). -** -** Splitting the IR in a separate pass keeps each 32 bit IR assembler -** backend simple. Only a small amount of extra functionality needs to be -** implemented. This is much easier than adding support for allocating -** register pairs to each backend (believe me, I tried). A few simple, but -** important optimizations can be performed by the SPLIT pass, which would -** be tedious to do in the backend. -** -** The basic idea is to replace each 64 bit IR instruction with its 32 bit -** equivalent plus an extra HIOP instruction. The splitted IR is not passed -** through FOLD or any other optimizations, so each HIOP is guaranteed to -** immediately follow it's counterpart. The actual functionality of HIOP is -** inferred from the previous instruction. -** -** The operands of HIOP hold the hiword input references. The output of HIOP -** is the hiword output reference, which is also used to hold the hiword -** register or spill slot information. The register allocator treats this -** instruction independently of any other instruction, which improves code -** quality compared to using fixed register pairs. -** -** It's easier to split up some instructions into two regular 32 bit -** instructions. E.g. XLOAD is split up into two XLOADs with two different -** addresses. Obviously 64 bit constants need to be split up into two 32 bit -** constants, too. Some hiword instructions can be entirely omitted, e.g. -** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls -** are split up into two 32 bit arguments each. -** -** On soft-float targets, floating-point instructions are directly converted -** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX). -** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump). -** -** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with -** two int64_t fields: -** -** 0100 p32 ADD base +8 -** 0101 i64 XLOAD 0100 -** 0102 i64 ADD 0101 +1 -** 0103 p32 ADD base +16 -** 0104 i64 XSTORE 0103 0102 -** -** mov rax, [esi+0x8] -** add rax, +0x01 -** mov [esi+0x10], rax -** -** Here's the transformed IR and the x86 machine code after the SPLIT pass: -** -** 0100 p32 ADD base +8 -** 0101 int XLOAD 0100 -** 0102 p32 ADD base +12 -** 0103 int XLOAD 0102 -** 0104 int ADD 0101 +1 -** 0105 int HIOP 0103 +0 -** 0106 p32 ADD base +16 -** 0107 int XSTORE 0106 0104 -** 0108 int HIOP 0106 0105 -** -** mov eax, [esi+0x8] -** mov ecx, [esi+0xc] -** add eax, +0x01 -** adc ecx, +0x00 -** mov [esi+0x10], eax -** mov [esi+0x14], ecx -** -** You may notice the reassociated hiword address computation, which is -** later fused into the mov operands by the assembler. -*/ - -/* Some local macros to save typing. Undef'd at the end. */ -#define IR(ref) (&J->cur.ir[(ref)]) - -/* Directly emit the transformed IR without updating chains etc. */ -static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2) -{ - IRRef nref = lj_ir_nextins(J); - IRIns *ir = IR(nref); - ir->ot = ot; - ir->op1 = op1; - ir->op2 = op2; - return nref; -} - -#if LJ_SOFTFP -/* Emit a (checked) number to integer conversion. */ -static IRRef split_num2int(jit_State *J, IRRef lo, IRRef hi, int check) -{ - IRRef tmp, res; -#if LJ_LE - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), lo, hi); -#else - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hi, lo); -#endif - res = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_softfp_d2i); - if (check) { - tmp = split_emit(J, IRTI(IR_CALLN), res, IRCALL_softfp_i2d); - split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); - split_emit(J, IRTGI(IR_EQ), tmp, lo); - split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), tmp+1, hi); - } - return res; -} - -/* Emit a CALLN with one split 64 bit argument. */ -static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir, - IRIns *ir, IRCallID id) -{ - IRRef tmp, op1 = ir->op1; - J->cur.nins--; -#if LJ_LE - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); -#else - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); -#endif - ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); - return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); -} -#endif - -/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */ -static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir, - IRIns *ir, IRCallID id) -{ - IRRef tmp, op1 = ir->op1, op2 = ir->op2; - J->cur.nins--; -#if LJ_LE - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); -#else - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); -#endif - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev); - ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); - return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); -} - -/* Emit a CALLN with two split 64 bit arguments. */ -static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir, - IRIns *ir, IRCallID id) -{ - IRRef tmp, op1 = ir->op1, op2 = ir->op2; - J->cur.nins--; -#if LJ_LE - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev); - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]); -#else - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]); - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev); -#endif - ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); - return split_emit(J, - IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT), - tmp, tmp); -} - -/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */ -static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref) -{ - IRRef nref = oir[ref].prev; - IRIns *ir = IR(nref); - int32_t ofs = 4; - if (ir->o == IR_KPTR) - return lj_ir_kptr(J, (char *)ir_kptr(ir) + ofs); - if (ir->o == IR_ADD && irref_isk(ir->op2) && !irt_isphi(oir[ref].t)) { - /* Reassociate address. */ - ofs += IR(ir->op2)->i; - nref = ir->op1; - if (ofs == 0) return nref; - } - return split_emit(J, IRT(IR_ADD, IRT_PTR), nref, lj_ir_kint(J, ofs)); -} - -#if LJ_HASFFI -static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst, - IRIns *oir, IRIns *nir, IRIns *ir) -{ - IROp op = ir->o; - IRRef kref = nir->op2; - if (irref_isk(kref)) { /* Optimize constant shifts. */ - int32_t k = (IR(kref)->i & 63); - IRRef lo = nir->op1, hi = hisubst[ir->op1]; - if (op == IR_BROL || op == IR_BROR) { - if (op == IR_BROR) k = (-k & 63); - if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; } - if (k == 0) { - passthrough: - J->cur.nins--; - ir->prev = lo; - return hi; - } else { - TRef k1, k2; - IRRef t1, t2, t3, t4; - J->cur.nins--; - k1 = lj_ir_kint(J, k); - k2 = lj_ir_kint(J, (-k & 31)); - t1 = split_emit(J, IRTI(IR_BSHL), lo, k1); - t2 = split_emit(J, IRTI(IR_BSHL), hi, k1); - t3 = split_emit(J, IRTI(IR_BSHR), lo, k2); - t4 = split_emit(J, IRTI(IR_BSHR), hi, k2); - ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4); - return split_emit(J, IRTI(IR_BOR), t2, t3); - } - } else if (k == 0) { - goto passthrough; - } else if (k < 32) { - if (op == IR_BSHL) { - IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref); - IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31))); - return split_emit(J, IRTI(IR_BOR), t1, t2); - } else { - IRRef t1 = ir->prev, t2; - lua_assert(op == IR_BSHR || op == IR_BSAR); - nir->o = IR_BSHR; - t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31))); - ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2); - return split_emit(J, IRTI(op), hi, kref); - } - } else { - if (op == IR_BSHL) { - if (k == 32) - J->cur.nins--; - else - lo = ir->prev; - ir->prev = lj_ir_kint(J, 0); - return lo; - } else { - lua_assert(op == IR_BSHR || op == IR_BSAR); - if (k == 32) { - J->cur.nins--; - ir->prev = hi; - } else { - nir->op1 = hi; - } - if (op == IR_BSHR) - return lj_ir_kint(J, 0); - else - return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31)); - } - } - } - return split_call_li(J, hisubst, oir, ir, - op - IR_BSHL + IRCALL_lj_carith_shl64); -} - -static IRRef split_bitop(jit_State *J, IRRef1 *hisubst, - IRIns *nir, IRIns *ir) -{ - IROp op = ir->o; - IRRef hi, kref = nir->op2; - if (irref_isk(kref)) { /* Optimize bit operations with lo constant. */ - int32_t k = IR(kref)->i; - if (k == 0 || k == -1) { - if (op == IR_BAND) k = ~k; - if (k == 0) { - J->cur.nins--; - ir->prev = nir->op1; - } else if (op == IR_BXOR) { - nir->o = IR_BNOT; - nir->op2 = 0; - } else { - J->cur.nins--; - ir->prev = kref; - } - } - } - hi = hisubst[ir->op1]; - kref = hisubst[ir->op2]; - if (irref_isk(kref)) { /* Optimize bit operations with hi constant. */ - int32_t k = IR(kref)->i; - if (k == 0 || k == -1) { - if (op == IR_BAND) k = ~k; - if (k == 0) { - return hi; - } else if (op == IR_BXOR) { - return split_emit(J, IRTI(IR_BNOT), hi, 0); - } else { - return kref; - } - } - } - return split_emit(J, IRTI(op), hi, kref); -} -#endif - -/* Substitute references of a snapshot. */ -static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir) -{ - SnapEntry *map = &J->cur.snapmap[snap->mapofs]; - MSize n, nent = snap->nent; - for (n = 0; n < nent; n++) { - SnapEntry sn = map[n]; - IRIns *ir = &oir[snap_ref(sn)]; - if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn)))) - map[n] = ((sn & 0xffff0000) | ir->prev); - } -} - -/* Transform the old IR to the new IR. */ -static void split_ir(jit_State *J) -{ - IRRef nins = J->cur.nins, nk = J->cur.nk; - MSize irlen = nins - nk; - MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); - IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need); - IRRef1 *hisubst; - IRRef ref, snref; - SnapShot *snap; - - /* Copy old IR to buffer. */ - memcpy(oir, IR(nk), irlen*sizeof(IRIns)); - /* Bias hiword substitution table and old IR. Loword kept in field prev. */ - hisubst = (IRRef1 *)&oir[irlen] - nk; - oir -= nk; - - /* Remove all IR instructions, but retain IR constants. */ - J->cur.nins = REF_FIRST; - J->loopref = 0; - - /* Process constants and fixed references. */ - for (ref = nk; ref <= REF_BASE; ref++) { - IRIns *ir = &oir[ref]; - if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) { - /* Split up 64 bit constant. */ - TValue tv = *ir_k64(ir); - ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo); - hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi); - } else { - ir->prev = ref; /* Identity substitution for loword. */ - hisubst[ref] = 0; - } - if (irt_is64(ir->t) && ir->o != IR_KNULL) - ref++; - } - - /* Process old IR instructions. */ - snap = J->cur.snap; - snref = snap->ref; - for (ref = REF_FIRST; ref < nins; ref++) { - IRIns *ir = &oir[ref]; - IRRef nref = lj_ir_nextins(J); - IRIns *nir = IR(nref); - IRRef hi = 0; - - if (ref >= snref) { - snap->ref = nref; - split_subst_snap(J, snap++, oir); - snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0; - } - - /* Copy-substitute old instruction to new instruction. */ - nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev; - nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev; - ir->prev = nref; /* Loword substitution. */ - nir->o = ir->o; - nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI); - hisubst[ref] = 0; - - /* Split 64 bit instructions. */ -#if LJ_SOFTFP - if (irt_isnum(ir->t)) { - nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ - /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */ - switch (ir->o) { - case IR_ADD: - hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add); - break; - case IR_SUB: - hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub); - break; - case IR_MUL: - hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul); - break; - case IR_DIV: - hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div); - break; - case IR_POW: - hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi); - break; - case IR_FPMATH: - /* Try to rejoin pow from EXP2, MUL and LOG2. */ - if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) { - IRIns *irp = IR(nir->op1); - if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) { - IRIns *irm4 = IR(irp->op1); - IRIns *irm3 = IR(irm4->op1); - IRIns *irm12 = IR(irm3->op1); - IRIns *irl1 = IR(irm12->op1); - if (irm12->op1 > J->loopref && irl1->o == IR_CALLN && - irl1->op2 == IRCALL_lj_vm_log2) { - IRRef tmp = irl1->op1; /* Recycle first two args from LOG2. */ - IRRef arg3 = irm3->op2, arg4 = irm4->op2; - J->cur.nins--; - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3); - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4); - ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow); - hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); - break; - } - } - } - hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2); - break; - case IR_ATAN2: - hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2); - break; - case IR_LDEXP: - hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp); - break; - case IR_NEG: case IR_ABS: - nir->o = IR_CONV; /* Pass through loword. */ - nir->op2 = (IRT_INT << 5) | IRT_INT; - hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP), - hisubst[ir->op1], - lj_ir_kint(J, (int32_t)(0x7fffffffu + (ir->o == IR_NEG)))); - break; - case IR_SLOAD: - if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */ - nir->op2 &= ~IRSLOAD_CONVERT; - ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref, - IRCALL_softfp_i2d); - hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); - break; - } - /* fallthrough */ - case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: - case IR_STRTO: - hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); - break; - case IR_FLOAD: - lua_assert(ir->op1 == REF_NIL); - hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4)); - nir->op2 += LJ_BE*4; - break; - case IR_XLOAD: { - IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */ - J->cur.nins--; - hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */ -#if LJ_BE - hi = split_emit(J, IRT(IR_XLOAD, IRT_INT), hi, ir->op2); - inslo.t.irt = IRT_SOFTFP | (inslo.t.irt & IRT_GUARD); -#endif - nref = lj_ir_nextins(J); - nir = IR(nref); - *nir = inslo; /* Re-emit lo XLOAD. */ -#if LJ_LE - hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2); - ir->prev = nref; -#else - ir->prev = hi; hi = nref; -#endif - break; - } - case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE: - split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]); - break; - case IR_CONV: { /* Conversion to number. Others handled below. */ - IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); - UNUSED(st); -#if LJ_32 && LJ_HASFFI - if (st == IRT_I64 || st == IRT_U64) { - hi = split_call_l(J, hisubst, oir, ir, - st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d); - break; - } -#endif - lua_assert(st == IRT_INT || - (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT))); - nir->o = IR_CALLN; -#if LJ_32 && LJ_HASFFI - nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d : - st == IRT_FLOAT ? IRCALL_softfp_f2d : - IRCALL_softfp_ui2d; -#else - nir->op2 = IRCALL_softfp_i2d; -#endif - hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); - break; - } - case IR_CALLN: - case IR_CALLL: - case IR_CALLS: - case IR_CALLXS: - goto split_call; - case IR_PHI: - if (nir->op1 == nir->op2) - J->cur.nins--; /* Drop useless PHIs. */ - if (hisubst[ir->op1] != hisubst[ir->op2]) - split_emit(J, IRT(IR_PHI, IRT_SOFTFP), - hisubst[ir->op1], hisubst[ir->op2]); - break; - case IR_HIOP: - J->cur.nins--; /* Drop joining HIOP. */ - ir->prev = nir->op1; - hi = nir->op2; - break; - default: - lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX); - hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), - hisubst[ir->op1], hisubst[ir->op2]); - break; - } - } else -#endif -#if LJ_32 && LJ_HASFFI - if (irt_isint64(ir->t)) { - IRRef hiref = hisubst[ir->op1]; - nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ - switch (ir->o) { - case IR_ADD: - case IR_SUB: - /* Use plain op for hiword if loword cannot produce a carry/borrow. */ - if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) { - ir->prev = nir->op1; /* Pass through loword. */ - nir->op1 = hiref; nir->op2 = hisubst[ir->op2]; - hi = nref; - break; - } - /* fallthrough */ - case IR_NEG: - hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]); - break; - case IR_MUL: - hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64); - break; - case IR_DIV: - hi = split_call_ll(J, hisubst, oir, ir, - irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : - IRCALL_lj_carith_divu64); - break; - case IR_MOD: - hi = split_call_ll(J, hisubst, oir, ir, - irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : - IRCALL_lj_carith_modu64); - break; - case IR_POW: - hi = split_call_ll(J, hisubst, oir, ir, - irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : - IRCALL_lj_carith_powu64); - break; - case IR_BNOT: - hi = split_emit(J, IRTI(IR_BNOT), hiref, 0); - break; - case IR_BSWAP: - ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0); - hi = nref; - break; - case IR_BAND: case IR_BOR: case IR_BXOR: - hi = split_bitop(J, hisubst, nir, ir); - break; - case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: - hi = split_bitshift(J, hisubst, oir, nir, ir); - break; - case IR_FLOAD: - lua_assert(ir->op2 == IRFL_CDATA_INT64); - hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); -#if LJ_BE - ir->prev = hi; hi = nref; -#endif - break; - case IR_XLOAD: - hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2); -#if LJ_BE - ir->prev = hi; hi = nref; -#endif - break; - case IR_XSTORE: - split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]); - break; - case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ - IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); -#if LJ_SOFTFP - if (st == IRT_NUM) { /* NUM to 64 bit int conv. */ - hi = split_call_l(J, hisubst, oir, ir, - irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul); - } else if (st == IRT_FLOAT) { /* FLOAT to 64 bit int conv. */ - nir->o = IR_CALLN; - nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul; - hi = split_emit(J, IRTI(IR_HIOP), nref, nref); - } -#else - if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ - hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref); - } -#endif - else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */ - /* Drop cast, since assembler doesn't care. But fwd both parts. */ - hi = hiref; - goto fwdlo; - } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */ - IRRef k31 = lj_ir_kint(J, 31); - nir = IR(nref); /* May have been reallocated. */ - ir->prev = nir->op1; /* Pass through loword. */ - nir->o = IR_BSAR; /* hi = bsar(lo, 31). */ - nir->op2 = k31; - hi = nref; - } else { /* Zero-extend to 64 bit. */ - hi = lj_ir_kint(J, 0); - goto fwdlo; - } - break; - } - case IR_CALLXS: - goto split_call; - case IR_PHI: { - IRRef hiref2; - if ((irref_isk(nir->op1) && irref_isk(nir->op2)) || - nir->op1 == nir->op2) - J->cur.nins--; /* Drop useless PHIs. */ - hiref2 = hisubst[ir->op2]; - if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2)) - split_emit(J, IRTI(IR_PHI), hiref, hiref2); - break; - } - case IR_HIOP: - J->cur.nins--; /* Drop joining HIOP. */ - ir->prev = nir->op1; - hi = nir->op2; - break; - default: - lua_assert(ir->o <= IR_NE); /* Comparisons. */ - split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); - break; - } - } else -#endif -#if LJ_SOFTFP - if (ir->o == IR_SLOAD) { - if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from number to int. */ - nir->op2 &= ~IRSLOAD_CONVERT; - if (!(nir->op2 & IRSLOAD_TYPECHECK)) - nir->t.irt = IRT_INT; /* Drop guard. */ - split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); - ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t)); - } - } else if (ir->o == IR_TOBIT) { - IRRef tmp, op1 = ir->op1; - J->cur.nins--; -#if LJ_LE - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); -#else - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); -#endif - ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit); - } else if (ir->o == IR_TOSTR) { - if (hisubst[ir->op1]) { - if (irref_isk(ir->op1)) - nir->op1 = ir->op1; - else - split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref); - } - } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) { - if (irref_isk(ir->op2) && hisubst[ir->op2]) - nir->op2 = ir->op2; - } else -#endif - if (ir->o == IR_CONV) { /* See above, too. */ - IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); -#if LJ_32 && LJ_HASFFI - if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */ -#if LJ_SOFTFP - if (irt_isfloat(ir->t)) { - split_call_l(J, hisubst, oir, ir, - st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f); - J->cur.nins--; /* Drop unused HIOP. */ - } -#else - if (irt_isfp(ir->t)) { /* 64 bit integer to FP conversion. */ - ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)), - hisubst[ir->op1], nref); - } -#endif - else { /* Truncate to lower 32 bits. */ - fwdlo: - ir->prev = nir->op1; /* Forward loword. */ - /* Replace with NOP to avoid messing up the snapshot logic. */ - nir->ot = IRT(IR_NOP, IRT_NIL); - nir->op1 = nir->op2 = 0; - } - } -#endif -#if LJ_SOFTFP && LJ_32 && LJ_HASFFI - else if (irt_isfloat(ir->t)) { - if (st == IRT_NUM) { - split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f); - J->cur.nins--; /* Drop unused HIOP. */ - } else { - nir->o = IR_CALLN; - nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f; - } - } else if (st == IRT_FLOAT) { - nir->o = IR_CALLN; - nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui; - } else -#endif -#if LJ_SOFTFP - if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) { - if (irt_isguard(ir->t)) { - lua_assert(st == IRT_NUM && irt_isint(ir->t)); - J->cur.nins--; - ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1); - } else { - split_call_l(J, hisubst, oir, ir, -#if LJ_32 && LJ_HASFFI - st == IRT_NUM ? - (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : - (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui) -#else - IRCALL_softfp_d2i -#endif - ); - J->cur.nins--; /* Drop unused HIOP. */ - } - } -#endif - } else if (ir->o == IR_CALLXS) { - IRRef hiref; - split_call: - hiref = hisubst[ir->op1]; - if (hiref) { - IROpT ot = nir->ot; - IRRef op2 = nir->op2; - nir->ot = IRT(IR_CARG, IRT_NIL); -#if LJ_LE - nir->op2 = hiref; -#else - nir->op2 = nir->op1; nir->op1 = hiref; -#endif - ir->prev = nref = split_emit(J, ot, nref, op2); - } - if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t)) - hi = split_emit(J, - IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT), - nref, nref); - } else if (ir->o == IR_CARG) { - IRRef hiref = hisubst[ir->op1]; - if (hiref) { - IRRef op2 = nir->op2; -#if LJ_LE - nir->op2 = hiref; -#else - nir->op2 = nir->op1; nir->op1 = hiref; -#endif - ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2); - nir = IR(nref); - } - hiref = hisubst[ir->op2]; - if (hiref) { -#if !LJ_TARGET_X86 - int carg = 0; - IRIns *cir; - for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1)) - carg++; - if ((carg & 1) == 0) { /* Align 64 bit arguments. */ - IRRef op2 = nir->op2; - nir->op2 = REF_NIL; - nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2); - nir = IR(nref); - } -#endif -#if LJ_BE - { IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; } -#endif - ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref); - } - } else if (ir->o == IR_CNEWI) { - if (hisubst[ir->op2]) - split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]); - } else if (ir->o == IR_LOOP) { - J->loopref = nref; /* Needed by assembler. */ - } - hisubst[ref] = hi; /* Store hiword substitution. */ - } - if (snref == nins) { /* Substitution for last snapshot. */ - snap->ref = J->cur.nins; - split_subst_snap(J, snap, oir); - } - - /* Add PHI marks. */ - for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) { - IRIns *ir = IR(ref); - if (ir->o != IR_PHI) break; - if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t); - if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t); - } -} - -/* Protected callback for split pass. */ -static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud) -{ - jit_State *J = (jit_State *)ud; - split_ir(J); - UNUSED(L); UNUSED(dummy); - return NULL; -} - -#if defined(LUA_USE_ASSERT) || LJ_SOFTFP -/* Slow, but sure way to check whether a SPLIT pass is needed. */ -static int split_needsplit(jit_State *J) -{ - IRIns *ir, *irend; - IRRef ref; - for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++) - if (LJ_SOFTFP ? irt_is64orfp(ir->t) : irt_isint64(ir->t)) - return 1; - if (LJ_SOFTFP) { - for (ref = J->chain[IR_SLOAD]; ref; ref = IR(ref)->prev) - if ((IR(ref)->op2 & IRSLOAD_CONVERT)) - return 1; - if (J->chain[IR_TOBIT]) - return 1; - } - for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) { - IRType st = (IR(ref)->op2 & IRCONV_SRCMASK); - if ((LJ_SOFTFP && (st == IRT_NUM || st == IRT_FLOAT)) || - st == IRT_I64 || st == IRT_U64) - return 1; - } - return 0; /* Nope. */ -} -#endif - -/* SPLIT pass. */ -void lj_opt_split(jit_State *J) -{ -#if LJ_SOFTFP - if (!J->needsplit) - J->needsplit = split_needsplit(J); -#else - lua_assert(J->needsplit >= split_needsplit(J)); /* Verify flag. */ -#endif - if (J->needsplit) { - int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit); - if (errcode) { - /* Completely reset the trace to avoid inconsistent dump on abort. */ - J->cur.nins = J->cur.nk = REF_BASE; - J->cur.nsnap = 0; - lj_err_throw(J->L, errcode); /* Propagate errors. */ - } - } -} - -#undef IR - -#endif diff --git a/src/lj_parse.c b/src/lj_parse.c index 08f7cfa6ac..65818e8615 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -19,9 +19,7 @@ #include "lj_func.h" #include "lj_state.h" #include "lj_bc.h" -#if LJ_HASFFI #include "lj_ctype.h" -#endif #include "lj_strfmt.h" #include "lj_lex.h" #include "lj_parse.h" @@ -241,7 +239,6 @@ GCstr *lj_parse_keepstr(LexState *ls, const char *str, size_t len) return s; } -#if LJ_HASFFI /* Anchor cdata to avoid GC. */ void lj_parse_keepcdata(LexState *ls, TValue *tv, GCcdata *cd) { @@ -250,7 +247,6 @@ void lj_parse_keepcdata(LexState *ls, TValue *tv, GCcdata *cd) setcdataV(L, tv, cd); setboolV(lj_tab_set(L, ls->fs->kt, tv), 1); } -#endif /* -- Jump list handling -------------------------------------------------- */ @@ -523,12 +519,10 @@ static void expr_toreg_nobranch(FuncState *fs, ExpDesc *e, BCReg reg) else #endif ins = BCINS_AD(BC_KNUM, reg, const_num(fs, e)); -#if LJ_HASFFI } else if (e->k == VKCDATA) { fs->flags |= PROTO_FFI; ins = BCINS_AD(BC_KCDATA, reg, const_gc(fs, obj2gco(cdataV(&e->u.nval)), LJ_TCDATA)); -#endif } else if (e->k == VRELOCABLE) { setbc_a(bcptr(fs, e), reg); goto noins; @@ -947,7 +941,6 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e) } else { lua_assert(op == BC_UNM || op == BC_LEN); if (op == BC_UNM && !expr_hasjump(e)) { /* Constant-fold negations. */ -#if LJ_HASFFI if (e->k == VKCDATA) { /* Fold in-place since cdata is not interned. */ GCcdata *cd = cdataV(&e->u.nval); int64_t *p = (int64_t *)cdataptr(cd); @@ -957,7 +950,6 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e) *p = -*p; return; } else -#endif if (expr_isnumk(e) && !expr_numiszero(e)) { /* Avoid folding to -0. */ TValue *o = expr_numtv(e); if (tvisint(o)) { @@ -1858,9 +1850,7 @@ static void parse_body(LexState *ls, ExpDesc *e, int needself, BCLine line) /* Store new prototype in the constant array of the parent. */ expr_init(e, VRELOCABLE, bcemit_AD(pfs, BC_FNEW, 0, const_gc(pfs, obj2gco(pt), LJ_TPROTO))); -#if LJ_HASFFI pfs->flags |= (fs.flags & PROTO_FFI); -#endif if (!(pfs->flags & PROTO_CHILD)) { if (pfs->flags & PROTO_HAS_RETURN) pfs->flags |= PROTO_FIXUP_RETURN; diff --git a/src/lj_parse.h b/src/lj_parse.h index ceeab6994f..acf3049a22 100644 --- a/src/lj_parse.h +++ b/src/lj_parse.h @@ -11,8 +11,6 @@ LJ_FUNC GCproto *lj_parse(LexState *ls); LJ_FUNC GCstr *lj_parse_keepstr(LexState *ls, const char *str, size_t l); -#if LJ_HASFFI LJ_FUNC void lj_parse_keepcdata(LexState *ls, TValue *tv, GCcdata *cd); -#endif #endif diff --git a/src/lj_profile.c b/src/lj_profile.c index 116998e1e8..ae37dab4b5 100644 --- a/src/lj_profile.c +++ b/src/lj_profile.c @@ -8,361 +8,3 @@ #include "lj_obj.h" -#if LJ_HASPROFILE - -#include "lj_buf.h" -#include "lj_frame.h" -#include "lj_debug.h" -#include "lj_dispatch.h" -#if LJ_HASJIT -#include "lj_jit.h" -#include "lj_trace.h" -#endif -#include "lj_profile.h" - -#include "luajit.h" - -#if LJ_PROFILE_SIGPROF - -#include -#include -#define profile_lock(ps) UNUSED(ps) -#define profile_unlock(ps) UNUSED(ps) - -#elif LJ_PROFILE_PTHREAD - -#include -#include -#if LJ_TARGET_PS3 -#include -#endif -#define profile_lock(ps) pthread_mutex_lock(&ps->lock) -#define profile_unlock(ps) pthread_mutex_unlock(&ps->lock) - -#elif LJ_PROFILE_WTHREAD - -#define WIN32_LEAN_AND_MEAN -#if LJ_TARGET_XBOX360 -#include -#include -#else -#include -#endif -typedef unsigned int (WINAPI *WMM_TPFUNC)(unsigned int); -#define profile_lock(ps) EnterCriticalSection(&ps->lock) -#define profile_unlock(ps) LeaveCriticalSection(&ps->lock) - -#endif - -/* Profiler state. */ -typedef struct ProfileState { - global_State *g; /* VM state that started the profiler. */ - luaJIT_profile_callback cb; /* Profiler callback. */ - void *data; /* Profiler callback data. */ - SBuf sb; /* String buffer for stack dumps. */ - int interval; /* Sample interval in milliseconds. */ - int samples; /* Number of samples for next callback. */ - int vmstate; /* VM state when profile timer triggered. */ -#if LJ_PROFILE_SIGPROF - struct sigaction oldsa; /* Previous SIGPROF state. */ -#elif LJ_PROFILE_PTHREAD - pthread_mutex_t lock; /* g->hookmask update lock. */ - pthread_t thread; /* Timer thread. */ - int abort; /* Abort timer thread. */ -#elif LJ_PROFILE_WTHREAD -#if LJ_TARGET_WINDOWS - HINSTANCE wmm; /* WinMM library handle. */ - WMM_TPFUNC wmm_tbp; /* WinMM timeBeginPeriod function. */ - WMM_TPFUNC wmm_tep; /* WinMM timeEndPeriod function. */ -#endif - CRITICAL_SECTION lock; /* g->hookmask update lock. */ - HANDLE thread; /* Timer thread. */ - int abort; /* Abort timer thread. */ -#endif -} ProfileState; - -/* Sadly, we have to use a static profiler state. -** -** The SIGPROF variant needs a static pointer to the global state, anyway. -** And it would be hard to extend for multiple threads. You can still use -** multiple VMs in multiple threads, but only profile one at a time. -*/ -static ProfileState profile_state; - -/* Default sample interval in milliseconds. */ -#define LJ_PROFILE_INTERVAL_DEFAULT 10 - -/* -- Profiler/hook interaction ------------------------------------------- */ - -#if !LJ_PROFILE_SIGPROF -void LJ_FASTCALL lj_profile_hook_enter(global_State *g) -{ - ProfileState *ps = &profile_state; - if (ps->g) { - profile_lock(ps); - hook_enter(g); - profile_unlock(ps); - } else { - hook_enter(g); - } -} - -void LJ_FASTCALL lj_profile_hook_leave(global_State *g) -{ - ProfileState *ps = &profile_state; - if (ps->g) { - profile_lock(ps); - hook_leave(g); - profile_unlock(ps); - } else { - hook_leave(g); - } -} -#endif - -/* -- Profile callbacks --------------------------------------------------- */ - -/* Callback from profile hook (HOOK_PROFILE already cleared). */ -void LJ_FASTCALL lj_profile_interpreter(lua_State *L) -{ - ProfileState *ps = &profile_state; - global_State *g = G(L); - uint8_t mask; - profile_lock(ps); - mask = (g->hookmask & ~HOOK_PROFILE); - if (!(mask & HOOK_VMEVENT)) { - int samples = ps->samples; - ps->samples = 0; - g->hookmask = HOOK_VMEVENT; - lj_dispatch_update(g); - profile_unlock(ps); - ps->cb(ps->data, L, samples, ps->vmstate); /* Invoke user callback. */ - profile_lock(ps); - mask |= (g->hookmask & HOOK_PROFILE); - } - g->hookmask = mask; - lj_dispatch_update(g); - profile_unlock(ps); -} - -/* Trigger profile hook. Asynchronous call from OS-specific profile timer. */ -static void profile_trigger(ProfileState *ps) -{ - global_State *g = ps->g; - uint8_t mask; - profile_lock(ps); - ps->samples++; /* Always increment number of samples. */ - mask = g->hookmask; - if (!(mask & (HOOK_PROFILE|HOOK_VMEVENT))) { /* Set profile hook. */ - int st = g->vmstate; - ps->vmstate = st >= 0 ? 'N' : - st == ~LJ_VMST_INTERP ? 'I' : - st == ~LJ_VMST_C ? 'C' : - st == ~LJ_VMST_GC ? 'G' : 'J'; - g->hookmask = (mask | HOOK_PROFILE); - lj_dispatch_update(g); - } - profile_unlock(ps); -} - -/* -- OS-specific profile timer handling ---------------------------------- */ - -#if LJ_PROFILE_SIGPROF - -/* SIGPROF handler. */ -static void profile_signal(int sig) -{ - UNUSED(sig); - profile_trigger(&profile_state); -} - -/* Start profiling timer. */ -static void profile_timer_start(ProfileState *ps) -{ - int interval = ps->interval; - struct itimerval tm; - struct sigaction sa; - tm.it_value.tv_sec = tm.it_interval.tv_sec = interval / 1000; - tm.it_value.tv_usec = tm.it_interval.tv_usec = (interval % 1000) * 1000; - setitimer(ITIMER_PROF, &tm, NULL); - sa.sa_flags = SA_RESTART; - sa.sa_handler = profile_signal; - sigemptyset(&sa.sa_mask); - sigaction(SIGPROF, &sa, &ps->oldsa); -} - -/* Stop profiling timer. */ -static void profile_timer_stop(ProfileState *ps) -{ - struct itimerval tm; - tm.it_value.tv_sec = tm.it_interval.tv_sec = 0; - tm.it_value.tv_usec = tm.it_interval.tv_usec = 0; - setitimer(ITIMER_PROF, &tm, NULL); - sigaction(SIGPROF, &ps->oldsa, NULL); -} - -#elif LJ_PROFILE_PTHREAD - -/* POSIX timer thread. */ -static void *profile_thread(ProfileState *ps) -{ - int interval = ps->interval; -#if !LJ_TARGET_PS3 - struct timespec ts; - ts.tv_sec = interval / 1000; - ts.tv_nsec = (interval % 1000) * 1000000; -#endif - while (1) { -#if LJ_TARGET_PS3 - sys_timer_usleep(interval * 1000); -#else - nanosleep(&ts, NULL); -#endif - if (ps->abort) break; - profile_trigger(ps); - } - return NULL; -} - -/* Start profiling timer thread. */ -static void profile_timer_start(ProfileState *ps) -{ - pthread_mutex_init(&ps->lock, 0); - ps->abort = 0; - pthread_create(&ps->thread, NULL, (void *(*)(void *))profile_thread, ps); -} - -/* Stop profiling timer thread. */ -static void profile_timer_stop(ProfileState *ps) -{ - ps->abort = 1; - pthread_join(ps->thread, NULL); - pthread_mutex_destroy(&ps->lock); -} - -#elif LJ_PROFILE_WTHREAD - -/* Windows timer thread. */ -static DWORD WINAPI profile_thread(void *psx) -{ - ProfileState *ps = (ProfileState *)psx; - int interval = ps->interval; -#if LJ_TARGET_WINDOWS - ps->wmm_tbp(interval); -#endif - while (1) { - Sleep(interval); - if (ps->abort) break; - profile_trigger(ps); - } -#if LJ_TARGET_WINDOWS - ps->wmm_tep(interval); -#endif - return 0; -} - -/* Start profiling timer thread. */ -static void profile_timer_start(ProfileState *ps) -{ -#if LJ_TARGET_WINDOWS - if (!ps->wmm) { /* Load WinMM library on-demand. */ - ps->wmm = LoadLibraryExA("winmm.dll", NULL, 0); - if (ps->wmm) { - ps->wmm_tbp = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeBeginPeriod"); - ps->wmm_tep = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeEndPeriod"); - if (!ps->wmm_tbp || !ps->wmm_tep) { - ps->wmm = NULL; - return; - } - } - } -#endif - InitializeCriticalSection(&ps->lock); - ps->abort = 0; - ps->thread = CreateThread(NULL, 0, profile_thread, ps, 0, NULL); -} - -/* Stop profiling timer thread. */ -static void profile_timer_stop(ProfileState *ps) -{ - ps->abort = 1; - WaitForSingleObject(ps->thread, INFINITE); - DeleteCriticalSection(&ps->lock); -} - -#endif - -/* -- Public profiling API ------------------------------------------------ */ - -/* Start profiling. */ -LUA_API void luaJIT_profile_start(lua_State *L, const char *mode, - luaJIT_profile_callback cb, void *data) -{ - ProfileState *ps = &profile_state; - int interval = LJ_PROFILE_INTERVAL_DEFAULT; - while (*mode) { - int m = *mode++; - switch (m) { - case 'i': - interval = 0; - while (*mode >= '0' && *mode <= '9') - interval = interval * 10 + (*mode++ - '0'); - if (interval <= 0) interval = 1; - break; -#if LJ_HASJIT - case 'l': case 'f': - L2J(L)->prof_mode = m; - lj_trace_flushall(L); - break; -#endif - default: /* Ignore unknown mode chars. */ - break; - } - } - if (ps->g) { - luaJIT_profile_stop(L); - if (ps->g) return; /* Profiler in use by another VM. */ - } - ps->g = G(L); - ps->interval = interval; - ps->cb = cb; - ps->data = data; - ps->samples = 0; - lj_buf_init(L, &ps->sb); - profile_timer_start(ps); -} - -/* Stop profiling. */ -LUA_API void luaJIT_profile_stop(lua_State *L) -{ - ProfileState *ps = &profile_state; - global_State *g = ps->g; - if (G(L) == g) { /* Only stop profiler if started by this VM. */ - profile_timer_stop(ps); - g->hookmask &= ~HOOK_PROFILE; - lj_dispatch_update(g); -#if LJ_HASJIT - G2J(g)->prof_mode = 0; - lj_trace_flushall(L); -#endif - lj_buf_free(g, &ps->sb); - setmref(ps->sb.b, NULL); - setmref(ps->sb.e, NULL); - ps->g = NULL; - } -} - -/* Return a compact stack dump. */ -LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt, - int depth, size_t *len) -{ - ProfileState *ps = &profile_state; - SBuf *sb = &ps->sb; - setsbufL(sb, L); - lj_buf_reset(sb); - lj_debug_dumpstack(L, sb, fmt, depth); - *len = (size_t)sbuflen(sb); - return sbufB(sb); -} - -#endif diff --git a/src/lj_profile.h b/src/lj_profile.h index 0cccfd78cd..4e630468fa 100644 --- a/src/lj_profile.h +++ b/src/lj_profile.h @@ -8,14 +8,5 @@ #include "lj_obj.h" -#if LJ_HASPROFILE - -LJ_FUNC void LJ_FASTCALL lj_profile_interpreter(lua_State *L); -#if !LJ_PROFILE_SIGPROF -LJ_FUNC void LJ_FASTCALL lj_profile_hook_enter(global_State *g); -LJ_FUNC void LJ_FASTCALL lj_profile_hook_leave(global_State *g); -#endif - -#endif #endif diff --git a/src/lj_record.c b/src/lj_record.c index 9d0469c425..b47b473b87 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -8,21 +8,15 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_err.h" #include "lj_str.h" #include "lj_tab.h" #include "lj_meta.h" #include "lj_frame.h" -#if LJ_HASFFI #include "lj_ctype.h" -#endif #include "lj_bc.h" #include "lj_ff.h" -#if LJ_HASPROFILE -#include "lj_debug.h" -#endif #include "lj_ir.h" #include "lj_jit.h" #include "lj_ircall.h" @@ -103,31 +97,23 @@ static void rec_check_slots(jit_State *J) } if (s == 0) { lua_assert(tref_isfunc(tr)); -#if LJ_FR2 } else if (s == 1) { lua_assert((tr & ~TREF_FRAME) == 0); -#endif } else if ((tr & TREF_FRAME)) { GCfunc *fn = gco2func(frame_gc(tv)); BCReg delta = (BCReg)(tv - frame_prev(tv)); -#if LJ_FR2 if (ref) lua_assert(ir_knum(ir)->u64 == tv->u64); tr = J->slot[s-1]; ir = IR(tref_ref(tr)); -#endif lua_assert(tref_isfunc(tr)); if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir)); lua_assert(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME) : (s == delta + LJ_FR2)); depth++; } else if ((tr & TREF_CONT)) { -#if LJ_FR2 if (ref) lua_assert(ir_knum(ir)->u64 == tv->u64); -#else - lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void)); -#endif lua_assert((J->slot[s+1+LJ_FR2] & TREF_FRAME)); depth++; } else { @@ -609,49 +595,6 @@ static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) /* -- Record profiler hook checks ----------------------------------------- */ -#if LJ_HASPROFILE - -/* Need to insert profiler hook check? */ -static int rec_profile_need(jit_State *J, GCproto *pt, const BCIns *pc) -{ - GCproto *ppt; - lua_assert(J->prof_mode == 'f' || J->prof_mode == 'l'); - if (!pt) - return 0; - ppt = J->prev_pt; - J->prev_pt = pt; - if (pt != ppt && ppt) { - J->prev_line = -1; - return 1; - } - if (J->prof_mode == 'l') { - BCLine line = lj_debug_line(pt, proto_bcpos(pt, pc)); - BCLine pline = J->prev_line; - J->prev_line = line; - if (pline != line) - return 1; - } - return 0; -} - -static void rec_profile_ins(jit_State *J, const BCIns *pc) -{ - if (J->prof_mode && rec_profile_need(J, J->pt, pc)) { - emitir(IRTG(IR_PROF, IRT_NIL), 0, 0); - lj_snap_add(J); - } -} - -static void rec_profile_ret(jit_State *J) -{ - if (J->prof_mode == 'f') { - emitir(IRTG(IR_PROF, IRT_NIL), 0, 0); - J->prev_pt = NULL; - lj_snap_add(J); - } -} - -#endif /* -- Record calls and returns -------------------------------------------- */ @@ -707,19 +650,13 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs) lj_trace_err(J, LJ_TRERR_NOMM); for (i = ++nargs; i > LJ_FR2; i--) /* Shift arguments up. */ fbase[i+LJ_FR2] = fbase[i+LJ_FR2-1]; -#if LJ_FR2 fbase[2] = fbase[0]; -#endif fbase[0] = ix.mobj; /* Replace function. */ functv = &ix.mobjv; } kfunc = rec_call_specialize(J, funcV(functv), fbase[0]); -#if LJ_FR2 fbase[0] = kfunc; fbase[1] = TREF_FRAME; -#else - fbase[0] = kfunc | TREF_FRAME; -#endif J->maxslot = (BCReg)nargs; } @@ -919,12 +856,8 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) { BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize; -#if LJ_FR2 J->base[top] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont))); J->base[top+1] = TREF_CONT; -#else - J->base[top] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; -#endif J->framedepth++; for (s = J->maxslot; s < top; s++) J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */ @@ -976,13 +909,9 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) } /* The cdata metatable is treated as immutable. */ if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; -#if LJ_GC64 /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */ ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB, GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)])); -#else - ix->mt = mix.tab = lj_ir_ktab(J, mt); -#endif goto nocheck; } ix->mt = mt ? mix.tab : TREF_NIL; @@ -1026,9 +955,7 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) } ok: base[0] = ix->mobj; -#if LJ_FR2 base[1] = 0; -#endif copyTV(J->L, basev+0, &ix->mobjv); lj_record_call(J, func, 2); return 0; /* No result yet. */ @@ -1156,7 +1083,6 @@ static void rec_mm_comp(jit_State *J, RecordIndex *ix, int op) } } -#if LJ_HASFFI /* Setup call to cdata comparison metamethod. */ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm) { @@ -1172,7 +1098,6 @@ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm) lj_record_mm_lookup(J, ix, mm); rec_mm_callcomp(J, ix, op); } -#endif /* -- Indexed access ------------------------------------------------------ */ @@ -1545,7 +1470,6 @@ static int rec_upvalue_constify(jit_State *J, GCupval *uvp) if (uvp->immutable) { cTValue *o = uvval(uvp); /* Don't constify objects that may retain large amounts of memory. */ -#if LJ_HASFFI if (tviscdata(o)) { GCcdata *cd = cdataV(o); if (!cdataisv(cd) && !(cd->marked & LJ_GC_CDATA_FIN)) { @@ -1555,9 +1479,6 @@ static int rec_upvalue_constify(jit_State *J, GCupval *uvp) } return 0; } -#else - UNUSED(J); -#endif if (!(tvistab(o) || tvisudata(o) || tvisthread(o))) return 1; } @@ -1579,11 +1500,7 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val) goto noconstify; kfunc = lj_ir_kfunc(J, J->fn); emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc); -#if LJ_FR2 J->base[-2] = kfunc; -#else - J->base[-1] = kfunc | TREF_FRAME; -#endif fn = kfunc; } tr = lj_record_constify(J, uvval(uvp)); @@ -1699,9 +1616,7 @@ static void rec_func_vararg(jit_State *J) if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) lj_trace_err(J, LJ_TRERR_STACKOV); J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. */ -#if LJ_FR2 J->base[vframe-1] = TREF_FRAME; -#endif /* Copy fixarg slots up and set their original slots to nil. */ fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot; for (s = 0; s < fixargs; s++) { @@ -1943,15 +1858,11 @@ static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond) const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0); SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; /* Set PC to opposite target to avoid re-recording the comp. in side trace. */ -#if LJ_FR2 SnapEntry *flink = &J->cur.snapmap[snap->mapofs + snap->nent]; uint64_t pcbase; memcpy(&pcbase, flink, sizeof(uint64_t)); pcbase = (pcbase & 0xff) | (u64ptr(npc) << 8); memcpy(flink, &pcbase, sizeof(uint64_t)); -#else - J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); -#endif J->needsnap = 1; if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins); lj_snap_shrink(J); /* Shrink last snapshot if possible. */ @@ -2039,9 +1950,6 @@ void lj_record_ins(jit_State *J) rec_check_ir(J); #endif -#if LJ_HASPROFILE - rec_profile_ins(J, pc); -#endif /* Keep a copy of the runtime values of var/num/str operands. */ #define rav (&ix.valv) @@ -2083,12 +1991,10 @@ void lj_record_ins(jit_State *J) /* -- Comparison ops ---------------------------------------------------- */ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: -#if LJ_HASFFI if (tref_iscdata(ra) || tref_iscdata(rc)) { rec_mm_comp_cdata(J, &ix, op, ((int)op & 2) ? MM_le : MM_lt); break; } -#endif /* Emit nothing for two numeric or string consts. */ if (!(tref_isk2(ra,rc) && tref_isnumber_str(ra) && tref_isnumber_str(rc))) { IRType ta = tref_isinteger(ra) ? IRT_INT : tref_type(ra); @@ -2135,12 +2041,10 @@ void lj_record_ins(jit_State *J) case BC_ISEQS: case BC_ISNES: case BC_ISEQN: case BC_ISNEN: case BC_ISEQP: case BC_ISNEP: -#if LJ_HASFFI if (tref_iscdata(ra) || tref_iscdata(rc)) { rec_mm_comp_cdata(J, &ix, op, MM_eq); break; } -#endif /* Emit nothing for two non-table, non-udata consts. */ if (!(tref_isk2(ra, rc) && !(tref_istab(ra) || tref_isudata(ra)))) { int diff; @@ -2250,11 +2154,7 @@ void lj_record_ins(jit_State *J) case BC_MOV: /* Clear gap of method call to avoid resurrecting previous refs. */ if (ra > J->maxslot) { -#if LJ_FR2 memset(J->base + J->maxslot, 0, (ra - J->maxslot) * sizeof(TRef)); -#else - J->base[ra-1] = 0; -#endif } break; case BC_KSTR: case BC_KNUM: case BC_KPRI: @@ -2269,11 +2169,9 @@ void lj_record_ins(jit_State *J) J->base[ra++] = TREF_NIL; if (rc >= J->maxslot) J->maxslot = rc+1; break; -#if LJ_HASFFI case BC_KCDATA: rc = lj_ir_kgc(J, proto_kgc(J->pt, ~(ptrdiff_t)rc), IRT_CDATA); break; -#endif /* -- Upvalue and function ops ------------------------------------------ */ @@ -2364,9 +2262,6 @@ void lj_record_ins(jit_State *J) rc = (BCReg)(J->L->top - J->L->base) - ra + 1; /* fallthrough */ case BC_RET: case BC_RET0: case BC_RET1: -#if LJ_HASPROFILE - rec_profile_ret(J); -#endif lj_record_ret(J, ra, (ptrdiff_t)rc-1); break; @@ -2457,9 +2352,7 @@ void lj_record_ins(jit_State *J) if (bcmode_a(op) == BCMdst && rc) { J->base[ra] = rc; if (ra >= J->maxslot) { -#if LJ_FR2 if (ra > J->maxslot) J->base[ra-1] = 0; -#endif J->maxslot = ra+1; } } @@ -2612,10 +2505,6 @@ void lj_record_setup(jit_State *J) if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) lj_trace_err(J, LJ_TRERR_STACKOV); } -#if LJ_HASPROFILE - J->prev_pt = NULL; - J->prev_line = -1; -#endif #ifdef LUAJIT_ENABLE_CHECKHOOK /* Regularly check for instruction/line hooks from compiled code and ** exit to the interpreter if the hooks are set. @@ -2643,4 +2532,3 @@ void lj_record_setup(jit_State *J) #undef emitir_raw #undef emitir -#endif diff --git a/src/lj_record.h b/src/lj_record.h index 93d374d249..bffa165493 100644 --- a/src/lj_record.h +++ b/src/lj_record.h @@ -9,7 +9,6 @@ #include "lj_obj.h" #include "lj_jit.h" -#if LJ_HASJIT /* Context for recording an indexed load/store. */ typedef struct RecordIndex { TValue tabv; /* Runtime value of table (or indexed object). */ @@ -40,6 +39,5 @@ LJ_FUNC TRef lj_record_idx(jit_State *J, RecordIndex *ix); LJ_FUNC void lj_record_ins(jit_State *J); LJ_FUNC void lj_record_setup(jit_State *J); -#endif #endif diff --git a/src/lj_snap.c b/src/lj_snap.c index bb063c2b65..1fc5b6c107 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -8,7 +8,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_gc.h" #include "lj_tab.h" @@ -21,10 +20,8 @@ #include "lj_trace.h" #include "lj_snap.h" #include "lj_target.h" -#if LJ_HASFFI #include "lj_ctype.h" #include "lj_cdata.h" -#endif /* Pass IR on to next optimization in chain (FOLD). */ #define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) @@ -68,7 +65,6 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) for (s = 0; s < nslots; s++) { TRef tr = J->slot[s]; IRRef ref = tref_ref(tr); -#if LJ_FR2 if (s == 1) { /* Ignore slot 1 in LJ_FR2 mode, except if tailcalled. */ if ((tr & TREF_FRAME)) map[n++] = SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL); @@ -79,7 +75,6 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64); ref = tref_ref(tr); } -#endif if (ref) { SnapEntry sn = SNAP_TR(s, tr); IRIns *ir = &J->cur.ir[ref]; @@ -108,31 +103,16 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot) cTValue *lim = J->L->base - J->baseslot + LJ_FR2; GCfunc *fn = frame_func(frame); cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top; -#if LJ_FR2 uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2); lua_assert(2 <= J->baseslot && J->baseslot <= 257); memcpy(map, &pcbase, sizeof(uint64_t)); -#else - MSize f = 0; - map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ -#endif while (frame > lim) { /* Backwards traversal of all frames above base. */ if (frame_islua(frame)) { -#if !LJ_FR2 - map[f++] = SNAP_MKPC(frame_pc(frame)); -#endif frame = frame_prevl(frame); } else if (frame_iscont(frame)) { -#if !LJ_FR2 - map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); - map[f++] = SNAP_MKPC(frame_contpc(frame)); -#endif frame = frame_prevd(frame); } else { lua_assert(!frame_isc(frame)); -#if !LJ_FR2 - map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); -#endif frame = frame_prevd(frame); continue; } @@ -140,13 +120,8 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot) ftop = frame + funcproto(frame_func(frame))->framesize; } *topslot = (uint8_t)(ftop - lim); -#if LJ_FR2 lua_assert(sizeof(SnapEntry) * 2 == sizeof(uint64_t)); return 2; -#else - lua_assert(f == (MSize)(1 + J->framedepth)); - return f; -#endif } /* Take a snapshot of the current stack. */ @@ -635,15 +610,8 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, int32_t *sps = &ex->spill[regsp_spill(rs)]; if (irt_isinteger(t)) { setintV(o, *sps); -#if !LJ_SOFTFP } else if (irt_isnum(t)) { o->u64 = *(uint64_t *)sps; -#endif -#if LJ_64 && !LJ_GC64 - } else if (irt_islightud(t)) { - /* 64 bit lightuserdata which may escape already has the tag bits. */ - o->u64 = *(uint64_t *)sps; -#endif } else { lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t)); @@ -657,15 +625,8 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, return; } else if (irt_isinteger(t)) { setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]); -#if !LJ_SOFTFP } else if (irt_isnum(t)) { setnumV(o, ex->fpr[r-RID_MIN_FPR]); -#endif -#if LJ_64 && !LJ_GC64 - } else if (irt_is64(t)) { - /* 64 bit values that already have the tag bits. */ - o->u64 = ex->gpr[r-RID_MIN_GPR]; -#endif } else if (irt_ispri(t)) { setpriV(o, irt_toitype(t)); } else { @@ -674,7 +635,6 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, } } -#if LJ_HASFFI /* Restore raw data from the trace exit state. */ static void snap_restoredata(GCtrace *T, ExitState *ex, SnapNo snapno, BloomFilter rfilt, @@ -712,19 +672,10 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, return; } src = (int32_t *)&ex->gpr[r-RID_MIN_GPR]; -#if !LJ_SOFTFP if (r >= RID_MAX_GPR) { src = (int32_t *)&ex->fpr[r-RID_MIN_FPR]; -#if LJ_TARGET_PPC - if (sz == 4) { /* PPC FPRs are always doubles. */ - *(float *)dst = (float)*(double *)src; - return; - } -#else if (LJ_BE && sz == 4) src++; -#endif } else -#endif if (LJ_64 && LJ_BE && sz == 4) src++; } } @@ -734,7 +685,6 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, else if (sz == 1) *(int8_t *)dst = (int8_t)*src; else *(int16_t *)dst = (int16_t)*src; } -#endif /* Unsink allocation from the trace exit state. Unsink sunk stores. */ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, @@ -743,7 +693,6 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, { lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW || ir->o == IR_CNEWI); -#if LJ_HASFFI if (ir->o == IR_CNEW || ir->o == IR_CNEWI) { CTState *cts = ctype_cts(J->L); CTypeID id = (CTypeID)T->ir[ir->op1].i; @@ -788,7 +737,6 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, } } } else -#endif { IRIns *irs, *irlast; GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) : @@ -833,9 +781,6 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) SnapEntry *map = &T->snapmap[snap->mapofs]; #if !LJ_FR2 || defined(LUA_USE_ASSERT) SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2]; -#endif -#if !LJ_FR2 - ptrdiff_t ftsz0; #endif TValue *frame; BloomFilter rfilt = snap_renamefilter(T, snapno); @@ -853,9 +798,6 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) /* Fill stack slots with data from the registers and spill slots. */ frame = L->base-1-LJ_FR2; -#if !LJ_FR2 - ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ -#endif for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; if (!(sn & SNAP_NORESTORE)) { @@ -878,18 +820,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) TValue tmp; snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); o->u32.hi = tmp.u32.lo; -#if !LJ_FR2 - } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { - /* Overwrite tag with frame link. */ - setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0); - L->base = o+1; -#endif } } } -#if LJ_FR2 L->base += (map[nent+LJ_BE] & 0xff); -#endif lua_assert(map + nent == flinks); /* Compute current stack top. */ @@ -910,4 +844,3 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) #undef emitir_raw #undef emitir -#endif diff --git a/src/lj_snap.h b/src/lj_snap.h index 2c9ae3d643..509742ea74 100644 --- a/src/lj_snap.h +++ b/src/lj_snap.h @@ -9,7 +9,6 @@ #include "lj_obj.h" #include "lj_jit.h" -#if LJ_HASJIT LJ_FUNC void lj_snap_add(jit_State *J); LJ_FUNC void lj_snap_purge(jit_State *J); LJ_FUNC void lj_snap_shrink(jit_State *J); @@ -29,6 +28,5 @@ static LJ_AINLINE void lj_snap_grow_map(jit_State *J, MSize need) if (LJ_UNLIKELY(need > J->sizesnapmap)) lj_snap_grow_map_(J, need); } -#endif #endif diff --git a/src/lj_state.c b/src/lj_state.c index 3cc0fea5c1..a9544eadd1 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -19,9 +19,7 @@ #include "lj_meta.h" #include "lj_state.h" #include "lj_frame.h" -#if LJ_HASFFI #include "lj_ctype.h" -#endif #include "lj_trace.h" #include "lj_dispatch.h" #include "lj_vm.h" @@ -165,9 +163,7 @@ static void close_state(lua_State *L) lua_assert(gcref(g->gc.root) == obj2gco(L)); lua_assert(g->strnum == 0); lj_trace_freestate(g); -#if LJ_HASFFI lj_ctype_freestate(g); -#endif lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); lj_buf_free(g, &g->tmpbuf); lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); @@ -180,11 +176,7 @@ static void close_state(lua_State *L) g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0); } -#if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) -lua_State *lj_state_newstate(lua_Alloc f, void *ud) -#else LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) -#endif { GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State)); lua_State *L = &GG->L; @@ -207,9 +199,6 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) setnilV(registry(L)); setnilV(&g->nilnode.val); setnilV(&g->nilnode.key); -#if !LJ_GC64 - setmref(g->nilnode.freetop, &g->nilnode); -#endif lj_buf_init(NULL, &g->tmpbuf); g->gc.state = GCSpause; setgcref(g->gc.root, obj2gco(L)); @@ -243,17 +232,12 @@ LUA_API void lua_close(lua_State *L) global_State *g = G(L); int i; L = mainthread(g); /* Only the main thread can be closed. */ -#if LJ_HASPROFILE - luaJIT_profile_stop(L); -#endif setgcrefnull(g->cur_L); lj_func_closeuv(L, tvref(L->stack)); lj_gc_separateudata(g, 1); /* Separate udata which have GC metamethods. */ -#if LJ_HASJIT G2J(g)->flags &= ~JIT_F_ON; G2J(g)->state = LJ_TRACE_IDLE; lj_dispatch_update(g); -#endif for (i = 0;;) { hook_enter(g); L->status = 0; diff --git a/src/lj_state.h b/src/lj_state.h index 02a0eafa33..8294baa564 100644 --- a/src/lj_state.h +++ b/src/lj_state.h @@ -28,8 +28,5 @@ static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need) LJ_FUNC lua_State *lj_state_new(lua_State *L); LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L); -#if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) -LJ_FUNC lua_State *lj_state_newstate(lua_Alloc f, void *ud); -#endif #endif diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c index d7893ce981..03050eb98a 100644 --- a/src/lj_strfmt.c +++ b/src/lj_strfmt.c @@ -134,10 +134,8 @@ char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v) *p++ = 'N'; *p++ = 'U'; *p++ = 'L'; *p++ = 'L'; return p; } -#if LJ_64 /* Shorten output for 64 bit pointers. */ n = 2+2*4+((x >> 32) ? 2+2*(lj_fls((uint32_t)(x >> 32))>>3) : 0); -#endif p[0] = '0'; p[1] = 'x'; for (i = n-1; i >= 2; i--, x >>= 4) @@ -181,13 +179,11 @@ SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k) return sb; } -#if LJ_HASJIT /* Add number to buffer. */ SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o) { return lj_strfmt_putfnum(sb, STRFMT_G14, o->n); } -#endif SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v) { @@ -362,7 +358,6 @@ GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o) return tvisint(o) ? lj_strfmt_int(L, intV(o)) : lj_strfmt_num(L, o); } -#if LJ_HASJIT /* Convert char value to string. */ GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c) { @@ -370,7 +365,6 @@ GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c) buf[0] = c; return lj_str_new(L, buf, 1); } -#endif /* Raw conversion of object to string. */ GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o) diff --git a/src/lj_strfmt.h b/src/lj_strfmt.h index 6e1d9017e6..1496ace017 100644 --- a/src/lj_strfmt.h +++ b/src/lj_strfmt.h @@ -90,9 +90,7 @@ LJ_FUNC const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp); /* Unformatted conversions to buffer. */ LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k); -#if LJ_HASJIT LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o); -#endif LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v); LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str); @@ -108,9 +106,7 @@ LJ_FUNC SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat, GCstr *str); LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k); LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o); LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o); -#if LJ_HASJIT LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c); -#endif LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o); /* Internal string formatting. */ diff --git a/src/lj_strfmt_num.c b/src/lj_strfmt_num.c index 9271f68a11..b80ad7b4a1 100644 --- a/src/lj_strfmt_num.c +++ b/src/lj_strfmt_num.c @@ -112,13 +112,8 @@ static char *lj_strfmt_wuint9(char *p, uint32_t u) ** enough digits to make both %.99e and %.99f do the right thing. */ -#if LJ_64 #define ND_MUL2K_MAX_SHIFT 29 #define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) / 1000000000)) -#else -#define ND_MUL2K_MAX_SHIFT 11 -#define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) >> 9) / 1953125) -#endif /* Multiply nd by 2^k and add carry_in (ndlo is assumed to be zero). */ static uint32_t nd_mul2k(uint32_t* nd, uint32_t ndhi, uint32_t k, diff --git a/src/lj_tab.c b/src/lj_tab.c index 47c0cfd34a..25a9b0fe6f 100644 --- a/src/lj_tab.c +++ b/src/lj_tab.c @@ -28,12 +28,8 @@ static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash) #define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi))) #define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1)) -#if LJ_GC64 #define hashgcref(t, r) \ hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32)) -#else -#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS) -#endif /* Hash an arbitrary key and return its anchor position in the hash table. */ static Node *hashkey(const GCtab *t, cTValue *key) @@ -114,9 +110,7 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits) t->hmask = 0; nilnode = &G(L)->nilnode; setmref(t->node, nilnode); -#if LJ_GC64 setmref(t->freetop, nilnode); -#endif } else { /* Otherwise separately allocate the array part. */ Node *nilnode; t = lj_mem_newobj(L, GCtab); @@ -129,9 +123,7 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits) t->hmask = 0; nilnode = &G(L)->nilnode; setmref(t->node, nilnode); -#if LJ_GC64 setmref(t->freetop, nilnode); -#endif if (asize > 0) { if (asize > LJ_MAX_ASIZE) lj_err_msg(L, LJ_ERR_TABOV); @@ -169,7 +161,6 @@ GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h) return lj_tab_new(L, (uint32_t)(a > 0 ? a+1 : 0), hsize2hbits(h)); } -#if LJ_HASJIT GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize) { GCtab *t = newtab(L, ahsize & 0xffffff, ahsize >> 24); @@ -177,7 +168,6 @@ GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize) if (t->hmask > 0) clearhpart(t); return t; } -#endif /* Duplicate a table. */ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt) @@ -278,9 +268,7 @@ void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits) } else { global_State *g = G(L); setmref(t->node, &g->nilnode); -#if LJ_GC64 setmref(t->freetop, &g->nilnode); -#endif t->hmask = 0; } if (asize < oldasize) { /* Array part shrinks? */ @@ -385,12 +373,10 @@ static void rehashtab(lua_State *L, GCtab *t, cTValue *ek) lj_tab_resize(L, t, asize, hsize2hbits(total)); } -#if LJ_HASFFI void lj_tab_rehash(lua_State *L, GCtab *t) { rehashtab(L, t, niltv(L)); } -#endif void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize) { diff --git a/src/lj_tab.h b/src/lj_tab.h index 71e34945e8..c79106c246 100644 --- a/src/lj_tab.h +++ b/src/lj_tab.h @@ -17,17 +17,10 @@ /* Scramble the bits of numbers and pointers. */ static LJ_AINLINE uint32_t hashrot(uint32_t lo, uint32_t hi) { -#if LJ_TARGET_X86ORX64 /* Prefer variant that compiles well for a 2-operand CPU. */ lo ^= hi; hi = lj_rol(hi, HASH_ROT1); lo -= hi; hi = lj_rol(hi, HASH_ROT2); hi ^= lo; hi -= lj_rol(lo, HASH_ROT3); -#else - lo ^= hi; - lo = lo - lj_rol(hi, HASH_ROT1); - hi = lo ^ lj_rol(hi, HASH_ROT1 + HASH_ROT2); - hi = hi - lj_rol(lo, HASH_ROT3); -#endif return hi; } @@ -35,15 +28,11 @@ static LJ_AINLINE uint32_t hashrot(uint32_t lo, uint32_t hi) LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); LJ_FUNC GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h); -#if LJ_HASJIT LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize); -#endif LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt); LJ_FUNC void LJ_FASTCALL lj_tab_clear(GCtab *t); LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t); -#if LJ_HASFFI LJ_FUNC void lj_tab_rehash(lua_State *L, GCtab *t); -#endif LJ_FUNC void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits); LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize); diff --git a/src/lj_target.h b/src/lj_target.h index 8dcae957f0..14bd9ca3bb 100644 --- a/src/lj_target.h +++ b/src/lj_target.h @@ -55,11 +55,7 @@ typedef uint32_t RegSP; /* Bitset for registers. 32 registers suffice for most architectures. ** Note that one set holds bits for both GPRs and FPRs. */ -#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 -typedef uint64_t RegSet; -#else typedef uint32_t RegSet; -#endif #define RID2RSET(r) (((RegSet)1) << (r)) #define RSET_EMPTY ((RegSet)0) @@ -69,13 +65,8 @@ typedef uint32_t RegSet; #define rset_set(rs, r) (rs |= RID2RSET(r)) #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) -#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 -#define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) -#define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) -#else #define rset_picktop(rs) ((Reg)lj_fls(rs)) #define rset_pickbot(rs) ((Reg)lj_ffs(rs)) -#endif /* -- Register allocation cost -------------------------------------------- */ @@ -134,19 +125,7 @@ typedef uint32_t RegCost; /* -- Target-specific definitions ----------------------------------------- */ -#if LJ_TARGET_X86ORX64 #include "lj_target_x86.h" -#elif LJ_TARGET_ARM -#include "lj_target_arm.h" -#elif LJ_TARGET_ARM64 -#include "lj_target_arm64.h" -#elif LJ_TARGET_PPC -#include "lj_target_ppc.h" -#elif LJ_TARGET_MIPS -#include "lj_target_mips.h" -#else -#error "Missing include for target CPU" -#endif #ifdef EXITSTUBS_PER_GROUP /* Return the address of an exit stub. */ diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h deleted file mode 100644 index 5551b1f1ce..0000000000 --- a/src/lj_target_arm.h +++ /dev/null @@ -1,270 +0,0 @@ -/* -** Definitions for ARM CPUs. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -*/ - -#ifndef _LJ_TARGET_ARM_H -#define _LJ_TARGET_ARM_H - -/* -- Registers IDs ------------------------------------------------------- */ - -#define GPRDEF(_) \ - _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \ - _(R8) _(R9) _(R10) _(R11) _(R12) _(SP) _(LR) _(PC) -#if LJ_SOFTFP -#define FPRDEF(_) -#else -#define FPRDEF(_) \ - _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \ - _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) -#endif -#define VRIDDEF(_) - -#define RIDENUM(name) RID_##name, - -enum { - GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ - FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ - RID_MAX, - RID_TMP = RID_LR, - - /* Calling conventions. */ - RID_RET = RID_R0, - RID_RETLO = RID_R0, - RID_RETHI = RID_R1, -#if LJ_SOFTFP - RID_FPRET = RID_R0, -#else - RID_FPRET = RID_D0, -#endif - - /* These definitions must match with the *.dasc file(s): */ - RID_BASE = RID_R9, /* Interpreter BASE. */ - RID_LPC = RID_R6, /* Interpreter PC. */ - RID_DISPATCH = RID_R7, /* Interpreter DISPATCH table. */ - RID_LREG = RID_R8, /* Interpreter L. */ - - /* Register ranges [min, max) and number of registers. */ - RID_MIN_GPR = RID_R0, - RID_MAX_GPR = RID_PC+1, - RID_MIN_FPR = RID_MAX_GPR, -#if LJ_SOFTFP - RID_MAX_FPR = RID_MIN_FPR, -#else - RID_MAX_FPR = RID_D15+1, -#endif - RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, - RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR -}; - -#define RID_NUM_KREF RID_NUM_GPR -#define RID_MIN_KREF RID_R0 - -/* -- Register sets ------------------------------------------------------- */ - -/* Make use of all registers, except sp, lr and pc. */ -#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_R12+1)) -#define RSET_GPREVEN \ - (RID2RSET(RID_R0)|RID2RSET(RID_R2)|RID2RSET(RID_R4)|RID2RSET(RID_R6)| \ - RID2RSET(RID_R8)|RID2RSET(RID_R10)) -#define RSET_GPRODD \ - (RID2RSET(RID_R1)|RID2RSET(RID_R3)|RID2RSET(RID_R5)|RID2RSET(RID_R7)| \ - RID2RSET(RID_R9)|RID2RSET(RID_R11)) -#if LJ_SOFTFP -#define RSET_FPR 0 -#else -#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) -#endif -#define RSET_ALL (RSET_GPR|RSET_FPR) -#define RSET_INIT RSET_ALL - -/* ABI-specific register sets. lr is an implicit scratch register. */ -#define RSET_SCRATCH_GPR_ (RSET_RANGE(RID_R0, RID_R3+1)|RID2RSET(RID_R12)) -#ifdef __APPLE__ -#define RSET_SCRATCH_GPR (RSET_SCRATCH_GPR_|RID2RSET(RID_R9)) -#else -#define RSET_SCRATCH_GPR RSET_SCRATCH_GPR_ -#endif -#if LJ_SOFTFP -#define RSET_SCRATCH_FPR 0 -#else -#define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1)) -#endif -#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) -#define REGARG_FIRSTGPR RID_R0 -#define REGARG_LASTGPR RID_R3 -#define REGARG_NUMGPR 4 -#if LJ_ABI_SOFTFP -#define REGARG_FIRSTFPR 0 -#define REGARG_LASTFPR 0 -#define REGARG_NUMFPR 0 -#else -#define REGARG_FIRSTFPR RID_D0 -#define REGARG_LASTFPR RID_D7 -#define REGARG_NUMFPR 8 -#endif - -/* -- Spill slots --------------------------------------------------------- */ - -/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. -** -** SPS_FIXED: Available fixed spill slots in interpreter frame. -** This definition must match with the *.dasc file(s). -** -** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. -*/ -#define SPS_FIXED 2 -#define SPS_FIRST 2 - -#define SPOFS_TMP 0 - -#define sps_scale(slot) (4 * (int32_t)(slot)) -#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) - -/* -- Exit state ---------------------------------------------------------- */ - -/* This definition must match with the *.dasc file(s). */ -typedef struct { -#if !LJ_SOFTFP - lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ -#endif - int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ - int32_t spill[256]; /* Spill slots. */ -} ExitState; - -/* PC after instruction that caused an exit. Used to find the trace number. */ -#define EXITSTATE_PCREG RID_PC -/* Highest exit + 1 indicates stack check. */ -#define EXITSTATE_CHECKEXIT 1 - -#define EXITSTUB_SPACING 4 -#define EXITSTUBS_PER_GROUP 32 - -/* -- Instructions -------------------------------------------------------- */ - -/* Instruction fields. */ -#define ARMF_CC(ai, cc) (((ai) ^ ARMI_CCAL) | ((cc) << 28)) -#define ARMF_N(r) ((r) << 16) -#define ARMF_D(r) ((r) << 12) -#define ARMF_S(r) ((r) << 8) -#define ARMF_M(r) (r) -#define ARMF_SH(sh, n) (((sh) << 5) | ((n) << 7)) -#define ARMF_RSH(sh, r) (0x10 | ((sh) << 5) | ARMF_S(r)) - -typedef enum ARMIns { - ARMI_CCAL = 0xe0000000, - ARMI_S = 0x000100000, - ARMI_K12 = 0x02000000, - ARMI_KNEG = 0x00200000, - ARMI_LS_W = 0x00200000, - ARMI_LS_U = 0x00800000, - ARMI_LS_P = 0x01000000, - ARMI_LS_R = 0x02000000, - ARMI_LSX_I = 0x00400000, - - ARMI_AND = 0xe0000000, - ARMI_EOR = 0xe0200000, - ARMI_SUB = 0xe0400000, - ARMI_RSB = 0xe0600000, - ARMI_ADD = 0xe0800000, - ARMI_ADC = 0xe0a00000, - ARMI_SBC = 0xe0c00000, - ARMI_RSC = 0xe0e00000, - ARMI_TST = 0xe1100000, - ARMI_TEQ = 0xe1300000, - ARMI_CMP = 0xe1500000, - ARMI_CMN = 0xe1700000, - ARMI_ORR = 0xe1800000, - ARMI_MOV = 0xe1a00000, - ARMI_BIC = 0xe1c00000, - ARMI_MVN = 0xe1e00000, - - ARMI_NOP = 0xe1a00000, - - ARMI_MUL = 0xe0000090, - ARMI_SMULL = 0xe0c00090, - - ARMI_LDR = 0xe4100000, - ARMI_LDRB = 0xe4500000, - ARMI_LDRH = 0xe01000b0, - ARMI_LDRSB = 0xe01000d0, - ARMI_LDRSH = 0xe01000f0, - ARMI_LDRD = 0xe00000d0, - ARMI_STR = 0xe4000000, - ARMI_STRB = 0xe4400000, - ARMI_STRH = 0xe00000b0, - ARMI_STRD = 0xe00000f0, - ARMI_PUSH = 0xe92d0000, - - ARMI_B = 0xea000000, - ARMI_BL = 0xeb000000, - ARMI_BLX = 0xfa000000, - ARMI_BLXr = 0xe12fff30, - - /* ARMv6 */ - ARMI_REV = 0xe6bf0f30, - ARMI_SXTB = 0xe6af0070, - ARMI_SXTH = 0xe6bf0070, - ARMI_UXTB = 0xe6ef0070, - ARMI_UXTH = 0xe6ff0070, - - /* ARMv6T2 */ - ARMI_MOVW = 0xe3000000, - ARMI_MOVT = 0xe3400000, - - /* VFP */ - ARMI_VMOV_D = 0xeeb00b40, - ARMI_VMOV_S = 0xeeb00a40, - ARMI_VMOVI_D = 0xeeb00b00, - - ARMI_VMOV_R_S = 0xee100a10, - ARMI_VMOV_S_R = 0xee000a10, - ARMI_VMOV_RR_D = 0xec500b10, - ARMI_VMOV_D_RR = 0xec400b10, - - ARMI_VADD_D = 0xee300b00, - ARMI_VSUB_D = 0xee300b40, - ARMI_VMUL_D = 0xee200b00, - ARMI_VMLA_D = 0xee000b00, - ARMI_VMLS_D = 0xee000b40, - ARMI_VNMLS_D = 0xee100b00, - ARMI_VDIV_D = 0xee800b00, - - ARMI_VABS_D = 0xeeb00bc0, - ARMI_VNEG_D = 0xeeb10b40, - ARMI_VSQRT_D = 0xeeb10bc0, - - ARMI_VCMP_D = 0xeeb40b40, - ARMI_VCMPZ_D = 0xeeb50b40, - - ARMI_VMRS = 0xeef1fa10, - - ARMI_VCVT_S32_F32 = 0xeebd0ac0, - ARMI_VCVT_S32_F64 = 0xeebd0bc0, - ARMI_VCVT_U32_F32 = 0xeebc0ac0, - ARMI_VCVT_U32_F64 = 0xeebc0bc0, - ARMI_VCVT_F32_S32 = 0xeeb80ac0, - ARMI_VCVT_F64_S32 = 0xeeb80bc0, - ARMI_VCVT_F32_U32 = 0xeeb80a40, - ARMI_VCVT_F64_U32 = 0xeeb80b40, - ARMI_VCVT_F32_F64 = 0xeeb70bc0, - ARMI_VCVT_F64_F32 = 0xeeb70ac0, - - ARMI_VLDR_S = 0xed100a00, - ARMI_VLDR_D = 0xed100b00, - ARMI_VSTR_S = 0xed000a00, - ARMI_VSTR_D = 0xed000b00, -} ARMIns; - -typedef enum ARMShift { - ARMSH_LSL, ARMSH_LSR, ARMSH_ASR, ARMSH_ROR -} ARMShift; - -/* ARM condition codes. */ -typedef enum ARMCC { - CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC, - CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL, - CC_HS = CC_CS, CC_LO = CC_CC -} ARMCC; - -#endif diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h deleted file mode 100644 index 3f6bb39be2..0000000000 --- a/src/lj_target_arm64.h +++ /dev/null @@ -1,322 +0,0 @@ -/* -** Definitions for ARM64 CPUs. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -*/ - -#ifndef _LJ_TARGET_ARM64_H -#define _LJ_TARGET_ARM64_H - -/* -- Registers IDs ------------------------------------------------------- */ - -#define GPRDEF(_) \ - _(X0) _(X1) _(X2) _(X3) _(X4) _(X5) _(X6) _(X7) \ - _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \ - _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \ - _(X24) _(X25) _(X26) _(X27) _(X28) _(FP) _(LR) _(SP) -#define FPRDEF(_) \ - _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \ - _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) \ - _(D16) _(D17) _(D18) _(D19) _(D20) _(D21) _(D22) _(D23) \ - _(D24) _(D25) _(D26) _(D27) _(D28) _(D29) _(D30) _(D31) -#define VRIDDEF(_) - -#define RIDENUM(name) RID_##name, - -enum { - GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ - FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ - RID_MAX, - RID_TMP = RID_LR, - RID_ZERO = RID_SP, - - /* Calling conventions. */ - RID_RET = RID_X0, - RID_FPRET = RID_D0, - - /* These definitions must match with the *.dasc file(s): */ - RID_BASE = RID_X19, /* Interpreter BASE. */ - RID_LPC = RID_X21, /* Interpreter PC. */ - RID_GL = RID_X22, /* Interpreter GL. */ - RID_LREG = RID_X23, /* Interpreter L. */ - - /* Register ranges [min, max) and number of registers. */ - RID_MIN_GPR = RID_X0, - RID_MAX_GPR = RID_SP+1, - RID_MIN_FPR = RID_MAX_GPR, - RID_MAX_FPR = RID_D31+1, - RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, - RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR -}; - -#define RID_NUM_KREF RID_NUM_GPR -#define RID_MIN_KREF RID_X0 - -/* -- Register sets ------------------------------------------------------- */ - -/* Make use of all registers, except for x18, fp, lr and sp. */ -#define RSET_FIXED \ - (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP)|\ - RID2RSET(RID_GL)) -#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) -#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) -#define RSET_ALL (RSET_GPR|RSET_FPR) -#define RSET_INIT RSET_ALL - -/* lr is an implicit scratch register. */ -#define RSET_SCRATCH_GPR (RSET_RANGE(RID_X0, RID_X17+1)) -#define RSET_SCRATCH_FPR \ - (RSET_RANGE(RID_D0, RID_D7+1)|RSET_RANGE(RID_D16, RID_D31+1)) -#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) -#define REGARG_FIRSTGPR RID_X0 -#define REGARG_LASTGPR RID_X7 -#define REGARG_NUMGPR 8 -#define REGARG_FIRSTFPR RID_D0 -#define REGARG_LASTFPR RID_D7 -#define REGARG_NUMFPR 8 - -/* -- Spill slots --------------------------------------------------------- */ - -/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. -** -** SPS_FIXED: Available fixed spill slots in interpreter frame. -** This definition must match with the vm_arm64.dasc file. -** Pre-allocate some slots to avoid sp adjust in every root trace. -** -** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. -*/ -#define SPS_FIXED 4 -#define SPS_FIRST 2 - -#define SPOFS_TMP 0 - -#define sps_scale(slot) (4 * (int32_t)(slot)) -#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) - -/* -- Exit state ---------------------------------------------------------- */ - -/* This definition must match with the *.dasc file(s). */ -typedef struct { - lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ - intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ - int32_t spill[256]; /* Spill slots. */ -} ExitState; - -/* Highest exit + 1 indicates stack check. */ -#define EXITSTATE_CHECKEXIT 1 - -/* Return the address of a per-trace exit stub. */ -static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) -{ - while (*p == 0xd503201f) p++; /* Skip A64I_NOP. */ - return p + 3 + exitno; -} -/* Avoid dependence on lj_jit.h if only including lj_target.h. */ -#define exitstub_trace_addr(T, exitno) \ - exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno)) - -/* -- Instructions -------------------------------------------------------- */ - -/* Instruction fields. */ -#define A64F_D(r) (r) -#define A64F_N(r) ((r) << 5) -#define A64F_A(r) ((r) << 10) -#define A64F_M(r) ((r) << 16) -#define A64F_IMMS(x) ((x) << 10) -#define A64F_IMMR(x) ((x) << 16) -#define A64F_U16(x) ((x) << 5) -#define A64F_U12(x) ((x) << 10) -#define A64F_S26(x) (x) -#define A64F_S19(x) (((uint32_t)(x) & 0x7ffffu) << 5) -#define A64F_S14(x) ((x) << 5) -#define A64F_S9(x) ((x) << 12) -#define A64F_BIT(x) ((x) << 19) -#define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10)) -#define A64F_EX(ex) (A64I_EX | ((ex) << 13)) -#define A64F_EXSH(ex,x) (A64I_EX | ((ex) << 13) | ((x) << 10)) -#define A64F_FP8(x) ((x) << 13) -#define A64F_CC(cc) ((cc) << 12) -#define A64F_LSL16(x) (((x) / 16) << 21) -#define A64F_BSH(sh) ((sh) << 10) - -typedef enum A64Ins { - A64I_S = 0x20000000, - A64I_X = 0x80000000, - A64I_EX = 0x00200000, - A64I_ON = 0x00200000, - A64I_K12 = 0x1a000000, - A64I_K13 = 0x18000000, - A64I_LS_U = 0x01000000, - A64I_LS_S = 0x00800000, - A64I_LS_R = 0x01200800, - A64I_LS_SH = 0x00001000, - A64I_LS_UXTWx = 0x00004000, - A64I_LS_SXTWx = 0x0000c000, - A64I_LS_SXTXx = 0x0000e000, - A64I_LS_LSLx = 0x00006000, - - A64I_ADDw = 0x0b000000, - A64I_ADDx = 0x8b000000, - A64I_ADDSw = 0x2b000000, - A64I_ADDSx = 0xab000000, - A64I_NEGw = 0x4b0003e0, - A64I_NEGx = 0xcb0003e0, - A64I_SUBw = 0x4b000000, - A64I_SUBx = 0xcb000000, - A64I_SUBSw = 0x6b000000, - A64I_SUBSx = 0xeb000000, - - A64I_MULw = 0x1b007c00, - A64I_MULx = 0x9b007c00, - A64I_SMULL = 0x9b207c00, - - A64I_ANDw = 0x0a000000, - A64I_ANDx = 0x8a000000, - A64I_ANDSw = 0x6a000000, - A64I_ANDSx = 0xea000000, - A64I_EORw = 0x4a000000, - A64I_EORx = 0xca000000, - A64I_ORRw = 0x2a000000, - A64I_ORRx = 0xaa000000, - A64I_TSTw = 0x6a00001f, - A64I_TSTx = 0xea00001f, - - A64I_CMPw = 0x6b00001f, - A64I_CMPx = 0xeb00001f, - A64I_CMNw = 0x2b00001f, - A64I_CMNx = 0xab00001f, - A64I_CCMPw = 0x7a400000, - A64I_CCMPx = 0xfa400000, - A64I_CSELw = 0x1a800000, - A64I_CSELx = 0x9a800000, - - A64I_ASRw = 0x13007c00, - A64I_ASRx = 0x9340fc00, - A64I_LSLx = 0xd3400000, - A64I_LSRx = 0xd340fc00, - A64I_SHRw = 0x1ac02000, - A64I_SHRx = 0x9ac02000, /* lsl/lsr/asr/ror x0, x0, x0 */ - A64I_REVw = 0x5ac00800, - A64I_REVx = 0xdac00c00, - - A64I_EXTRw = 0x13800000, - A64I_EXTRx = 0x93c00000, - A64I_SBFMw = 0x13000000, - A64I_SBFMx = 0x93400000, - A64I_SXTBw = 0x13001c00, - A64I_SXTHw = 0x13003c00, - A64I_SXTW = 0x93407c00, - A64I_UBFMw = 0x53000000, - A64I_UBFMx = 0xd3400000, - A64I_UXTBw = 0x53001c00, - A64I_UXTHw = 0x53003c00, - - A64I_MOVw = 0x2a0003e0, - A64I_MOVx = 0xaa0003e0, - A64I_MVNw = 0x2a2003e0, - A64I_MVNx = 0xaa2003e0, - A64I_MOVKw = 0x72800000, - A64I_MOVKx = 0xf2800000, - A64I_MOVZw = 0x52800000, - A64I_MOVZx = 0xd2800000, - A64I_MOVNw = 0x12800000, - A64I_MOVNx = 0x92800000, - - A64I_LDRB = 0x39400000, - A64I_LDRH = 0x79400000, - A64I_LDRw = 0xb9400000, - A64I_LDRx = 0xf9400000, - A64I_LDRLw = 0x18000000, - A64I_LDRLx = 0x58000000, - A64I_STRB = 0x39000000, - A64I_STRH = 0x79000000, - A64I_STRw = 0xb9000000, - A64I_STRx = 0xf9000000, - A64I_STPw = 0x29000000, - A64I_STPx = 0xa9000000, - A64I_LDPw = 0x29400000, - A64I_LDPx = 0xa9400000, - - A64I_B = 0x14000000, - A64I_BCC = 0x54000000, - A64I_BL = 0x94000000, - A64I_BR = 0xd61f0000, - A64I_BLR = 0xd63f0000, - A64I_TBZ = 0x36000000, - A64I_TBNZ = 0x37000000, - A64I_CBZ = 0x34000000, - A64I_CBNZ = 0x35000000, - - A64I_NOP = 0xd503201f, - - /* FP */ - A64I_FADDd = 0x1e602800, - A64I_FSUBd = 0x1e603800, - A64I_FMADDd = 0x1f400000, - A64I_FMSUBd = 0x1f408000, - A64I_FNMADDd = 0x1f600000, - A64I_FNMSUBd = 0x1f608000, - A64I_FMULd = 0x1e600800, - A64I_FDIVd = 0x1e601800, - A64I_FNEGd = 0x1e614000, - A64I_FABS = 0x1e60c000, - A64I_FSQRTd = 0x1e61c000, - A64I_LDRs = 0xbd400000, - A64I_LDRd = 0xfd400000, - A64I_STRs = 0xbd000000, - A64I_STRd = 0xfd000000, - A64I_LDPs = 0x2d400000, - A64I_LDPd = 0x6d400000, - A64I_STPs = 0x2d000000, - A64I_STPd = 0x6d000000, - A64I_FCMPd = 0x1e602000, - A64I_FCMPZd = 0x1e602008, - A64I_FCSELd = 0x1e600c00, - A64I_FRINTMd = 0x1e654000, - A64I_FRINTPd = 0x1e64c000, - A64I_FRINTZd = 0x1e65c000, - - A64I_FCVT_F32_F64 = 0x1e624000, - A64I_FCVT_F64_F32 = 0x1e22c000, - A64I_FCVT_F32_S32 = 0x1e220000, - A64I_FCVT_F64_S32 = 0x1e620000, - A64I_FCVT_F32_U32 = 0x1e230000, - A64I_FCVT_F64_U32 = 0x1e630000, - A64I_FCVT_F32_S64 = 0x9e220000, - A64I_FCVT_F64_S64 = 0x9e620000, - A64I_FCVT_F32_U64 = 0x9e230000, - A64I_FCVT_F64_U64 = 0x9e630000, - A64I_FCVT_S32_F64 = 0x1e780000, - A64I_FCVT_S32_F32 = 0x1e380000, - A64I_FCVT_U32_F64 = 0x1e790000, - A64I_FCVT_U32_F32 = 0x1e390000, - A64I_FCVT_S64_F64 = 0x9e780000, - A64I_FCVT_S64_F32 = 0x9e380000, - A64I_FCVT_U64_F64 = 0x9e790000, - A64I_FCVT_U64_F32 = 0x9e390000, - - A64I_FMOV_S = 0x1e204000, - A64I_FMOV_D = 0x1e604000, - A64I_FMOV_R_S = 0x1e260000, - A64I_FMOV_S_R = 0x1e270000, - A64I_FMOV_R_D = 0x9e660000, - A64I_FMOV_D_R = 0x9e670000, - A64I_FMOV_DI = 0x1e601000, -} A64Ins; - -typedef enum A64Shift { - A64SH_LSL, A64SH_LSR, A64SH_ASR, A64SH_ROR -} A64Shift; - -typedef enum A64Extend { - A64EX_UXTB, A64EX_UXTH, A64EX_UXTW, A64EX_UXTX, - A64EX_SXTB, A64EX_SXTH, A64EX_SXTW, A64EX_SXTX, -} A64Extend; - -/* ARM condition codes. */ -typedef enum A64CC { - CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC, - CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL, - CC_HS = CC_CS, CC_LO = CC_CC -} A64CC; - -#endif diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h deleted file mode 100644 index 740687b355..0000000000 --- a/src/lj_target_mips.h +++ /dev/null @@ -1,377 +0,0 @@ -/* -** Definitions for MIPS CPUs. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -*/ - -#ifndef _LJ_TARGET_MIPS_H -#define _LJ_TARGET_MIPS_H - -/* -- Registers IDs ------------------------------------------------------- */ - -#define GPRDEF(_) \ - _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \ - _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \ - _(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \ - _(R24) _(R25) _(SYS1) _(SYS2) _(R28) _(SP) _(R30) _(RA) -#if LJ_SOFTFP -#define FPRDEF(_) -#else -#define FPRDEF(_) \ - _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ - _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ - _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ - _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) -#endif -#define VRIDDEF(_) - -#define RIDENUM(name) RID_##name, - -enum { - GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ - FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ - RID_MAX, - RID_ZERO = RID_R0, - RID_TMP = RID_RA, - RID_GP = RID_R28, - - /* Calling conventions. */ - RID_RET = RID_R2, -#if LJ_LE - RID_RETHI = RID_R3, - RID_RETLO = RID_R2, -#else - RID_RETHI = RID_R2, - RID_RETLO = RID_R3, -#endif -#if LJ_SOFTFP - RID_FPRET = RID_R2, -#else - RID_FPRET = RID_F0, -#endif - RID_CFUNCADDR = RID_R25, - - /* These definitions must match with the *.dasc file(s): */ - RID_BASE = RID_R16, /* Interpreter BASE. */ - RID_LPC = RID_R18, /* Interpreter PC. */ - RID_DISPATCH = RID_R19, /* Interpreter DISPATCH table. */ - RID_LREG = RID_R20, /* Interpreter L. */ - RID_JGL = RID_R30, /* On-trace: global_State + 32768. */ - - /* Register ranges [min, max) and number of registers. */ - RID_MIN_GPR = RID_R0, - RID_MAX_GPR = RID_RA+1, - RID_MIN_FPR = RID_MAX_GPR, -#if LJ_SOFTFP - RID_MAX_FPR = RID_MIN_FPR, -#else - RID_MAX_FPR = RID_F31+1, -#endif - RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, - RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. */ -}; - -#define RID_NUM_KREF RID_NUM_GPR -#define RID_MIN_KREF RID_R0 - -/* -- Register sets ------------------------------------------------------- */ - -/* Make use of all registers, except ZERO, TMP, SP, SYS1, SYS2, JGL and GP. */ -#define RSET_FIXED \ - (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\ - RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)|RID2RSET(RID_GP)) -#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) -#if LJ_SOFTFP -#define RSET_FPR 0 -#else -#if LJ_32 -#define RSET_FPR \ - (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ - RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ - RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\ - RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30)) -#else -#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) -#endif -#endif -#define RSET_ALL (RSET_GPR|RSET_FPR) -#define RSET_INIT RSET_ALL - -#define RSET_SCRATCH_GPR \ - (RSET_RANGE(RID_R1, RID_R15+1)|\ - RID2RSET(RID_R24)|RID2RSET(RID_R25)) -#if LJ_SOFTFP -#define RSET_SCRATCH_FPR 0 -#else -#if LJ_32 -#define RSET_SCRATCH_FPR \ - (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ - RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ - RID2RSET(RID_F16)|RID2RSET(RID_F18)) -#else -#define RSET_SCRATCH_FPR RSET_RANGE(RID_F0, RID_F24) -#endif -#endif -#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) -#define REGARG_FIRSTGPR RID_R4 -#if LJ_32 -#define REGARG_LASTGPR RID_R7 -#define REGARG_NUMGPR 4 -#else -#define REGARG_LASTGPR RID_R11 -#define REGARG_NUMGPR 8 -#endif -#if LJ_ABI_SOFTFP -#define REGARG_FIRSTFPR 0 -#define REGARG_LASTFPR 0 -#define REGARG_NUMFPR 0 -#else -#define REGARG_FIRSTFPR RID_F12 -#if LJ_32 -#define REGARG_LASTFPR RID_F14 -#define REGARG_NUMFPR 2 -#else -#define REGARG_LASTFPR RID_F19 -#define REGARG_NUMFPR 8 -#endif -#endif - -/* -- Spill slots --------------------------------------------------------- */ - -/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. -** -** SPS_FIXED: Available fixed spill slots in interpreter frame. -** This definition must match with the *.dasc file(s). -** -** SPS_FIRST: First spill slot for general use. -*/ -#if LJ_32 -#define SPS_FIXED 5 -#else -#define SPS_FIXED 4 -#endif -#define SPS_FIRST 4 - -#define SPOFS_TMP 0 - -#define sps_scale(slot) (4 * (int32_t)(slot)) -#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) - -/* -- Exit state ---------------------------------------------------------- */ - -/* This definition must match with the *.dasc file(s). */ -typedef struct { -#if !LJ_SOFTFP - lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ -#endif - intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ - int32_t spill[256]; /* Spill slots. */ -} ExitState; - -/* Highest exit + 1 indicates stack check. */ -#define EXITSTATE_CHECKEXIT 1 - -/* Return the address of a per-trace exit stub. */ -static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p) -{ - while (*p == 0x00000000) p++; /* Skip MIPSI_NOP. */ - return p; -} -/* Avoid dependence on lj_jit.h if only including lj_target.h. */ -#define exitstub_trace_addr(T, exitno) \ - exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode)) - -/* -- Instructions -------------------------------------------------------- */ - -/* Instruction fields. */ -#define MIPSF_S(r) ((r) << 21) -#define MIPSF_T(r) ((r) << 16) -#define MIPSF_D(r) ((r) << 11) -#define MIPSF_R(r) ((r) << 21) -#define MIPSF_H(r) ((r) << 16) -#define MIPSF_G(r) ((r) << 11) -#define MIPSF_F(r) ((r) << 6) -#define MIPSF_A(n) ((n) << 6) -#define MIPSF_M(n) ((n) << 11) -#define MIPSF_L(n) ((n) << 6) - -typedef enum MIPSIns { - MIPSI_D = 0x38, - MIPSI_DV = 0x10, - MIPSI_D32 = 0x3c, - /* Integer instructions. */ - MIPSI_MOVE = 0x00000025, - MIPSI_NOP = 0x00000000, - - MIPSI_LI = 0x24000000, - MIPSI_LU = 0x34000000, - MIPSI_LUI = 0x3c000000, - - MIPSI_AND = 0x00000024, - MIPSI_ANDI = 0x30000000, - MIPSI_OR = 0x00000025, - MIPSI_ORI = 0x34000000, - MIPSI_XOR = 0x00000026, - MIPSI_XORI = 0x38000000, - MIPSI_NOR = 0x00000027, - - MIPSI_SLT = 0x0000002a, - MIPSI_SLTU = 0x0000002b, - MIPSI_SLTI = 0x28000000, - MIPSI_SLTIU = 0x2c000000, - - MIPSI_ADDU = 0x00000021, - MIPSI_ADDIU = 0x24000000, - MIPSI_SUB = 0x00000022, - MIPSI_SUBU = 0x00000023, - MIPSI_MUL = 0x70000002, - MIPSI_DIV = 0x0000001a, - MIPSI_DIVU = 0x0000001b, - - MIPSI_MOVZ = 0x0000000a, - MIPSI_MOVN = 0x0000000b, - MIPSI_MFHI = 0x00000010, - MIPSI_MFLO = 0x00000012, - MIPSI_MULT = 0x00000018, - - MIPSI_SLL = 0x00000000, - MIPSI_SRL = 0x00000002, - MIPSI_SRA = 0x00000003, - MIPSI_ROTR = 0x00200002, /* MIPSXXR2 */ - MIPSI_DROTR = 0x0020003a, - MIPSI_DROTR32 = 0x0020003e, - MIPSI_SLLV = 0x00000004, - MIPSI_SRLV = 0x00000006, - MIPSI_SRAV = 0x00000007, - MIPSI_ROTRV = 0x00000046, /* MIPSXXR2 */ - MIPSI_DROTRV = 0x00000056, - - MIPSI_SEB = 0x7c000420, /* MIPSXXR2 */ - MIPSI_SEH = 0x7c000620, /* MIPSXXR2 */ - MIPSI_WSBH = 0x7c0000a0, /* MIPSXXR2 */ - MIPSI_DSBH = 0x7c0000a4, - - MIPSI_B = 0x10000000, - MIPSI_J = 0x08000000, - MIPSI_JAL = 0x0c000000, - MIPSI_JALX = 0x74000000, - MIPSI_JR = 0x00000008, - MIPSI_JALR = 0x0000f809, - - MIPSI_BEQ = 0x10000000, - MIPSI_BNE = 0x14000000, - MIPSI_BLEZ = 0x18000000, - MIPSI_BGTZ = 0x1c000000, - MIPSI_BLTZ = 0x04000000, - MIPSI_BGEZ = 0x04010000, - - /* Load/store instructions. */ - MIPSI_LW = 0x8c000000, - MIPSI_LD = 0xdc000000, - MIPSI_SW = 0xac000000, - MIPSI_SD = 0xfc000000, - MIPSI_LB = 0x80000000, - MIPSI_SB = 0xa0000000, - MIPSI_LH = 0x84000000, - MIPSI_SH = 0xa4000000, - MIPSI_LBU = 0x90000000, - MIPSI_LHU = 0x94000000, - MIPSI_LWC1 = 0xc4000000, - MIPSI_SWC1 = 0xe4000000, - MIPSI_LDC1 = 0xd4000000, - MIPSI_SDC1 = 0xf4000000, - - /* MIPS64 instructions. */ - MIPSI_DADD = 0x0000002c, - MIPSI_DADDI = 0x60000000, - MIPSI_DADDU = 0x0000002d, - MIPSI_DADDIU = 0x64000000, - MIPSI_DSUB = 0x0000002e, - MIPSI_DSUBU = 0x0000002f, - MIPSI_DDIV = 0x0000001e, - MIPSI_DDIVU = 0x0000001f, - MIPSI_DMULT = 0x0000001c, - MIPSI_DMULTU = 0x0000001d, - - MIPSI_DSLL = 0x00000038, - MIPSI_DSRL = 0x0000003a, - MIPSI_DSLLV = 0x00000014, - MIPSI_DSRLV = 0x00000016, - MIPSI_DSRA = 0x0000003b, - MIPSI_DSRAV = 0x00000017, - MIPSI_DSRA32 = 0x0000003f, - MIPSI_DSLL32 = 0x0000003c, - MIPSI_DSRL32 = 0x0000003e, - MIPSI_DSHD = 0x7c000164, - - MIPSI_AADDU = LJ_32 ? MIPSI_ADDU : MIPSI_DADDU, - MIPSI_AADDIU = LJ_32 ? MIPSI_ADDIU : MIPSI_DADDIU, - MIPSI_ASUBU = LJ_32 ? MIPSI_SUBU : MIPSI_DSUBU, - MIPSI_AL = LJ_32 ? MIPSI_LW : MIPSI_LD, - MIPSI_AS = LJ_32 ? MIPSI_SW : MIPSI_SD, - - /* Extract/insert instructions. */ - MIPSI_DEXTM = 0x7c000001, - MIPSI_DEXTU = 0x7c000002, - MIPSI_DEXT = 0x7c000003, - MIPSI_DINSM = 0x7c000005, - MIPSI_DINSU = 0x7c000006, - MIPSI_DINS = 0x7c000007, - - MIPSI_RINT_D = 0x4620001a, - MIPSI_RINT_S = 0x4600001a, - MIPSI_RINT = 0x4400001a, - MIPSI_FLOOR_D = 0x4620000b, - MIPSI_CEIL_D = 0x4620000a, - MIPSI_ROUND_D = 0x46200008, - - /* FP instructions. */ - MIPSI_MOV_S = 0x46000006, - MIPSI_MOV_D = 0x46200006, - MIPSI_MOVT_D = 0x46210011, - MIPSI_MOVF_D = 0x46200011, - - MIPSI_ABS_D = 0x46200005, - MIPSI_NEG_D = 0x46200007, - - MIPSI_ADD_D = 0x46200000, - MIPSI_SUB_D = 0x46200001, - MIPSI_MUL_D = 0x46200002, - MIPSI_DIV_D = 0x46200003, - MIPSI_SQRT_D = 0x46200004, - - MIPSI_ADD_S = 0x46000000, - MIPSI_SUB_S = 0x46000001, - - MIPSI_CVT_D_S = 0x46000021, - MIPSI_CVT_W_S = 0x46000024, - MIPSI_CVT_S_D = 0x46200020, - MIPSI_CVT_W_D = 0x46200024, - MIPSI_CVT_S_W = 0x46800020, - MIPSI_CVT_D_W = 0x46800021, - MIPSI_CVT_S_L = 0x46a00020, - MIPSI_CVT_D_L = 0x46a00021, - - MIPSI_TRUNC_W_S = 0x4600000d, - MIPSI_TRUNC_W_D = 0x4620000d, - MIPSI_TRUNC_L_S = 0x46000009, - MIPSI_TRUNC_L_D = 0x46200009, - MIPSI_FLOOR_W_S = 0x4600000f, - MIPSI_FLOOR_W_D = 0x4620000f, - - MIPSI_MFC1 = 0x44000000, - MIPSI_MTC1 = 0x44800000, - MIPSI_DMTC1 = 0x44a00000, - MIPSI_DMFC1 = 0x44200000, - - MIPSI_BC1F = 0x45000000, - MIPSI_BC1T = 0x45010000, - - MIPSI_C_EQ_D = 0x46200032, - MIPSI_C_OLT_S = 0x46000034, - MIPSI_C_OLT_D = 0x46200034, - MIPSI_C_ULT_D = 0x46200035, - MIPSI_C_OLE_D = 0x46200036, - MIPSI_C_ULE_D = 0x46200037, -} MIPSIns; - -#endif diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h deleted file mode 100644 index c5c991a377..0000000000 --- a/src/lj_target_ppc.h +++ /dev/null @@ -1,280 +0,0 @@ -/* -** Definitions for PPC CPUs. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -*/ - -#ifndef _LJ_TARGET_PPC_H -#define _LJ_TARGET_PPC_H - -/* -- Registers IDs ------------------------------------------------------- */ - -#define GPRDEF(_) \ - _(R0) _(SP) _(SYS1) _(R3) _(R4) _(R5) _(R6) _(R7) \ - _(R8) _(R9) _(R10) _(R11) _(R12) _(SYS2) _(R14) _(R15) \ - _(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \ - _(R24) _(R25) _(R26) _(R27) _(R28) _(R29) _(R30) _(R31) -#define FPRDEF(_) \ - _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ - _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ - _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ - _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) -#define VRIDDEF(_) - -#define RIDENUM(name) RID_##name, - -enum { - GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ - FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ - RID_MAX, - RID_TMP = RID_R0, - - /* Calling conventions. */ - RID_RET = RID_R3, - RID_RETHI = RID_R3, - RID_RETLO = RID_R4, - RID_FPRET = RID_F1, - - /* These definitions must match with the *.dasc file(s): */ - RID_BASE = RID_R14, /* Interpreter BASE. */ - RID_LPC = RID_R16, /* Interpreter PC. */ - RID_DISPATCH = RID_R17, /* Interpreter DISPATCH table. */ - RID_LREG = RID_R18, /* Interpreter L. */ - RID_JGL = RID_R31, /* On-trace: global_State + 32768. */ - - /* Register ranges [min, max) and number of registers. */ - RID_MIN_GPR = RID_R0, - RID_MAX_GPR = RID_R31+1, - RID_MIN_FPR = RID_F0, - RID_MAX_FPR = RID_F31+1, - RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, - RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR -}; - -#define RID_NUM_KREF RID_NUM_GPR -#define RID_MIN_KREF RID_R0 - -/* -- Register sets ------------------------------------------------------- */ - -/* Make use of all registers, except TMP, SP, SYS1, SYS2 and JGL. */ -#define RSET_FIXED \ - (RID2RSET(RID_TMP)|RID2RSET(RID_SP)|RID2RSET(RID_SYS1)|\ - RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)) -#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) -#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) -#define RSET_ALL (RSET_GPR|RSET_FPR) -#define RSET_INIT RSET_ALL - -#define RSET_SCRATCH_GPR (RSET_RANGE(RID_R3, RID_R12+1)) -#define RSET_SCRATCH_FPR (RSET_RANGE(RID_F0, RID_F13+1)) -#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) -#define REGARG_FIRSTGPR RID_R3 -#define REGARG_LASTGPR RID_R10 -#define REGARG_NUMGPR 8 -#define REGARG_FIRSTFPR RID_F1 -#define REGARG_LASTFPR RID_F8 -#define REGARG_NUMFPR 8 - -/* -- Spill slots --------------------------------------------------------- */ - -/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. -** -** SPS_FIXED: Available fixed spill slots in interpreter frame. -** This definition must match with the *.dasc file(s). -** -** SPS_FIRST: First spill slot for general use. -** [sp+12] tmplo word \ -** [sp+ 8] tmphi word / tmp dword, parameter area for callee -** [sp+ 4] tmpw, LR of callee -** [sp+ 0] stack chain -*/ -#define SPS_FIXED 7 -#define SPS_FIRST 4 - -/* Stack offsets for temporary slots. Used for FP<->int conversions etc. */ -#define SPOFS_TMPW 4 -#define SPOFS_TMP 8 -#define SPOFS_TMPHI 8 -#define SPOFS_TMPLO 12 - -#define sps_scale(slot) (4 * (int32_t)(slot)) -#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) - -/* -- Exit state ---------------------------------------------------------- */ - -/* This definition must match with the *.dasc file(s). */ -typedef struct { - lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ - intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ - int32_t spill[256]; /* Spill slots. */ -} ExitState; - -/* Highest exit + 1 indicates stack check. */ -#define EXITSTATE_CHECKEXIT 1 - -/* Return the address of a per-trace exit stub. */ -static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) -{ - while (*p == 0x60000000) p++; /* Skip PPCI_NOP. */ - return p + 3 + exitno; -} -/* Avoid dependence on lj_jit.h if only including lj_target.h. */ -#define exitstub_trace_addr(T, exitno) \ - exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno)) - -/* -- Instructions -------------------------------------------------------- */ - -/* Instruction fields. */ -#define PPCF_CC(cc) ((((cc) & 3) << 16) | (((cc) & 4) << 22)) -#define PPCF_T(r) ((r) << 21) -#define PPCF_A(r) ((r) << 16) -#define PPCF_B(r) ((r) << 11) -#define PPCF_C(r) ((r) << 6) -#define PPCF_MB(n) ((n) << 6) -#define PPCF_ME(n) ((n) << 1) -#define PPCF_Y 0x00200000 -#define PPCF_DOT 0x00000001 - -typedef enum PPCIns { - /* Integer instructions. */ - PPCI_MR = 0x7c000378, - PPCI_NOP = 0x60000000, - - PPCI_LI = 0x38000000, - PPCI_LIS = 0x3c000000, - - PPCI_ADD = 0x7c000214, - PPCI_ADDC = 0x7c000014, - PPCI_ADDO = 0x7c000614, - PPCI_ADDE = 0x7c000114, - PPCI_ADDZE = 0x7c000194, - PPCI_ADDME = 0x7c0001d4, - PPCI_ADDI = 0x38000000, - PPCI_ADDIS = 0x3c000000, - PPCI_ADDIC = 0x30000000, - PPCI_ADDICDOT = 0x34000000, - - PPCI_SUBF = 0x7c000050, - PPCI_SUBFC = 0x7c000010, - PPCI_SUBFO = 0x7c000450, - PPCI_SUBFE = 0x7c000110, - PPCI_SUBFZE = 0x7c000190, - PPCI_SUBFME = 0x7c0001d0, - PPCI_SUBFIC = 0x20000000, - - PPCI_NEG = 0x7c0000d0, - - PPCI_AND = 0x7c000038, - PPCI_ANDC = 0x7c000078, - PPCI_NAND = 0x7c0003b8, - PPCI_ANDIDOT = 0x70000000, - PPCI_ANDISDOT = 0x74000000, - - PPCI_OR = 0x7c000378, - PPCI_NOR = 0x7c0000f8, - PPCI_ORI = 0x60000000, - PPCI_ORIS = 0x64000000, - - PPCI_XOR = 0x7c000278, - PPCI_EQV = 0x7c000238, - PPCI_XORI = 0x68000000, - PPCI_XORIS = 0x6c000000, - - PPCI_CMPW = 0x7c000000, - PPCI_CMPLW = 0x7c000040, - PPCI_CMPWI = 0x2c000000, - PPCI_CMPLWI = 0x28000000, - - PPCI_MULLW = 0x7c0001d6, - PPCI_MULLI = 0x1c000000, - PPCI_MULLWO = 0x7c0005d6, - - PPCI_EXTSB = 0x7c000774, - PPCI_EXTSH = 0x7c000734, - - PPCI_SLW = 0x7c000030, - PPCI_SRW = 0x7c000430, - PPCI_SRAW = 0x7c000630, - PPCI_SRAWI = 0x7c000670, - - PPCI_RLWNM = 0x5c000000, - PPCI_RLWINM = 0x54000000, - PPCI_RLWIMI = 0x50000000, - - PPCI_B = 0x48000000, - PPCI_BL = 0x48000001, - PPCI_BC = 0x40800000, - PPCI_BCL = 0x40800001, - PPCI_BCTR = 0x4e800420, - PPCI_BCTRL = 0x4e800421, - - PPCI_CRANDC = 0x4c000102, - PPCI_CRXOR = 0x4c000182, - PPCI_CRAND = 0x4c000202, - PPCI_CREQV = 0x4c000242, - PPCI_CRORC = 0x4c000342, - PPCI_CROR = 0x4c000382, - - PPCI_MFLR = 0x7c0802a6, - PPCI_MTCTR = 0x7c0903a6, - - PPCI_MCRXR = 0x7c000400, - - /* Load/store instructions. */ - PPCI_LWZ = 0x80000000, - PPCI_LBZ = 0x88000000, - PPCI_STW = 0x90000000, - PPCI_STB = 0x98000000, - PPCI_LHZ = 0xa0000000, - PPCI_LHA = 0xa8000000, - PPCI_STH = 0xb0000000, - - PPCI_STWU = 0x94000000, - - PPCI_LFS = 0xc0000000, - PPCI_LFD = 0xc8000000, - PPCI_STFS = 0xd0000000, - PPCI_STFD = 0xd8000000, - - PPCI_LWZX = 0x7c00002e, - PPCI_LBZX = 0x7c0000ae, - PPCI_STWX = 0x7c00012e, - PPCI_STBX = 0x7c0001ae, - PPCI_LHZX = 0x7c00022e, - PPCI_LHAX = 0x7c0002ae, - PPCI_STHX = 0x7c00032e, - - PPCI_LWBRX = 0x7c00042c, - PPCI_STWBRX = 0x7c00052c, - - PPCI_LFSX = 0x7c00042e, - PPCI_LFDX = 0x7c0004ae, - PPCI_STFSX = 0x7c00052e, - PPCI_STFDX = 0x7c0005ae, - - /* FP instructions. */ - PPCI_FMR = 0xfc000090, - PPCI_FNEG = 0xfc000050, - PPCI_FABS = 0xfc000210, - - PPCI_FRSP = 0xfc000018, - PPCI_FCTIWZ = 0xfc00001e, - - PPCI_FADD = 0xfc00002a, - PPCI_FSUB = 0xfc000028, - PPCI_FMUL = 0xfc000032, - PPCI_FDIV = 0xfc000024, - PPCI_FSQRT = 0xfc00002c, - - PPCI_FMADD = 0xfc00003a, - PPCI_FMSUB = 0xfc000038, - PPCI_FNMSUB = 0xfc00003c, - - PPCI_FCMPU = 0xfc000000, - PPCI_FSEL = 0xfc00002e, -} PPCIns; - -typedef enum PPCCC { - CC_GE, CC_LE, CC_NE, CC_NS, CC_LT, CC_GT, CC_EQ, CC_SO -} PPCCC; - -#endif diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index 356f792459..74e0f4eae0 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h @@ -8,19 +8,12 @@ /* -- Registers IDs ------------------------------------------------------- */ -#if LJ_64 #define GPRDEF(_) \ _(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI) \ _(R8D) _(R9D) _(R10D) _(R11D) _(R12D) _(R13D) _(R14D) _(R15D) #define FPRDEF(_) \ _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) \ _(XMM8) _(XMM9) _(XMM10) _(XMM11) _(XMM12) _(XMM13) _(XMM14) _(XMM15) -#else -#define GPRDEF(_) \ - _(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI) -#define FPRDEF(_) \ - _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) -#endif #define VRIDDEF(_) \ _(MRM) _(RIP) @@ -36,22 +29,12 @@ enum { /* Calling conventions. */ RID_SP = RID_ESP, RID_RET = RID_EAX, -#if LJ_64 RID_FPRET = RID_XMM0, -#else - RID_RETLO = RID_EAX, - RID_RETHI = RID_EDX, -#endif /* These definitions must match with the *.dasc file(s): */ RID_BASE = RID_EDX, /* Interpreter BASE. */ -#if LJ_64 && !LJ_ABI_WIN RID_LPC = RID_EBX, /* Interpreter PC. */ RID_DISPATCH = RID_R14D, /* Interpreter DISPATCH table. */ -#else - RID_LPC = RID_ESI, /* Interpreter PC. */ - RID_DISPATCH = RID_EBX, /* Interpreter DISPATCH table. */ -#endif /* Register ranges [min, max) and number of registers. */ RID_MIN_GPR = RID_EAX, @@ -72,28 +55,11 @@ enum { #define RSET_ALL (RSET_GPR|RSET_FPR) #define RSET_INIT RSET_ALL -#if LJ_64 /* Note: this requires the use of FORCE_REX! */ #define RSET_GPR8 RSET_GPR -#else -#define RSET_GPR8 (RSET_RANGE(RID_EAX, RID_EBX+1)) -#endif /* ABI-specific register sets. */ #define RSET_ACD (RID2RSET(RID_EAX)|RID2RSET(RID_ECX)|RID2RSET(RID_EDX)) -#if LJ_64 -#if LJ_ABI_WIN -/* Windows x64 ABI. */ -#define RSET_SCRATCH \ - (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1)) -#define REGARG_GPRS \ - (RID_ECX|((RID_EDX|((RID_R8D|(RID_R9D<<5))<<5))<<5)) -#define REGARG_NUMGPR 4 -#define REGARG_NUMFPR 4 -#define REGARG_FIRSTFPR RID_XMM0 -#define REGARG_LASTFPR RID_XMM3 -#define STACKARG_OFS (4*8) -#else /* The rest of the civilized x64 world has a common ABI. */ #define RSET_SCRATCH \ (RSET_ACD|RSET_RANGE(RID_ESI, RID_R11D+1)|RSET_FPR) @@ -105,21 +71,10 @@ enum { #define REGARG_FIRSTFPR RID_XMM0 #define REGARG_LASTFPR RID_XMM7 #define STACKARG_OFS 0 -#endif -#else -/* Common x86 ABI. */ -#define RSET_SCRATCH (RSET_ACD|RSET_FPR) -#define REGARG_GPRS (RID_ECX|(RID_EDX<<5)) /* Fastcall only. */ -#define REGARG_NUMGPR 2 /* Fastcall only. */ -#define REGARG_NUMFPR 0 -#define STACKARG_OFS 0 -#endif -#if LJ_64 /* Prefer the low 8 regs of each type to reduce REX prefixes. */ #undef rset_picktop #define rset_picktop(rs) (lj_fls(lj_bswap(rs)) ^ 0x18) -#endif /* -- Spill slots --------------------------------------------------------- */ @@ -130,22 +85,8 @@ enum { ** ** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. */ -#if LJ_64 -#if LJ_ABI_WIN -#define SPS_FIXED (4*2) -#define SPS_FIRST (4*2) /* Don't use callee register save area. */ -#else -#if LJ_GC64 #define SPS_FIXED 2 -#else -#define SPS_FIXED 4 -#endif -#define SPS_FIRST 2 -#endif -#else -#define SPS_FIXED 6 #define SPS_FIRST 2 -#endif #define SPOFS_TMP 0 diff --git a/src/lj_trace.c b/src/lj_trace.c index 80a7f024af..0d139cb174 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -8,7 +8,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_gc.h" #include "lj_err.h" @@ -82,41 +81,6 @@ static TraceNo trace_findfree(jit_State *J) memcpy(p, J->cur.field, J->cur.szfield*sizeof(tp)); \ p += J->cur.szfield*sizeof(tp); -#ifdef LUAJIT_USE_PERFTOOLS -/* -** Create symbol table of JIT-compiled code. For use with Linux perf tools. -** Example usage: -** perf record -f -e cycles luajit test.lua -** perf report -s symbol -** rm perf.data /tmp/perf-*.map -*/ -#include -#include - -static void perftools_addtrace(GCtrace *T) -{ - static FILE *fp; - GCproto *pt = &gcref(T->startpt)->pt; - const BCIns *startpc = mref(T->startpc, const BCIns); - const char *name = proto_chunknamestr(pt); - BCLine lineno; - if (name[0] == '@' || name[0] == '=') - name++; - else - name = "(string)"; - lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc); - lineno = lj_debug_line(pt, proto_bcpos(pt, startpc)); - if (!fp) { - char fname[40]; - sprintf(fname, "/tmp/perf-%d.map", getpid()); - if (!(fp = fopen(fname, "w"))) return; - setlinebuf(fp); - } - fprintf(fp, "%lx %x TRACE_%d::%s:%u\n", - (long)T->mcode, T->szmcode, T->traceno, name, lineno); -} -#endif - /* Allocate space for copy of T. */ GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T) { @@ -159,9 +123,6 @@ static void trace_save(jit_State *J, GCtrace *T) setgcrefp(J->trace[T->traceno], T); lj_gc_barriertrace(J2G(J), T->traceno); lj_gdbjit_addtrace(J, T); -#ifdef LUAJIT_USE_PERFTOOLS - perftools_addtrace(T); -#endif } void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T) @@ -317,32 +278,10 @@ void lj_trace_initstate(global_State *g) tv[1].u64 = U64x(80000000,00000000); /* Initialize 32/64 bit constants. */ -#if LJ_TARGET_X86ORX64 J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000); -#if LJ_32 - J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000); -#endif J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000; -#endif -#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); -#endif -#if LJ_TARGET_PPC - J->k32[LJ_K32_2P52_2P31] = 0x59800004; - J->k32[LJ_K32_2P52] = 0x59800000; -#endif -#if LJ_TARGET_PPC || LJ_TARGET_MIPS - J->k32[LJ_K32_2P31] = 0x4f000000; -#endif -#if LJ_TARGET_MIPS - J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000); -#if LJ_64 - J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000); - J->k32[LJ_K32_2P63] = 0x5f000000; - J->k32[LJ_K32_M2P64] = 0xdf800000; -#endif -#endif } /* Free everything associated with the JIT compiler state. */ @@ -788,29 +727,6 @@ static TValue *trace_exit_cp(lua_State *L, lua_CFunction dummy, void *ud) return NULL; } -#ifndef LUAJIT_DISABLE_VMEVENT -/* Push all registers from exit state. */ -static void trace_exit_regs(lua_State *L, ExitState *ex) -{ - int32_t i; - setintV(L->top++, RID_NUM_GPR); - setintV(L->top++, RID_NUM_FPR); - for (i = 0; i < RID_NUM_GPR; i++) { - if (sizeof(ex->gpr[i]) == sizeof(int32_t)) - setintV(L->top++, (int32_t)ex->gpr[i]); - else - setnumV(L->top++, (lua_Number)ex->gpr[i]); - } -#if !LJ_SOFTFP - for (i = 0; i < RID_NUM_FPR; i++) { - setnumV(L->top, ex->fpr[i]); - if (LJ_UNLIKELY(tvisnan(L->top))) - setnanV(L->top); - L->top++; - } -#endif -} -#endif #ifdef EXITSTATE_PCREG /* Determine trace number from pc of exit instruction. */ @@ -906,4 +822,3 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) } } -#endif diff --git a/src/lj_trace.h b/src/lj_trace.h index 22cae741f3..b04ba89ad0 100644 --- a/src/lj_trace.h +++ b/src/lj_trace.h @@ -8,7 +8,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_jit.h" #include "lj_dispatch.h" @@ -42,14 +41,5 @@ LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr); #define lj_trace_abort(g) (G2J(g)->state &= ~LJ_TRACE_ACTIVE) #define lj_trace_end(J) (J->state = LJ_TRACE_END) -#else - -#define lj_trace_flushall(L) (UNUSED(L), 0) -#define lj_trace_initstate(g) UNUSED(g) -#define lj_trace_freestate(g) UNUSED(g) -#define lj_trace_abort(g) UNUSED(g) -#define lj_trace_end(J) UNUSED(J) - -#endif #endif diff --git a/src/lj_vm.h b/src/lj_vm.h index 1cc7eed782..85dc99f848 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h @@ -17,27 +17,14 @@ LJ_ASMF int lj_vm_cpcall(lua_State *L, lua_CFunction func, void *ud, LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef); LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_c(void *cframe, int errcode); LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_ff(void *cframe); -#if LJ_ABI_WIN && LJ_TARGET_X86 -LJ_ASMF_NORET void LJ_FASTCALL lj_vm_rtlunwind(void *cframe, void *excptrec, - void *unwinder, int errcode); -#endif LJ_ASMF void lj_vm_unwind_c_eh(void); LJ_ASMF void lj_vm_unwind_ff_eh(void); -#if LJ_TARGET_X86ORX64 LJ_ASMF void lj_vm_unwind_rethrow(void); -#endif /* Miscellaneous functions. */ -#if LJ_TARGET_X86ORX64 LJ_ASMF int lj_vm_cpuid(uint32_t f, uint32_t res[4]); -#endif -#if LJ_TARGET_PPC -void lj_vm_cachesync(void *start, void *end); -#endif LJ_ASMF double lj_vm_foldarith(double x, double y, int op); -#if LJ_HASJIT LJ_ASMF double lj_vm_foldfpm(double x, int op); -#endif #if !LJ_ARCH_HASFPU /* Declared in lj_obj.h: LJ_ASMF int32_t lj_vm_tobit(double x); */ #endif @@ -54,53 +41,27 @@ LJ_ASMF void lj_vm_exit_handler(void); LJ_ASMF void lj_vm_exit_interp(void); /* Internal math helper functions. */ -#if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP) -#define lj_vm_floor floor -#define lj_vm_ceil ceil -#else LJ_ASMF double lj_vm_floor(double); LJ_ASMF double lj_vm_ceil(double); -#if LJ_TARGET_ARM -LJ_ASMF double lj_vm_floor_sf(double); -LJ_ASMF double lj_vm_ceil_sf(double); -#endif -#endif #ifdef LUAJIT_NO_LOG2 LJ_ASMF double lj_vm_log2(double); #else #define lj_vm_log2 log2 #endif -#if !(defined(_LJ_DISPATCH_H) && LJ_TARGET_MIPS) LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); -#endif -#if LJ_HASJIT -#if LJ_TARGET_X86ORX64 LJ_ASMF void lj_vm_floor_sse(void); LJ_ASMF void lj_vm_ceil_sse(void); LJ_ASMF void lj_vm_trunc_sse(void); LJ_ASMF void lj_vm_powi_sse(void); #define lj_vm_powi NULL -#else -LJ_ASMF double lj_vm_powi(double, int32_t); -#endif -#if LJ_TARGET_PPC || LJ_TARGET_ARM64 -#define lj_vm_trunc trunc -#else LJ_ASMF double lj_vm_trunc(double); -#if LJ_TARGET_ARM -LJ_ASMF double lj_vm_trunc_sf(double); -#endif -#endif #ifdef LUAJIT_NO_EXP2 LJ_ASMF double lj_vm_exp2(double); #else #define lj_vm_exp2 exp2 #endif -#if LJ_HASFFI LJ_ASMF int lj_vm_errno(void); -#endif -#endif /* Continuations for metamethods. */ LJ_ASMF void lj_cont_cat(void); /* Continue with concatenation. */ diff --git a/src/lj_vmevent.h b/src/lj_vmevent.h index 050fb4dd24..f0040922ca 100644 --- a/src/lj_vmevent.h +++ b/src/lj_vmevent.h @@ -30,30 +30,7 @@ typedef enum { LJ_VMEVENT__MAX } VMEvent; -#ifdef LUAJIT_DISABLE_VMEVENT #define lj_vmevent_send(L, ev, args) UNUSED(L) #define lj_vmevent_send_(L, ev, args, post) UNUSED(L) -#else -#define lj_vmevent_send(L, ev, args) \ - if (G(L)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \ - ptrdiff_t argbase = lj_vmevent_prepare(L, LJ_VMEVENT_##ev); \ - if (argbase) { \ - args \ - lj_vmevent_call(L, argbase); \ - } \ - } -#define lj_vmevent_send_(L, ev, args, post) \ - if (G(L)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \ - ptrdiff_t argbase = lj_vmevent_prepare(L, LJ_VMEVENT_##ev); \ - if (argbase) { \ - args \ - lj_vmevent_call(L, argbase); \ - post \ - } \ - } - -LJ_FUNC ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev); -LJ_FUNC void lj_vmevent_call(lua_State *L, ptrdiff_t argbase); -#endif #endif diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index b231d3e811..a8db3a7af7 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c @@ -15,24 +15,6 @@ /* -- Wrapper functions --------------------------------------------------- */ -#if LJ_TARGET_X86 && __ELF__ && __PIC__ -/* Wrapper functions to deal with the ELF/x86 PIC disaster. */ -LJ_FUNCA double lj_wrap_log(double x) { return log(x); } -LJ_FUNCA double lj_wrap_log10(double x) { return log10(x); } -LJ_FUNCA double lj_wrap_exp(double x) { return exp(x); } -LJ_FUNCA double lj_wrap_sin(double x) { return sin(x); } -LJ_FUNCA double lj_wrap_cos(double x) { return cos(x); } -LJ_FUNCA double lj_wrap_tan(double x) { return tan(x); } -LJ_FUNCA double lj_wrap_asin(double x) { return asin(x); } -LJ_FUNCA double lj_wrap_acos(double x) { return acos(x); } -LJ_FUNCA double lj_wrap_atan(double x) { return atan(x); } -LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); } -LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); } -LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); } -LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); } -LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); } -LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); } -#endif /* -- Helper functions for generated machine code ------------------------- */ @@ -47,17 +29,14 @@ double lj_vm_foldarith(double x, double y, int op) case IR_POW - IR_ADD: return pow(x, y); break; case IR_NEG - IR_ADD: return -x; break; case IR_ABS - IR_ADD: return fabs(x); break; -#if LJ_HASJIT case IR_ATAN2 - IR_ADD: return atan2(x, y); break; case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break; case IR_MIN - IR_ADD: return x > y ? y : x; break; case IR_MAX - IR_ADD: return x < y ? y : x; break; -#endif default: return x; } } -#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) { uint32_t y, ua, ub; @@ -69,9 +48,7 @@ int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) if (((int32_t)y^b) < 0) y = (uint32_t)-(int32_t)y; return (int32_t)y; } -#endif -#if LJ_HASJIT #ifdef LUAJIT_NO_LOG2 double lj_vm_log2(double a) @@ -87,39 +64,6 @@ double lj_vm_exp2(double a) } #endif -#if !LJ_TARGET_X86ORX64 -/* Unsigned x^k. */ -static double lj_vm_powui(double x, uint32_t k) -{ - double y; - lua_assert(k != 0); - for (; (k & 1) == 0; k >>= 1) x *= x; - y = x; - if ((k >>= 1) != 0) { - for (;;) { - x *= x; - if (k == 1) break; - if (k & 1) y *= x; - k >>= 1; - } - y *= x; - } - return y; -} - -/* Signed x^k. */ -double lj_vm_powi(double x, int32_t k) -{ - if (k > 1) - return lj_vm_powui(x, (uint32_t)k); - else if (k == 1) - return x; - else if (k == 0) - return 1.0; - else - return 1.0 / lj_vm_powui(x, (uint32_t)-k); -} -#endif /* Computes fpm(x) for extended math functions. */ double lj_vm_foldfpm(double x, int fpm) @@ -142,11 +86,8 @@ double lj_vm_foldfpm(double x, int fpm) return 0; } -#if LJ_HASFFI int lj_vm_errno(void) { return errno; } -#endif -#endif diff --git a/src/luajit.c b/src/luajit.c index 8c8cf9e6aa..d8d72a7a82 100644 --- a/src/luajit.c +++ b/src/luajit.c @@ -19,28 +19,14 @@ #include "lj_arch.h" -#if LJ_TARGET_POSIX #include #define lua_stdin_is_tty() isatty(0) -#elif LJ_TARGET_WINDOWS -#include -#ifdef __BORLANDC__ -#define lua_stdin_is_tty() isatty(_fileno(stdin)) -#else -#define lua_stdin_is_tty() _isatty(_fileno(stdin)) -#endif -#else -#define lua_stdin_is_tty() 1 -#endif - -#if !LJ_TARGET_CONSOLE + #include -#endif static lua_State *globalL = NULL; static const char *progname = LUA_PROGNAME; -#if !LJ_TARGET_CONSOLE static void lstop(lua_State *L, lua_Debug *ar) { (void)ar; /* unused arg. */ @@ -57,7 +43,6 @@ static void laction(int i) terminate process (default action) */ lua_sethook(globalL, lstop, LUA_MASKCALL | LUA_MASKRET | LUA_MASKCOUNT, 1); } -#endif static void print_usage(void) { @@ -115,13 +100,9 @@ static int docall(lua_State *L, int narg, int clear) int base = lua_gettop(L) - narg; /* function index */ lua_pushcfunction(L, traceback); /* push traceback function */ lua_insert(L, base); /* put it under chunk and args */ -#if !LJ_TARGET_CONSOLE signal(SIGINT, laction); -#endif status = lua_pcall(L, narg, (clear ? 0 : LUA_MULTRET), base); -#if !LJ_TARGET_CONSOLE signal(SIGINT, SIG_DFL); -#endif lua_remove(L, base); /* remove traceback function */ /* force a complete garbage collection in case of errors */ if (status != 0) lua_gc(L, LUA_GCCOLLECT, 0); @@ -488,11 +469,7 @@ static int runargs(lua_State *L, char **argv, int argn) static int handle_luainit(lua_State *L) { -#if LJ_TARGET_CONSOLE - const char *init = NULL; -#else const char *init = getenv(LUA_INIT); -#endif if (init == NULL) return 0; /* status OK */ else if (init[0] == '@') diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc deleted file mode 100644 index 780cc16e6d..0000000000 --- a/src/vm_arm.dasc +++ /dev/null @@ -1,4593 +0,0 @@ -|// Low-level VM code for ARM CPUs. -|// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -| -|.arch arm -|.section code_op, code_sub -| -|.actionlist build_actionlist -|.globals GLOB_ -|.globalnames globnames -|.externnames extnames -| -|// Note: The ragged indentation of the instructions is intentional. -|// The starting columns indicate data dependencies. -| -|//----------------------------------------------------------------------- -| -|// Fixed register assignments for the interpreter. -| -|// The following must be C callee-save. -|.define MASKR8, r4 // 255*8 constant for fast bytecode decoding. -|.define KBASE, r5 // Constants of current Lua function. -|.define PC, r6 // Next PC. -|.define DISPATCH, r7 // Opcode dispatch table. -|.define LREG, r8 // Register holding lua_State (also in SAVE_L). -| -|// C callee-save in EABI, but often refetched. Temporary in iOS 3.0+. -|.define BASE, r9 // Base of current Lua stack frame. -| -|// The following temporaries are not saved across C calls, except for RA/RC. -|.define RA, r10 // Callee-save. -|.define RC, r11 // Callee-save. -|.define RB, r12 -|.define OP, r12 // Overlaps RB, must not be lr. -|.define INS, lr -| -|// Calling conventions. Also used as temporaries. -|.define CARG1, r0 -|.define CARG2, r1 -|.define CARG3, r2 -|.define CARG4, r3 -|.define CARG12, r0 // For 1st soft-fp double. -|.define CARG34, r2 // For 2nd soft-fp double. -| -|.define CRET1, r0 -|.define CRET2, r1 -| -|// Stack layout while in interpreter. Must match with lj_frame.h. -|.define SAVE_R4, [sp, #28] -|.define CFRAME_SPACE, #28 -|.define SAVE_ERRF, [sp, #24] -|.define SAVE_NRES, [sp, #20] -|.define SAVE_CFRAME, [sp, #16] -|.define SAVE_L, [sp, #12] -|.define SAVE_PC, [sp, #8] -|.define SAVE_MULTRES, [sp, #4] -|.define ARG5, [sp] -| -|.define TMPDhi, [sp, #4] -|.define TMPDlo, [sp] -|.define TMPD, [sp] -|.define TMPDp, sp -| -|.if FPU -|.macro saveregs -| push {r5, r6, r7, r8, r9, r10, r11, lr} -| vpush {d8-d15} -| sub sp, sp, CFRAME_SPACE+4 -| str r4, SAVE_R4 -|.endmacro -|.macro restoreregs_ret -| ldr r4, SAVE_R4 -| add sp, sp, CFRAME_SPACE+4 -| vpop {d8-d15} -| pop {r5, r6, r7, r8, r9, r10, r11, pc} -|.endmacro -|.else -|.macro saveregs -| push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -| sub sp, sp, CFRAME_SPACE -|.endmacro -|.macro restoreregs_ret -| add sp, sp, CFRAME_SPACE -| pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -|.endmacro -|.endif -| -|// Type definitions. Some of these are only used for documentation. -|.type L, lua_State, LREG -|.type GL, global_State -|.type TVALUE, TValue -|.type GCOBJ, GCobj -|.type STR, GCstr -|.type TAB, GCtab -|.type LFUNC, GCfuncL -|.type CFUNC, GCfuncC -|.type PROTO, GCproto -|.type UPVAL, GCupval -|.type NODE, Node -|.type NARGS8, int -|.type TRACE, GCtrace -|.type SBUF, SBuf -| -|//----------------------------------------------------------------------- -| -|// Trap for not-yet-implemented parts. -|.macro NYI; ud; .endmacro -| -|//----------------------------------------------------------------------- -| -|// Access to frame relative to BASE. -|.define FRAME_FUNC, #-8 -|.define FRAME_PC, #-4 -| -|.macro decode_RA8, dst, ins; and dst, MASKR8, ins, lsr #5; .endmacro -|.macro decode_RB8, dst, ins; and dst, MASKR8, ins, lsr #21; .endmacro -|.macro decode_RC8, dst, ins; and dst, MASKR8, ins, lsr #13; .endmacro -|.macro decode_RD, dst, ins; lsr dst, ins, #16; .endmacro -|.macro decode_OP, dst, ins; and dst, ins, #255; .endmacro -| -|// Instruction fetch. -|.macro ins_NEXT1 -| ldrb OP, [PC] -|.endmacro -|.macro ins_NEXT2 -| ldr INS, [PC], #4 -|.endmacro -|// Instruction decode+dispatch. -|.macro ins_NEXT3 -| ldr OP, [DISPATCH, OP, lsl #2] -| decode_RA8 RA, INS -| decode_RD RC, INS -| bx OP -|.endmacro -|.macro ins_NEXT -| ins_NEXT1 -| ins_NEXT2 -| ins_NEXT3 -|.endmacro -| -|// Instruction footer. -|.if 1 -| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. -| .define ins_next, ins_NEXT -| .define ins_next_, ins_NEXT -| .define ins_next1, ins_NEXT1 -| .define ins_next2, ins_NEXT2 -| .define ins_next3, ins_NEXT3 -|.else -| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. -| // Affects only certain kinds of benchmarks (and only with -j off). -| .macro ins_next -| b ->ins_next -| .endmacro -| .macro ins_next1 -| .endmacro -| .macro ins_next2 -| .endmacro -| .macro ins_next3 -| b ->ins_next -| .endmacro -| .macro ins_next_ -| ->ins_next: -| ins_NEXT -| .endmacro -|.endif -| -|// Avoid register name substitution for field name. -#define field_pc pc -| -|// Call decode and dispatch. -|.macro ins_callt -| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC -| ldr PC, LFUNC:CARG3->field_pc -| ldrb OP, [PC] // STALL: load PC. early PC. -| ldr INS, [PC], #4 -| ldr OP, [DISPATCH, OP, lsl #2] // STALL: load OP. early OP. -| decode_RA8 RA, INS -| add RA, RA, BASE -| bx OP -|.endmacro -| -|.macro ins_call -| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC -| str PC, [BASE, FRAME_PC] -| ins_callt // STALL: locked PC. -|.endmacro -| -|//----------------------------------------------------------------------- -| -|// Macros to test operand types. -|.macro checktp, reg, tp; cmn reg, #-tp; .endmacro -|.macro checktpeq, reg, tp; cmneq reg, #-tp; .endmacro -|.macro checktpne, reg, tp; cmnne reg, #-tp; .endmacro -|.macro checkstr, reg, target; checktp reg, LJ_TSTR; bne target; .endmacro -|.macro checktab, reg, target; checktp reg, LJ_TTAB; bne target; .endmacro -|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC; bne target; .endmacro -| -|// Assumes DISPATCH is relative to GL. -#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) -#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) -| -#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) -| -|.macro hotcheck, delta -| lsr CARG1, PC, #1 -| and CARG1, CARG1, #126 -| sub CARG1, CARG1, #-GG_DISP2HOT -| ldrh CARG2, [DISPATCH, CARG1] -| subs CARG2, CARG2, #delta -| strh CARG2, [DISPATCH, CARG1] -|.endmacro -| -|.macro hotloop -| hotcheck HOTCOUNT_LOOP -| blo ->vm_hotloop -|.endmacro -| -|.macro hotcall -| hotcheck HOTCOUNT_CALL -| blo ->vm_hotcall -|.endmacro -| -|// Set current VM state. -|.macro mv_vmstate, reg, st; mvn reg, #LJ_VMST_..st; .endmacro -|.macro st_vmstate, reg; str reg, [DISPATCH, #DISPATCH_GL(vmstate)]; .endmacro -| -|// Move table write barrier back. Overwrites mark and tmp. -|.macro barrierback, tab, mark, tmp -| ldr tmp, [DISPATCH, #DISPATCH_GL(gc.grayagain)] -| bic mark, mark, #LJ_GC_BLACK // black2gray(tab) -| str tab, [DISPATCH, #DISPATCH_GL(gc.grayagain)] -| strb mark, tab->marked -| str tmp, tab->gclist -|.endmacro -| -|.macro .IOS, a, b -|.if IOS -| a, b -|.endif -|.endmacro -| -|//----------------------------------------------------------------------- - -#if !LJ_DUALNUM -#error "Only dual-number mode supported for ARM target" -#endif - -/* Generate subroutines used by opcodes and other parts of the VM. */ -/* The .code_sub section should be last to help static branch prediction. */ -static void build_subroutines(BuildCtx *ctx) -{ - |.code_sub - | - |//----------------------------------------------------------------------- - |//-- Return handling ---------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_returnp: - | // See vm_return. Also: RB = previous base. - | tst PC, #FRAME_P - | beq ->cont_dispatch - | - | // Return from pcall or xpcall fast func. - | ldr PC, [RB, FRAME_PC] // Fetch PC of previous frame. - | mvn CARG2, #~LJ_TTRUE - | mov BASE, RB - | // Prepending may overwrite the pcall frame, so do it at the end. - | str CARG2, [RA, FRAME_PC] // Prepend true to results. - | sub RA, RA, #8 - | - |->vm_returnc: - | adds RC, RC, #8 // RC = (nresults+1)*8. - | mov CRET1, #LUA_YIELD - | beq ->vm_unwind_c_eh - | str RC, SAVE_MULTRES - | ands CARG1, PC, #FRAME_TYPE - | beq ->BC_RET_Z // Handle regular return to Lua. - | - |->vm_return: - | // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return - | // CARG1 = PC & FRAME_TYPE - | bic RB, PC, #FRAME_TYPEP - | cmp CARG1, #FRAME_C - | sub RB, BASE, RB // RB = previous base. - | bne ->vm_returnp - | - | str RB, L->base - | ldr KBASE, SAVE_NRES - | mv_vmstate CARG4, C - | sub BASE, BASE, #8 - | subs CARG3, RC, #8 - | lsl KBASE, KBASE, #3 // KBASE = (nresults_wanted+1)*8 - | st_vmstate CARG4 - | beq >2 - |1: - | subs CARG3, CARG3, #8 - | ldrd CARG12, [RA], #8 - | strd CARG12, [BASE], #8 - | bne <1 - |2: - | cmp KBASE, RC // More/less results wanted? - | bne >6 - |3: - | str BASE, L->top // Store new top. - | - |->vm_leave_cp: - | ldr RC, SAVE_CFRAME // Restore previous C frame. - | mov CRET1, #0 // Ok return status for vm_pcall. - | str RC, L->cframe - | - |->vm_leave_unw: - | restoreregs_ret - | - |6: - | blt >7 // Less results wanted? - | // More results wanted. Check stack size and fill up results with nil. - | ldr CARG3, L->maxstack - | mvn CARG2, #~LJ_TNIL - | cmp BASE, CARG3 - | bhs >8 - | str CARG2, [BASE, #4] - | add RC, RC, #8 - | add BASE, BASE, #8 - | b <2 - | - |7: // Less results wanted. - | sub CARG1, RC, KBASE - | cmp KBASE, #0 // LUA_MULTRET+1 case? - | subne BASE, BASE, CARG1 // Either keep top or shrink it. - | b <3 - | - |8: // Corner case: need to grow stack for filling up results. - | // This can happen if: - | // - A C function grows the stack (a lot). - | // - The GC shrinks the stack in between. - | // - A return back from a lua_call() with (high) nresults adjustment. - | str BASE, L->top // Save current top held in BASE (yes). - | lsr CARG2, KBASE, #3 - | mov CARG1, L - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldr BASE, L->top // Need the (realloced) L->top in BASE. - | b <2 - | - |->vm_unwind_c: // Unwind C stack, return from vm_pcall. - | // (void *cframe, int errcode) - | mov sp, CARG1 - | mov CRET1, CARG2 - |->vm_unwind_c_eh: // Landing pad for external unwinder. - | ldr L, SAVE_L - | mv_vmstate CARG4, C - | ldr GL:CARG3, L->glref - | str CARG4, GL:CARG3->vmstate - | b ->vm_leave_unw - | - |->vm_unwind_ff: // Unwind C stack, return from ff pcall. - | // (void *cframe) - | bic CARG1, CARG1, #~CFRAME_RAWMASK // Use two steps: bic sp is deprecated. - | mov sp, CARG1 - |->vm_unwind_ff_eh: // Landing pad for external unwinder. - | ldr L, SAVE_L - | mov MASKR8, #255 - | mov RC, #16 // 2 results: false + error message. - | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. - | ldr BASE, L->base - | ldr DISPATCH, L->glref // Setup pointer to dispatch table. - | mvn CARG1, #~LJ_TFALSE - | sub RA, BASE, #8 // Results start at BASE-8. - | ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame. - | add DISPATCH, DISPATCH, #GG_G2DISP - | mv_vmstate CARG2, INTERP - | str CARG1, [BASE, #-4] // Prepend false to error message. - | st_vmstate CARG2 - | b ->vm_returnc - | - |->vm_unwind_ext: // Complete external unwind. -#if !LJ_NO_UNWIND - | push {r0, r1, r2, lr} - | bl extern _Unwind_Complete - | ldr r0, [sp] - | bl extern _Unwind_DeleteException - | pop {r0, r1, r2, lr} - | mov r0, r1 - | bx r2 -#endif - | - |//----------------------------------------------------------------------- - |//-- Grow stack for calls ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_growstack_c: // Grow stack for C function. - | // CARG1 = L - | mov CARG2, #LUA_MINSTACK - | b >2 - | - |->vm_growstack_l: // Grow stack for Lua function. - | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC - | add RC, BASE, RC - | sub RA, RA, BASE - | mov CARG1, L - | str BASE, L->base - | add PC, PC, #4 // Must point after first instruction. - | str RC, L->top - | lsr CARG2, RA, #3 - |2: - | // L->base = new base, L->top = top - | str PC, SAVE_PC - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldr BASE, L->base - | ldr RC, L->top - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | sub NARGS8:RC, RC, BASE - | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC - | ins_callt // Just retry the call. - | - |//----------------------------------------------------------------------- - |//-- Entry points into the assembler VM --------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_resume: // Setup C frame and resume thread. - | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) - | saveregs - | mov L, CARG1 - | ldr DISPATCH, L:CARG1->glref // Setup pointer to dispatch table. - | mov BASE, CARG2 - | add DISPATCH, DISPATCH, #GG_G2DISP - | str L, SAVE_L - | mov PC, #FRAME_CP - | str CARG3, SAVE_NRES - | add CARG2, sp, #CFRAME_RESUME - | ldrb CARG1, L->status - | str CARG3, SAVE_ERRF - | str L, SAVE_PC // Any value outside of bytecode is ok. - | str CARG3, SAVE_CFRAME - | cmp CARG1, #0 - | str CARG2, L->cframe - | beq >3 - | - | // Resume after yield (like a return). - | str L, [DISPATCH, #DISPATCH_GL(cur_L)] - | mov RA, BASE - | ldr BASE, L->base - | ldr CARG1, L->top - | mov MASKR8, #255 - | strb CARG3, L->status - | sub RC, CARG1, BASE - | ldr PC, [BASE, FRAME_PC] - | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. - | mv_vmstate CARG2, INTERP - | add RC, RC, #8 - | ands CARG1, PC, #FRAME_TYPE - | st_vmstate CARG2 - | str RC, SAVE_MULTRES - | beq ->BC_RET_Z - | b ->vm_return - | - |->vm_pcall: // Setup protected C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) - | saveregs - | mov PC, #FRAME_CP - | str CARG4, SAVE_ERRF - | b >1 - | - |->vm_call: // Setup C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1) - | saveregs - | mov PC, #FRAME_C - | - |1: // Entry point for vm_pcall above (PC = ftype). - | ldr RC, L:CARG1->cframe - | str CARG3, SAVE_NRES - | mov L, CARG1 - | str CARG1, SAVE_L - | ldr DISPATCH, L->glref // Setup pointer to dispatch table. - | mov BASE, CARG2 - | str CARG1, SAVE_PC // Any value outside of bytecode is ok. - | str RC, SAVE_CFRAME - | add DISPATCH, DISPATCH, #GG_G2DISP - | str sp, L->cframe // Add our C frame to cframe chain. - | - |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). - | str L, [DISPATCH, #DISPATCH_GL(cur_L)] - | ldr RB, L->base // RB = old base (for vmeta_call). - | ldr CARG1, L->top - | mov MASKR8, #255 - | add PC, PC, BASE - | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. - | sub PC, PC, RB // PC = frame delta + frame type - | mv_vmstate CARG2, INTERP - | sub NARGS8:RC, CARG1, BASE - | st_vmstate CARG2 - | - |->vm_call_dispatch: - | // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC - | ldrd CARG34, [BASE, FRAME_FUNC] - | checkfunc CARG4, ->vmeta_call - | - |->vm_call_dispatch_f: - | ins_call - | // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC - | - |->vm_cpcall: // Setup protected C frame, call C. - | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) - | saveregs - | mov L, CARG1 - | ldr RA, L:CARG1->stack - | str CARG1, SAVE_L - | ldr DISPATCH, L->glref // Setup pointer to dispatch table. - | ldr RB, L->top - | str CARG1, SAVE_PC // Any value outside of bytecode is ok. - | ldr RC, L->cframe - | add DISPATCH, DISPATCH, #GG_G2DISP - | sub RA, RA, RB // Compute -savestack(L, L->top). - | mov RB, #0 - | str RA, SAVE_NRES // Neg. delta means cframe w/o frame. - | str RB, SAVE_ERRF // No error function. - | str RC, SAVE_CFRAME - | str sp, L->cframe // Add our C frame to cframe chain. - | str L, [DISPATCH, #DISPATCH_GL(cur_L)] - | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud) - | movs BASE, CRET1 - | mov PC, #FRAME_CP - | bne <3 // Else continue with the call. - | b ->vm_leave_cp // No base? Just remove C frame. - | - |//----------------------------------------------------------------------- - |//-- Metamethod handling ------------------------------------------------ - |//----------------------------------------------------------------------- - | - |//-- Continuation dispatch ---------------------------------------------- - | - |->cont_dispatch: - | // BASE = meta base, RA = resultptr, RC = (nresults+1)*8 - | ldr LFUNC:CARG3, [RB, FRAME_FUNC] - | ldr CARG1, [BASE, #-16] // Get continuation. - | mov CARG4, BASE - | mov BASE, RB // Restore caller BASE. - |.if FFI - | cmp CARG1, #1 - |.endif - | ldr PC, [CARG4, #-12] // Restore PC from [cont|PC]. - | ldr CARG3, LFUNC:CARG3->field_pc - | mvn INS, #~LJ_TNIL - | add CARG2, RA, RC - | str INS, [CARG2, #-4] // Ensure one valid arg. - |.if FFI - | bls >1 - |.endif - | ldr KBASE, [CARG3, #PC2PROTO(k)] - | // BASE = base, RA = resultptr, CARG4 = meta base - | bx CARG1 - | - |.if FFI - |1: - | beq ->cont_ffi_callback // cont = 1: return from FFI callback. - | // cont = 0: tailcall from C function. - | sub CARG4, CARG4, #16 - | sub RC, CARG4, BASE - | b ->vm_call_tail - |.endif - | - |->cont_cat: // RA = resultptr, CARG4 = meta base - | ldr INS, [PC, #-4] - | sub CARG2, CARG4, #16 - | ldrd CARG34, [RA] - | str BASE, L->base - | decode_RB8 RC, INS - | decode_RA8 RA, INS - | add CARG1, BASE, RC - | subs CARG1, CARG2, CARG1 - | strdne CARG34, [CARG2] - | movne CARG3, CARG1 - | bne ->BC_CAT_Z - | strd CARG34, [BASE, RA] - | b ->cont_nop - | - |//-- Table indexing metamethods ----------------------------------------- - | - |->vmeta_tgets1: - | add CARG2, BASE, RB - | b >2 - | - |->vmeta_tgets: - | sub CARG2, DISPATCH, #-DISPATCH_GL(tmptv) - | mvn CARG4, #~LJ_TTAB - | str TAB:RB, [CARG2] - | str CARG4, [CARG2, #4] - |2: - | mvn CARG4, #~LJ_TSTR - | str STR:RC, TMPDlo - | str CARG4, TMPDhi - | mov CARG3, TMPDp - | b >1 - | - |->vmeta_tgetb: // RC = index - | decode_RB8 RB, INS - | str RC, TMPDlo - | mvn CARG4, #~LJ_TISNUM - | add CARG2, BASE, RB - | str CARG4, TMPDhi - | mov CARG3, TMPDp - | b >1 - | - |->vmeta_tgetv: - | add CARG2, BASE, RB - | add CARG3, BASE, RC - |1: - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) - | // Returns TValue * (finished) or NULL (metamethod). - | .IOS ldr BASE, L->base - | cmp CRET1, #0 - | beq >3 - | ldrd CARG34, [CRET1] - | ins_next1 - | ins_next2 - | strd CARG34, [BASE, RA] - | ins_next3 - | - |3: // Call __index metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k - | rsb CARG1, BASE, #FRAME_CONT - | ldr BASE, L->top - | mov NARGS8:RC, #16 // 2 args for func(t, k). - | str PC, [BASE, #-12] // [cont|PC] - | add PC, CARG1, BASE - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. - | b ->vm_call_dispatch_f - | - |->vmeta_tgetr: - | .IOS mov RC, BASE - | bl extern lj_tab_getinth // (GCtab *t, int32_t key) - | // Returns cTValue * or NULL. - | .IOS mov BASE, RC - | cmp CRET1, #0 - | ldrdne CARG12, [CRET1] - | mvneq CARG2, #~LJ_TNIL - | b ->BC_TGETR_Z - | - |//----------------------------------------------------------------------- - | - |->vmeta_tsets1: - | add CARG2, BASE, RB - | b >2 - | - |->vmeta_tsets: - | sub CARG2, DISPATCH, #-DISPATCH_GL(tmptv) - | mvn CARG4, #~LJ_TTAB - | str TAB:RB, [CARG2] - | str CARG4, [CARG2, #4] - |2: - | mvn CARG4, #~LJ_TSTR - | str STR:RC, TMPDlo - | str CARG4, TMPDhi - | mov CARG3, TMPDp - | b >1 - | - |->vmeta_tsetb: // RC = index - | decode_RB8 RB, INS - | str RC, TMPDlo - | mvn CARG4, #~LJ_TISNUM - | add CARG2, BASE, RB - | str CARG4, TMPDhi - | mov CARG3, TMPDp - | b >1 - | - |->vmeta_tsetv: - | add CARG2, BASE, RB - | add CARG3, BASE, RC - |1: - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) - | // Returns TValue * (finished) or NULL (metamethod). - | .IOS ldr BASE, L->base - | cmp CRET1, #0 - | ldrd CARG34, [BASE, RA] - | beq >3 - | ins_next1 - | // NOBARRIER: lj_meta_tset ensures the table is not black. - | strd CARG34, [CRET1] - | ins_next2 - | ins_next3 - | - |3: // Call __newindex metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) - | rsb CARG1, BASE, #FRAME_CONT - | ldr BASE, L->top - | mov NARGS8:RC, #24 // 3 args for func(t, k, v). - | strd CARG34, [BASE, #16] // Copy value to third argument. - | str PC, [BASE, #-12] // [cont|PC] - | add PC, CARG1, BASE - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. - | b ->vm_call_dispatch_f - | - |->vmeta_tsetr: - | str BASE, L->base - | .IOS mov RC, BASE - | str PC, SAVE_PC - | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) - | // Returns TValue *. - | .IOS mov BASE, RC - | b ->BC_TSETR_Z - | - |//-- Comparison metamethods --------------------------------------------- - | - |->vmeta_comp: - | mov CARG1, L - | sub PC, PC, #4 - | mov CARG2, RA - | str BASE, L->base - | mov CARG3, RC - | str PC, SAVE_PC - | decode_OP CARG4, INS - | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) - | // Returns 0/1 or TValue * (metamethod). - |3: - | .IOS ldr BASE, L->base - | cmp CRET1, #1 - | bhi ->vmeta_binop - |4: - | ldrh RB, [PC, #2] - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | subhs PC, RB, #0x20000 - |->cont_nop: - | ins_next - | - |->cont_ra: // RA = resultptr - | ldr INS, [PC, #-4] - | ldrd CARG12, [RA] - | decode_RA8 CARG3, INS - | strd CARG12, [BASE, CARG3] - | b ->cont_nop - | - |->cont_condt: // RA = resultptr - | ldr CARG2, [RA, #4] - | mvn CARG1, #~LJ_TTRUE - | cmp CARG1, CARG2 // Branch if result is true. - | b <4 - | - |->cont_condf: // RA = resultptr - | ldr CARG2, [RA, #4] - | checktp CARG2, LJ_TFALSE // Branch if result is false. - | b <4 - | - |->vmeta_equal: - | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. - | sub PC, PC, #4 - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) - | // Returns 0/1 or TValue * (metamethod). - | b <3 - | - |->vmeta_equal_cd: - |.if FFI - | sub PC, PC, #4 - | str BASE, L->base - | mov CARG1, L - | mov CARG2, INS - | str PC, SAVE_PC - | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op) - | // Returns 0/1 or TValue * (metamethod). - | b <3 - |.endif - | - |->vmeta_istype: - | sub PC, PC, #4 - | str BASE, L->base - | mov CARG1, L - | lsr CARG2, RA, #3 - | mov CARG3, RC - | str PC, SAVE_PC - | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) - | .IOS ldr BASE, L->base - | b ->cont_nop - | - |//-- Arithmetic metamethods --------------------------------------------- - | - |->vmeta_arith_vn: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | add CARG3, BASE, RB - | add CARG4, KBASE, RC - | b >1 - | - |->vmeta_arith_nv: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | add CARG4, BASE, RB - | add CARG3, KBASE, RC - | b >1 - | - |->vmeta_unm: - | ldr INS, [PC, #-8] - | sub PC, PC, #4 - | add CARG3, BASE, RC - | add CARG4, BASE, RC - | b >1 - | - |->vmeta_arith_vv: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | add CARG3, BASE, RB - | add CARG4, BASE, RC - |1: - | decode_OP OP, INS - | add CARG2, BASE, RA - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | str OP, ARG5 - | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) - | // Returns NULL (finished) or TValue * (metamethod). - | .IOS ldr BASE, L->base - | cmp CRET1, #0 - | beq ->cont_nop - | - | // Call metamethod for binary op. - |->vmeta_binop: - | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 - | sub CARG2, CRET1, BASE - | str PC, [CRET1, #-12] // [cont|PC] - | add PC, CARG2, #FRAME_CONT - | mov BASE, CRET1 - | mov NARGS8:RC, #16 // 2 args for func(o1, o2). - | b ->vm_call_dispatch - | - |->vmeta_len: - | add CARG2, BASE, RC - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_len // (lua_State *L, TValue *o) - | // Returns NULL (retry) or TValue * (metamethod base). - | .IOS ldr BASE, L->base -#if LJ_52 - | cmp CRET1, #0 - | bne ->vmeta_binop // Binop call for compatibility. - | ldr TAB:CARG1, [BASE, RC] - | b ->BC_LEN_Z -#else - | b ->vmeta_binop // Binop call for compatibility. -#endif - | - |//-- Call metamethod ---------------------------------------------------- - | - |->vmeta_call: // Resolve and call __call metamethod. - | // RB = old base, BASE = new base, RC = nargs*8 - | mov CARG1, L - | str RB, L->base // This is the callers base! - | sub CARG2, BASE, #8 - | str PC, SAVE_PC - | add CARG3, BASE, NARGS8:RC - | .IOS mov RA, BASE - | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | .IOS mov BASE, RA - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. - | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. - | ins_call - | - |->vmeta_callt: // Resolve __call for BC_CALLT. - | // BASE = old base, RA = new base, RC = nargs*8 - | mov CARG1, L - | str BASE, L->base - | sub CARG2, RA, #8 - | str PC, SAVE_PC - | add CARG3, RA, NARGS8:RC - | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | .IOS ldr BASE, L->base - | ldr LFUNC:CARG3, [RA, FRAME_FUNC] // Guaranteed to be a function here. - | ldr PC, [BASE, FRAME_PC] - | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. - | b ->BC_CALLT2_Z - | - |//-- Argument coercion for 'for' statement ------------------------------ - | - |->vmeta_for: - | mov CARG1, L - | str BASE, L->base - | mov CARG2, RA - | str PC, SAVE_PC - | bl extern lj_meta_for // (lua_State *L, TValue *base) - | .IOS ldr BASE, L->base - |.if JIT - | ldrb OP, [PC, #-4] - |.endif - | ldr INS, [PC, #-4] - |.if JIT - | cmp OP, #BC_JFORI - |.endif - | decode_RA8 RA, INS - | decode_RD RC, INS - |.if JIT - | beq =>BC_JFORI - |.endif - | b =>BC_FORI - | - |//----------------------------------------------------------------------- - |//-- Fast functions ----------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro .ffunc, name - |->ff_ .. name: - |.endmacro - | - |.macro .ffunc_1, name - |->ff_ .. name: - | ldrd CARG12, [BASE] - | cmp NARGS8:RC, #8 - | blo ->fff_fallback - |.endmacro - | - |.macro .ffunc_2, name - |->ff_ .. name: - | ldrd CARG12, [BASE] - | ldrd CARG34, [BASE, #8] - | cmp NARGS8:RC, #16 - | blo ->fff_fallback - |.endmacro - | - |.macro .ffunc_n, name - | .ffunc_1 name - | checktp CARG2, LJ_TISNUM - | bhs ->fff_fallback - |.endmacro - | - |.macro .ffunc_nn, name - | .ffunc_2 name - | checktp CARG2, LJ_TISNUM - | cmnlo CARG4, #-LJ_TISNUM - | bhs ->fff_fallback - |.endmacro - | - |.macro .ffunc_d, name - | .ffunc name - | ldr CARG2, [BASE, #4] - | cmp NARGS8:RC, #8 - | vldr d0, [BASE] - | blo ->fff_fallback - | checktp CARG2, LJ_TISNUM - | bhs ->fff_fallback - |.endmacro - | - |.macro .ffunc_dd, name - | .ffunc name - | ldr CARG2, [BASE, #4] - | ldr CARG4, [BASE, #12] - | cmp NARGS8:RC, #16 - | vldr d0, [BASE] - | vldr d1, [BASE, #8] - | blo ->fff_fallback - | checktp CARG2, LJ_TISNUM - | cmnlo CARG4, #-LJ_TISNUM - | bhs ->fff_fallback - |.endmacro - | - |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2. - |.macro ffgccheck - | ldr CARG1, [DISPATCH, #DISPATCH_GL(gc.total)] - | ldr CARG2, [DISPATCH, #DISPATCH_GL(gc.threshold)] - | cmp CARG1, CARG2 - | blge ->fff_gcstep - |.endmacro - | - |//-- Base library: checks ----------------------------------------------- - | - |.ffunc_1 assert - | checktp CARG2, LJ_TTRUE - | bhi ->fff_fallback - | ldr PC, [BASE, FRAME_PC] - | strd CARG12, [BASE, #-8] - | mov RB, BASE - | subs RA, NARGS8:RC, #8 - | add RC, NARGS8:RC, #8 // Compute (nresults+1)*8. - | beq ->fff_res // Done if exactly 1 argument. - |1: - | ldrd CARG12, [RB, #8] - | subs RA, RA, #8 - | strd CARG12, [RB], #8 - | bne <1 - | b ->fff_res - | - |.ffunc type - | ldr CARG2, [BASE, #4] - | cmp NARGS8:RC, #8 - | blo ->fff_fallback - | checktp CARG2, LJ_TISNUM - | mvnlo CARG2, #~LJ_TISNUM - | rsb CARG4, CARG2, #(int)(offsetof(GCfuncC, upvalue)>>3)-1 - | lsl CARG4, CARG4, #3 - | ldrd CARG12, [CFUNC:CARG3, CARG4] - | b ->fff_restv - | - |//-- Base library: getters and setters --------------------------------- - | - |.ffunc_1 getmetatable - | checktp CARG2, LJ_TTAB - | cmnne CARG2, #-LJ_TUDATA - | bne >6 - |1: // Field metatable must be at same offset for GCtab and GCudata! - | ldr TAB:RB, TAB:CARG1->metatable - |2: - | mvn CARG2, #~LJ_TNIL - | ldr STR:RC, [DISPATCH, #DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])] - | cmp TAB:RB, #0 - | beq ->fff_restv - | ldr CARG3, TAB:RB->hmask - | ldr CARG4, STR:RC->hash - | ldr NODE:INS, TAB:RB->node - | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask - | add CARG3, CARG3, CARG3, lsl #1 - | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 - |3: // Rearranged logic, because we expect _not_ to find the key. - | ldrd CARG34, NODE:INS->key // STALL: early NODE:INS. - | ldrd CARG12, NODE:INS->val - | ldr NODE:INS, NODE:INS->next - | checktp CARG4, LJ_TSTR - | cmpeq CARG3, STR:RC - | beq >5 - | cmp NODE:INS, #0 - | bne <3 - |4: - | mov CARG1, RB // Use metatable as default result. - | mvn CARG2, #~LJ_TTAB - | b ->fff_restv - |5: - | checktp CARG2, LJ_TNIL - | bne ->fff_restv - | b <4 - | - |6: - | checktp CARG2, LJ_TISNUM - | mvnhs CARG2, CARG2 - | movlo CARG2, #~LJ_TISNUM - | add CARG4, DISPATCH, CARG2, lsl #2 - | ldr TAB:RB, [CARG4, #DISPATCH_GL(gcroot[GCROOT_BASEMT])] - | b <2 - | - |.ffunc_2 setmetatable - | // Fast path: no mt for table yet and not clearing the mt. - | checktp CARG2, LJ_TTAB - | ldreq TAB:RB, TAB:CARG1->metatable - | checktpeq CARG4, LJ_TTAB - | ldrbeq CARG4, TAB:CARG1->marked - | cmpeq TAB:RB, #0 - | bne ->fff_fallback - | tst CARG4, #LJ_GC_BLACK // isblack(table) - | str TAB:CARG3, TAB:CARG1->metatable - | beq ->fff_restv - | barrierback TAB:CARG1, CARG4, CARG3 - | b ->fff_restv - | - |.ffunc rawget - | ldrd CARG34, [BASE] - | cmp NARGS8:RC, #16 - | blo ->fff_fallback - | mov CARG2, CARG3 - | checktab CARG4, ->fff_fallback - | mov CARG1, L - | add CARG3, BASE, #8 - | .IOS mov RA, BASE - | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) - | // Returns cTValue *. - | .IOS mov BASE, RA - | ldrd CARG12, [CRET1] - | b ->fff_restv - | - |//-- Base library: conversions ------------------------------------------ - | - |.ffunc tonumber - | // Only handles the number case inline (without a base argument). - | ldrd CARG12, [BASE] - | cmp NARGS8:RC, #8 - | bne ->fff_fallback - | checktp CARG2, LJ_TISNUM - | bls ->fff_restv - | b ->fff_fallback - | - |.ffunc_1 tostring - | // Only handles the string or number case inline. - | checktp CARG2, LJ_TSTR - | // A __tostring method in the string base metatable is ignored. - | beq ->fff_restv - | // Handle numbers inline, unless a number base metatable is present. - | ldr CARG4, [DISPATCH, #DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])] - | str BASE, L->base - | checktp CARG2, LJ_TISNUM - | cmpls CARG4, #0 - | str PC, SAVE_PC // Redundant (but a defined value). - | bhi ->fff_fallback - | ffgccheck - | mov CARG1, L - | mov CARG2, BASE - | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) - | // Returns GCstr *. - | ldr BASE, L->base - | mvn CARG2, #~LJ_TSTR - | b ->fff_restv - | - |//-- Base library: iterators ------------------------------------------- - | - |.ffunc_1 next - | mvn CARG4, #~LJ_TNIL - | checktab CARG2, ->fff_fallback - | strd CARG34, [BASE, NARGS8:RC] // Set missing 2nd arg to nil. - | ldr PC, [BASE, FRAME_PC] - | mov CARG2, CARG1 - | str BASE, L->base // Add frame since C call can throw. - | mov CARG1, L - | str BASE, L->top // Dummy frame length is ok. - | add CARG3, BASE, #8 - | str PC, SAVE_PC - | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - | // Returns 0 at end of traversal. - | .IOS ldr BASE, L->base - | cmp CRET1, #0 - | mvneq CRET2, #~LJ_TNIL - | beq ->fff_restv // End of traversal: return nil. - | ldrd CARG12, [BASE, #8] // Copy key and value to results. - | ldrd CARG34, [BASE, #16] - | mov RC, #(2+1)*8 - | strd CARG12, [BASE, #-8] - | strd CARG34, [BASE] - | b ->fff_res - | - |.ffunc_1 pairs - | checktab CARG2, ->fff_fallback -#if LJ_52 - | ldr TAB:RB, TAB:CARG1->metatable -#endif - | ldrd CFUNC:CARG34, CFUNC:CARG3->upvalue[0] - | ldr PC, [BASE, FRAME_PC] -#if LJ_52 - | cmp TAB:RB, #0 - | bne ->fff_fallback -#endif - | mvn CARG2, #~LJ_TNIL - | mov RC, #(3+1)*8 - | strd CFUNC:CARG34, [BASE, #-8] - | str CARG2, [BASE, #12] - | b ->fff_res - | - |.ffunc_2 ipairs_aux - | checktp CARG2, LJ_TTAB - | checktpeq CARG4, LJ_TISNUM - | bne ->fff_fallback - | ldr RB, TAB:CARG1->asize - | ldr RC, TAB:CARG1->array - | add CARG3, CARG3, #1 - | ldr PC, [BASE, FRAME_PC] - | cmp CARG3, RB - | add RC, RC, CARG3, lsl #3 - | strd CARG34, [BASE, #-8] - | ldrdlo CARG12, [RC] - | mov RC, #(0+1)*8 - | bhs >2 // Not in array part? - |1: - | checktp CARG2, LJ_TNIL - | movne RC, #(2+1)*8 - | strdne CARG12, [BASE] - | b ->fff_res - |2: // Check for empty hash part first. Otherwise call C function. - | ldr RB, TAB:CARG1->hmask - | mov CARG2, CARG3 - | cmp RB, #0 - | beq ->fff_res - | .IOS mov RA, BASE - | bl extern lj_tab_getinth // (GCtab *t, int32_t key) - | // Returns cTValue * or NULL. - | .IOS mov BASE, RA - | cmp CRET1, #0 - | beq ->fff_res - | ldrd CARG12, [CRET1] - | b <1 - | - |.ffunc_1 ipairs - | checktab CARG2, ->fff_fallback -#if LJ_52 - | ldr TAB:RB, TAB:CARG1->metatable -#endif - | ldrd CFUNC:CARG34, CFUNC:CARG3->upvalue[0] - | ldr PC, [BASE, FRAME_PC] -#if LJ_52 - | cmp TAB:RB, #0 - | bne ->fff_fallback -#endif - | mov CARG1, #0 - | mvn CARG2, #~LJ_TISNUM - | mov RC, #(3+1)*8 - | strd CFUNC:CARG34, [BASE, #-8] - | strd CARG12, [BASE, #8] - | b ->fff_res - | - |//-- Base library: catch errors ---------------------------------------- - | - |.ffunc pcall - | ldrb RA, [DISPATCH, #DISPATCH_GL(hookmask)] - | cmp NARGS8:RC, #8 - | blo ->fff_fallback - | tst RA, #HOOK_ACTIVE // Remember active hook before pcall. - | mov RB, BASE - | add BASE, BASE, #8 - | moveq PC, #8+FRAME_PCALL - | movne PC, #8+FRAME_PCALLH - | sub NARGS8:RC, NARGS8:RC, #8 - | b ->vm_call_dispatch - | - |.ffunc_2 xpcall - | ldrb RA, [DISPATCH, #DISPATCH_GL(hookmask)] - | checkfunc CARG4, ->fff_fallback // Traceback must be a function. - | mov RB, BASE - | strd CARG12, [BASE, #8] // Swap function and traceback. - | strd CARG34, [BASE] - | tst RA, #HOOK_ACTIVE // Remember active hook before pcall. - | add BASE, BASE, #16 - | moveq PC, #16+FRAME_PCALL - | movne PC, #16+FRAME_PCALLH - | sub NARGS8:RC, NARGS8:RC, #16 - | b ->vm_call_dispatch - | - |//-- Coroutine library -------------------------------------------------- - | - |.macro coroutine_resume_wrap, resume - |.if resume - |.ffunc_1 coroutine_resume - | checktp CARG2, LJ_TTHREAD - | bne ->fff_fallback - |.else - |.ffunc coroutine_wrap_aux - | ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr - |.endif - | ldr PC, [BASE, FRAME_PC] - | str BASE, L->base - | ldr CARG2, L:CARG1->top - | ldrb RA, L:CARG1->status - | ldr RB, L:CARG1->base - | add CARG3, CARG2, NARGS8:RC - | add CARG4, CARG2, RA - | str PC, SAVE_PC - | cmp CARG4, RB - | beq ->fff_fallback - | ldr CARG4, L:CARG1->maxstack - | ldr RB, L:CARG1->cframe - | cmp RA, #LUA_YIELD - | cmpls CARG3, CARG4 - | cmpls RB, #0 - | bhi ->fff_fallback - |1: - |.if resume - | sub CARG3, CARG3, #8 // Keep resumed thread in stack for GC. - | add BASE, BASE, #8 - | sub NARGS8:RC, NARGS8:RC, #8 - |.endif - | str CARG3, L:CARG1->top - | str BASE, L->top - |2: // Move args to coroutine. - | ldrd CARG34, [BASE, RB] - | cmp RB, NARGS8:RC - | strdne CARG34, [CARG2, RB] - | add RB, RB, #8 - | bne <2 - | - | mov CARG3, #0 - | mov L:RA, L:CARG1 - | mov CARG4, #0 - | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) - | // Returns thread status. - |4: - | ldr CARG3, L:RA->base - | mv_vmstate CARG2, INTERP - | ldr CARG4, L:RA->top - | cmp CRET1, #LUA_YIELD - | ldr BASE, L->base - | str L, [DISPATCH, #DISPATCH_GL(cur_L)] - | st_vmstate CARG2 - | bhi >8 - | subs RC, CARG4, CARG3 - | ldr CARG1, L->maxstack - | add CARG2, BASE, RC - | beq >6 // No results? - | cmp CARG2, CARG1 - | mov RB, #0 - | bhi >9 // Need to grow stack? - | - | sub CARG4, RC, #8 - | str CARG3, L:RA->top // Clear coroutine stack. - |5: // Move results from coroutine. - | ldrd CARG12, [CARG3, RB] - | cmp RB, CARG4 - | strd CARG12, [BASE, RB] - | add RB, RB, #8 - | bne <5 - |6: - |.if resume - | mvn CARG3, #~LJ_TTRUE - | add RC, RC, #16 - |7: - | str CARG3, [BASE, #-4] // Prepend true/false to results. - | sub RA, BASE, #8 - |.else - | mov RA, BASE - | add RC, RC, #8 - |.endif - | ands CARG1, PC, #FRAME_TYPE - | str PC, SAVE_PC - | str RC, SAVE_MULTRES - | beq ->BC_RET_Z - | b ->vm_return - | - |8: // Coroutine returned with error (at co->top-1). - |.if resume - | ldrd CARG12, [CARG4, #-8]! - | mvn CARG3, #~LJ_TFALSE - | mov RC, #(2+1)*8 - | str CARG4, L:RA->top // Remove error from coroutine stack. - | strd CARG12, [BASE] // Copy error message. - | b <7 - |.else - | mov CARG1, L - | mov CARG2, L:RA - | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) - | // Never returns. - |.endif - | - |9: // Handle stack expansion on return from yield. - | mov CARG1, L - | lsr CARG2, RC, #3 - | bl extern lj_state_growstack // (lua_State *L, int n) - | mov CRET1, #0 - | b <4 - |.endmacro - | - | coroutine_resume_wrap 1 // coroutine.resume - | coroutine_resume_wrap 0 // coroutine.wrap - | - |.ffunc coroutine_yield - | ldr CARG1, L->cframe - | add CARG2, BASE, NARGS8:RC - | str BASE, L->base - | tst CARG1, #CFRAME_RESUME - | str CARG2, L->top - | mov CRET1, #LUA_YIELD - | mov CARG3, #0 - | beq ->fff_fallback - | str CARG3, L->cframe - | strb CRET1, L->status - | b ->vm_leave_unw - | - |//-- Math library ------------------------------------------------------- - | - |.macro math_round, func - | .ffunc_1 math_ .. func - | checktp CARG2, LJ_TISNUM - | beq ->fff_restv - | bhi ->fff_fallback - | // Round FP value and normalize result. - | lsl CARG3, CARG2, #1 - | adds RB, CARG3, #0x00200000 - | bpl >2 // |x| < 1? - | mvn CARG4, #0x3e0 - | subs RB, CARG4, RB, asr #21 - | lsl CARG4, CARG2, #11 - | lsl CARG3, CARG1, #11 - | orr CARG4, CARG4, #0x80000000 - | rsb INS, RB, #32 - | orr CARG4, CARG4, CARG1, lsr #21 - | bls >3 // |x| >= 2^31? - | orr CARG3, CARG3, CARG4, lsl INS - | lsr CARG1, CARG4, RB - |.if "func" == "floor" - | tst CARG3, CARG2, asr #31 - | addne CARG1, CARG1, #1 - |.else - | bics CARG3, CARG3, CARG2, asr #31 - | addsne CARG1, CARG1, #1 - | ldrdvs CARG12, >9 - | bvs ->fff_restv - |.endif - | cmp CARG2, #0 - | rsblt CARG1, CARG1, #0 - |1: - | mvn CARG2, #~LJ_TISNUM - | b ->fff_restv - | - |2: // |x| < 1 - | bcs ->fff_restv // |x| is not finite. - | orr CARG3, CARG3, CARG1 // ztest = abs(hi) | lo - |.if "func" == "floor" - | tst CARG3, CARG2, asr #31 // return (ztest & sign) == 0 ? 0 : -1 - | moveq CARG1, #0 - | mvnne CARG1, #0 - |.else - | bics CARG3, CARG3, CARG2, asr #31 // return (ztest & ~sign) == 0 ? 0 : 1 - | moveq CARG1, #0 - | movne CARG1, #1 - |.endif - | mvn CARG2, #~LJ_TISNUM - | b ->fff_restv - | - |3: // |x| >= 2^31. Check for x == -(2^31). - | cmpeq CARG4, #0x80000000 - |.if "func" == "floor" - | cmpeq CARG3, #0 - |.endif - | bne >4 - | cmp CARG2, #0 - | movmi CARG1, #0x80000000 - | bmi <1 - |4: - | bl ->vm_..func.._sf - | b ->fff_restv - |.endmacro - | - | math_round floor - | math_round ceil - | - |.align 8 - |9: - | .long 0x00000000, 0x41e00000 // 2^31. - | - |.ffunc_1 math_abs - | checktp CARG2, LJ_TISNUM - | bhi ->fff_fallback - | bicne CARG2, CARG2, #0x80000000 - | bne ->fff_restv - | cmp CARG1, #0 - | rsbslt CARG1, CARG1, #0 - | ldrdvs CARG12, <9 - | // Fallthrough. - | - |->fff_restv: - | // CARG12 = TValue result. - | ldr PC, [BASE, FRAME_PC] - | strd CARG12, [BASE, #-8] - |->fff_res1: - | // PC = return. - | mov RC, #(1+1)*8 - |->fff_res: - | // RC = (nresults+1)*8, PC = return. - | ands CARG1, PC, #FRAME_TYPE - | ldreq INS, [PC, #-4] - | str RC, SAVE_MULTRES - | sub RA, BASE, #8 - | bne ->vm_return - | decode_RB8 RB, INS - |5: - | cmp RB, RC // More results expected? - | bhi >6 - | decode_RA8 CARG1, INS - | ins_next1 - | ins_next2 - | // Adjust BASE. KBASE is assumed to be set for the calling frame. - | sub BASE, RA, CARG1 - | ins_next3 - | - |6: // Fill up results with nil. - | add CARG2, RA, RC - | mvn CARG1, #~LJ_TNIL - | add RC, RC, #8 - | str CARG1, [CARG2, #-4] - | b <5 - | - |.macro math_extern, func - |.if HFABI - | .ffunc_d math_ .. func - |.else - | .ffunc_n math_ .. func - |.endif - | .IOS mov RA, BASE - | bl extern func - | .IOS mov BASE, RA - |.if HFABI - | b ->fff_resd - |.else - | b ->fff_restv - |.endif - |.endmacro - | - |.macro math_extern2, func - |.if HFABI - | .ffunc_dd math_ .. func - |.else - | .ffunc_nn math_ .. func - |.endif - | .IOS mov RA, BASE - | bl extern func - | .IOS mov BASE, RA - |.if HFABI - | b ->fff_resd - |.else - | b ->fff_restv - |.endif - |.endmacro - | - |.if FPU - | .ffunc_d math_sqrt - | vsqrt.f64 d0, d0 - |->fff_resd: - | ldr PC, [BASE, FRAME_PC] - | vstr d0, [BASE, #-8] - | b ->fff_res1 - |.else - | math_extern sqrt - |.endif - | - |.ffunc math_log - |.if HFABI - | ldr CARG2, [BASE, #4] - | cmp NARGS8:RC, #8 // Need exactly 1 argument. - | vldr d0, [BASE] - | bne ->fff_fallback - |.else - | ldrd CARG12, [BASE] - | cmp NARGS8:RC, #8 // Need exactly 1 argument. - | bne ->fff_fallback - |.endif - | checktp CARG2, LJ_TISNUM - | bhs ->fff_fallback - | .IOS mov RA, BASE - | bl extern log - | .IOS mov BASE, RA - |.if HFABI - | b ->fff_resd - |.else - | b ->fff_restv - |.endif - | - | math_extern log10 - | math_extern exp - | math_extern sin - | math_extern cos - | math_extern tan - | math_extern asin - | math_extern acos - | math_extern atan - | math_extern sinh - | math_extern cosh - | math_extern tanh - | math_extern2 pow - | math_extern2 atan2 - | math_extern2 fmod - | - |.if HFABI - | .ffunc math_ldexp - | ldr CARG4, [BASE, #4] - | ldrd CARG12, [BASE, #8] - | cmp NARGS8:RC, #16 - | blo ->fff_fallback - | vldr d0, [BASE] - | checktp CARG4, LJ_TISNUM - | bhs ->fff_fallback - | checktp CARG2, LJ_TISNUM - | bne ->fff_fallback - | .IOS mov RA, BASE - | bl extern ldexp // (double x, int exp) - | .IOS mov BASE, RA - | b ->fff_resd - |.else - |.ffunc_2 math_ldexp - | checktp CARG2, LJ_TISNUM - | bhs ->fff_fallback - | checktp CARG4, LJ_TISNUM - | bne ->fff_fallback - | .IOS mov RA, BASE - | bl extern ldexp // (double x, int exp) - | .IOS mov BASE, RA - | b ->fff_restv - |.endif - | - |.if HFABI - |.ffunc_d math_frexp - | mov CARG1, sp - | .IOS mov RA, BASE - | bl extern frexp - | .IOS mov BASE, RA - | ldr CARG3, [sp] - | mvn CARG4, #~LJ_TISNUM - | ldr PC, [BASE, FRAME_PC] - | vstr d0, [BASE, #-8] - | mov RC, #(2+1)*8 - | strd CARG34, [BASE] - | b ->fff_res - |.else - |.ffunc_n math_frexp - | mov CARG3, sp - | .IOS mov RA, BASE - | bl extern frexp - | .IOS mov BASE, RA - | ldr CARG3, [sp] - | mvn CARG4, #~LJ_TISNUM - | ldr PC, [BASE, FRAME_PC] - | strd CARG12, [BASE, #-8] - | mov RC, #(2+1)*8 - | strd CARG34, [BASE] - | b ->fff_res - |.endif - | - |.if HFABI - |.ffunc_d math_modf - | sub CARG1, BASE, #8 - | ldr PC, [BASE, FRAME_PC] - | .IOS mov RA, BASE - | bl extern modf - | .IOS mov BASE, RA - | mov RC, #(2+1)*8 - | vstr d0, [BASE] - | b ->fff_res - |.else - |.ffunc_n math_modf - | sub CARG3, BASE, #8 - | ldr PC, [BASE, FRAME_PC] - | .IOS mov RA, BASE - | bl extern modf - | .IOS mov BASE, RA - | mov RC, #(2+1)*8 - | strd CARG12, [BASE] - | b ->fff_res - |.endif - | - |.macro math_minmax, name, cond, fcond - |.if FPU - | .ffunc_1 name - | add RB, BASE, RC - | checktp CARG2, LJ_TISNUM - | add RA, BASE, #8 - | bne >4 - |1: // Handle integers. - | ldrd CARG34, [RA] - | cmp RA, RB - | bhs ->fff_restv - | checktp CARG4, LJ_TISNUM - | bne >3 - | cmp CARG1, CARG3 - | add RA, RA, #8 - | mov..cond CARG1, CARG3 - | b <1 - |3: // Convert intermediate result to number and continue below. - | vmov s4, CARG1 - | bhi ->fff_fallback - | vldr d1, [RA] - | vcvt.f64.s32 d0, s4 - | b >6 - | - |4: - | vldr d0, [BASE] - | bhi ->fff_fallback - |5: // Handle numbers. - | ldrd CARG34, [RA] - | vldr d1, [RA] - | cmp RA, RB - | bhs ->fff_resd - | checktp CARG4, LJ_TISNUM - | bhs >7 - |6: - | vcmp.f64 d0, d1 - | vmrs - | add RA, RA, #8 - | vmov..fcond.f64 d0, d1 - | b <5 - |7: // Convert integer to number and continue above. - | vmov s4, CARG3 - | bhi ->fff_fallback - | vcvt.f64.s32 d1, s4 - | b <6 - | - |.else - | - | .ffunc_1 name - | checktp CARG2, LJ_TISNUM - | mov RA, #8 - | bne >4 - |1: // Handle integers. - | ldrd CARG34, [BASE, RA] - | cmp RA, RC - | bhs ->fff_restv - | checktp CARG4, LJ_TISNUM - | bne >3 - | cmp CARG1, CARG3 - | add RA, RA, #8 - | mov..cond CARG1, CARG3 - | b <1 - |3: // Convert intermediate result to number and continue below. - | bhi ->fff_fallback - | bl extern __aeabi_i2d - | ldrd CARG34, [BASE, RA] - | b >6 - | - |4: - | bhi ->fff_fallback - |5: // Handle numbers. - | ldrd CARG34, [BASE, RA] - | cmp RA, RC - | bhs ->fff_restv - | checktp CARG4, LJ_TISNUM - | bhs >7 - |6: - | bl extern __aeabi_cdcmple - | add RA, RA, #8 - | mov..fcond CARG1, CARG3 - | mov..fcond CARG2, CARG4 - | b <5 - |7: // Convert integer to number and continue above. - | bhi ->fff_fallback - | strd CARG12, TMPD - | mov CARG1, CARG3 - | bl extern __aeabi_i2d - | ldrd CARG34, TMPD - | b <6 - |.endif - |.endmacro - | - | math_minmax math_min, gt, hi - | math_minmax math_max, lt, lo - | - |//-- String library ----------------------------------------------------- - | - |.ffunc string_byte // Only handle the 1-arg case here. - | ldrd CARG12, [BASE] - | ldr PC, [BASE, FRAME_PC] - | cmp NARGS8:RC, #8 - | checktpeq CARG2, LJ_TSTR // Need exactly 1 argument. - | bne ->fff_fallback - | ldr CARG3, STR:CARG1->len - | ldrb CARG1, STR:CARG1[1] // Access is always ok (NUL at end). - | mvn CARG2, #~LJ_TISNUM - | cmp CARG3, #0 - | moveq RC, #(0+1)*8 - | movne RC, #(1+1)*8 - | strd CARG12, [BASE, #-8] - | b ->fff_res - | - |.ffunc string_char // Only handle the 1-arg case here. - | ffgccheck - | ldrd CARG12, [BASE] - | ldr PC, [BASE, FRAME_PC] - | cmp NARGS8:RC, #8 // Need exactly 1 argument. - | checktpeq CARG2, LJ_TISNUM - | bicseq CARG4, CARG1, #255 - | mov CARG3, #1 - | bne ->fff_fallback - | str CARG1, TMPD - | mov CARG2, TMPDp // Points to stack. Little-endian. - |->fff_newstr: - | // CARG2 = str, CARG3 = len. - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_str_new // (lua_State *L, char *str, size_t l) - |->fff_resstr: - | // Returns GCstr *. - | ldr BASE, L->base - | mvn CARG2, #~LJ_TSTR - | b ->fff_restv - | - |.ffunc string_sub - | ffgccheck - | ldrd CARG12, [BASE] - | ldrd CARG34, [BASE, #16] - | cmp NARGS8:RC, #16 - | mvn RB, #0 - | beq >1 - | blo ->fff_fallback - | checktp CARG4, LJ_TISNUM - | mov RB, CARG3 - | bne ->fff_fallback - |1: - | ldrd CARG34, [BASE, #8] - | checktp CARG2, LJ_TSTR - | ldreq CARG2, STR:CARG1->len - | checktpeq CARG4, LJ_TISNUM - | bne ->fff_fallback - | // CARG1 = str, CARG2 = str->len, CARG3 = start, RB = end - | add CARG4, CARG2, #1 - | cmp CARG3, #0 // if (start < 0) start += len+1 - | addlt CARG3, CARG3, CARG4 - | cmp CARG3, #1 // if (start < 1) start = 1 - | movlt CARG3, #1 - | cmp RB, #0 // if (end < 0) end += len+1 - | addlt RB, RB, CARG4 - | bic RB, RB, RB, asr #31 // if (end < 0) end = 0 - | cmp RB, CARG2 // if (end > len) end = len - | add CARG1, STR:CARG1, #sizeof(GCstr)-1 - | movgt RB, CARG2 - | add CARG2, CARG1, CARG3 - | subs CARG3, RB, CARG3 // len = end - start - | add CARG3, CARG3, #1 // len += 1 - | bge ->fff_newstr - |->fff_emptystr: - | sub STR:CARG1, DISPATCH, #-DISPATCH_GL(strempty) - | mvn CARG2, #~LJ_TSTR - | b ->fff_restv - | - |.macro ffstring_op, name - | .ffunc string_ .. name - | ffgccheck - | ldr CARG3, [BASE, #4] - | cmp NARGS8:RC, #8 - | ldr STR:CARG2, [BASE] - | blo ->fff_fallback - | sub SBUF:CARG1, DISPATCH, #-DISPATCH_GL(tmpbuf) - | checkstr CARG3, ->fff_fallback - | ldr CARG4, SBUF:CARG1->b - | str BASE, L->base - | str PC, SAVE_PC - | str L, SBUF:CARG1->L - | str CARG4, SBUF:CARG1->p - | bl extern lj_buf_putstr_ .. name - | bl extern lj_buf_tostr - | b ->fff_resstr - |.endmacro - | - |ffstring_op reverse - |ffstring_op lower - |ffstring_op upper - | - |//-- Bit library -------------------------------------------------------- - | - |// FP number to bit conversion for soft-float. Clobbers r0-r3. - |->vm_tobit_fb: - | bhi ->fff_fallback - |->vm_tobit: - | lsl RB, CARG2, #1 - | adds RB, RB, #0x00200000 - | movpl CARG1, #0 // |x| < 1? - | bxpl lr - | mvn CARG4, #0x3e0 - | subs RB, CARG4, RB, asr #21 - | bmi >1 // |x| >= 2^32? - | lsl CARG4, CARG2, #11 - | orr CARG4, CARG4, #0x80000000 - | orr CARG4, CARG4, CARG1, lsr #21 - | cmp CARG2, #0 - | lsr CARG1, CARG4, RB - | rsblt CARG1, CARG1, #0 - | bx lr - |1: - | add RB, RB, #21 - | lsr CARG4, CARG1, RB - | rsb RB, RB, #20 - | lsl CARG1, CARG2, #12 - | cmp CARG2, #0 - | orr CARG1, CARG4, CARG1, lsl RB - | rsblt CARG1, CARG1, #0 - | bx lr - | - |.macro .ffunc_bit, name - | .ffunc_1 bit_..name - | checktp CARG2, LJ_TISNUM - | blne ->vm_tobit_fb - |.endmacro - | - |.ffunc_bit tobit - | mvn CARG2, #~LJ_TISNUM - | b ->fff_restv - | - |.macro .ffunc_bit_op, name, ins - | .ffunc_bit name - | mov CARG3, CARG1 - | mov RA, #8 - |1: - | ldrd CARG12, [BASE, RA] - | cmp RA, NARGS8:RC - | add RA, RA, #8 - | bge >2 - | checktp CARG2, LJ_TISNUM - | blne ->vm_tobit_fb - | ins CARG3, CARG3, CARG1 - | b <1 - |.endmacro - | - |.ffunc_bit_op band, and - |.ffunc_bit_op bor, orr - |.ffunc_bit_op bxor, eor - | - |2: - | mvn CARG4, #~LJ_TISNUM - | ldr PC, [BASE, FRAME_PC] - | strd CARG34, [BASE, #-8] - | b ->fff_res1 - | - |.ffunc_bit bswap - | eor CARG3, CARG1, CARG1, ror #16 - | bic CARG3, CARG3, #0x00ff0000 - | ror CARG1, CARG1, #8 - | mvn CARG2, #~LJ_TISNUM - | eor CARG1, CARG1, CARG3, lsr #8 - | b ->fff_restv - | - |.ffunc_bit bnot - | mvn CARG1, CARG1 - | mvn CARG2, #~LJ_TISNUM - | b ->fff_restv - | - |.macro .ffunc_bit_sh, name, ins, shmod - | .ffunc bit_..name - | ldrd CARG12, [BASE, #8] - | cmp NARGS8:RC, #16 - | blo ->fff_fallback - | checktp CARG2, LJ_TISNUM - | blne ->vm_tobit_fb - |.if shmod == 0 - | and RA, CARG1, #31 - |.else - | rsb RA, CARG1, #0 - |.endif - | ldrd CARG12, [BASE] - | checktp CARG2, LJ_TISNUM - | blne ->vm_tobit_fb - | ins CARG1, CARG1, RA - | mvn CARG2, #~LJ_TISNUM - | b ->fff_restv - |.endmacro - | - |.ffunc_bit_sh lshift, lsl, 0 - |.ffunc_bit_sh rshift, lsr, 0 - |.ffunc_bit_sh arshift, asr, 0 - |.ffunc_bit_sh rol, ror, 1 - |.ffunc_bit_sh ror, ror, 0 - | - |//----------------------------------------------------------------------- - | - |->fff_fallback: // Call fast function fallback handler. - | // BASE = new base, RC = nargs*8 - | ldr CARG3, [BASE, FRAME_FUNC] - | ldr CARG2, L->maxstack - | add CARG1, BASE, NARGS8:RC - | ldr PC, [BASE, FRAME_PC] // Fallback may overwrite PC. - | str CARG1, L->top - | ldr CARG3, CFUNC:CARG3->f - | str BASE, L->base - | add CARG1, CARG1, #8*LUA_MINSTACK - | str PC, SAVE_PC // Redundant (but a defined value). - | cmp CARG1, CARG2 - | mov CARG1, L - | bhi >5 // Need to grow stack. - | blx CARG3 // (lua_State *L) - | // Either throws an error, or recovers and returns -1, 0 or nresults+1. - | ldr BASE, L->base - | cmp CRET1, #0 - | lsl RC, CRET1, #3 - | sub RA, BASE, #8 - | bgt ->fff_res // Returned nresults+1? - |1: // Returned 0 or -1: retry fast path. - | ldr CARG1, L->top - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | sub NARGS8:RC, CARG1, BASE - | bne ->vm_call_tail // Returned -1? - | ins_callt // Returned 0: retry fast path. - | - |// Reconstruct previous base for vmeta_call during tailcall. - |->vm_call_tail: - | ands CARG1, PC, #FRAME_TYPE - | bic CARG2, PC, #FRAME_TYPEP - | ldreq INS, [PC, #-4] - | andeq CARG2, MASKR8, INS, lsr #5 // Conditional decode_RA8. - | addeq CARG2, CARG2, #8 - | sub RB, BASE, CARG2 - | b ->vm_call_dispatch // Resolve again for tailcall. - | - |5: // Grow stack for fallback handler. - | mov CARG2, #LUA_MINSTACK - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldr BASE, L->base - | cmp CARG1, CARG1 // Set zero-flag to force retry. - | b <1 - | - |->fff_gcstep: // Call GC step function. - | // BASE = new base, RC = nargs*8 - | mov RA, lr - | str BASE, L->base - | add CARG2, BASE, NARGS8:RC - | str PC, SAVE_PC // Redundant (but a defined value). - | str CARG2, L->top - | mov CARG1, L - | bl extern lj_gc_step // (lua_State *L) - | ldr BASE, L->base - | mov lr, RA // Help return address predictor. - | ldr CFUNC:CARG3, [BASE, FRAME_FUNC] - | bx lr - | - |//----------------------------------------------------------------------- - |//-- Special dispatch targets ------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_record: // Dispatch target for recording phase. - |.if JIT - | ldrb CARG1, [DISPATCH, #DISPATCH_GL(hookmask)] - | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent. - | bne >5 - | // Decrement the hookcount for consistency, but always do the call. - | ldr CARG2, [DISPATCH, #DISPATCH_GL(hookcount)] - | tst CARG1, #HOOK_ACTIVE - | bne >1 - | sub CARG2, CARG2, #1 - | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT - | strne CARG2, [DISPATCH, #DISPATCH_GL(hookcount)] - | b >1 - |.endif - | - |->vm_rethook: // Dispatch target for return hooks. - | ldrb CARG1, [DISPATCH, #DISPATCH_GL(hookmask)] - | tst CARG1, #HOOK_ACTIVE // Hook already active? - | beq >1 - |5: // Re-dispatch to static ins. - | decode_OP OP, INS - | add OP, DISPATCH, OP, lsl #2 - | ldr pc, [OP, #GG_DISP2STATIC] - | - |->vm_inshook: // Dispatch target for instr/line hooks. - | ldrb CARG1, [DISPATCH, #DISPATCH_GL(hookmask)] - | ldr CARG2, [DISPATCH, #DISPATCH_GL(hookcount)] - | tst CARG1, #HOOK_ACTIVE // Hook already active? - | bne <5 - | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT - | beq <5 - | subs CARG2, CARG2, #1 - | str CARG2, [DISPATCH, #DISPATCH_GL(hookcount)] - | beq >1 - | tst CARG1, #LUA_MASKLINE - | beq <5 - |1: - | mov CARG1, L - | str BASE, L->base - | mov CARG2, PC - | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. - | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) - |3: - | ldr BASE, L->base - |4: // Re-dispatch to static ins. - | ldrb OP, [PC, #-4] - | ldr INS, [PC, #-4] - | add OP, DISPATCH, OP, lsl #2 - | ldr OP, [OP, #GG_DISP2STATIC] - | decode_RA8 RA, INS - | decode_RD RC, INS - | bx OP - | - |->cont_hook: // Continue from hook yield. - | ldr CARG1, [CARG4, #-24] - | add PC, PC, #4 - | str CARG1, SAVE_MULTRES // Restore MULTRES for *M ins. - | b <4 - | - |->vm_hotloop: // Hot loop counter underflow. - |.if JIT - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L). - | sub CARG1, DISPATCH, #-GG_DISP2J - | str PC, SAVE_PC - | ldr CARG3, LFUNC:CARG3->field_pc - | mov CARG2, PC - | str L, [DISPATCH, #DISPATCH_J(L)] - | ldrb CARG3, [CARG3, #PC2PROTO(framesize)] - | str BASE, L->base - | add CARG3, BASE, CARG3, lsl #3 - | str CARG3, L->top - | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) - | b <3 - |.endif - | - |->vm_callhook: // Dispatch target for call hooks. - | mov CARG2, PC - |.if JIT - | b >1 - |.endif - | - |->vm_hotcall: // Hot call counter underflow. - |.if JIT - | orr CARG2, PC, #1 - |1: - |.endif - | add CARG4, BASE, RC - | str PC, SAVE_PC - | mov CARG1, L - | str BASE, L->base - | sub RA, RA, BASE - | str CARG4, L->top - | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) - | // Returns ASMFunction. - | ldr BASE, L->base - | ldr CARG4, L->top - | mov CARG2, #0 - | add RA, BASE, RA - | sub NARGS8:RC, CARG4, BASE - | str CARG2, SAVE_PC // Invalidate for subsequent line hook. - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | ldr INS, [PC, #-4] - | bx CRET1 - | - |->cont_stitch: // Trace stitching. - |.if JIT - | // RA = resultptr, CARG4 = meta base - | ldr RB, SAVE_MULTRES - | ldr INS, [PC, #-4] - | ldr TRACE:CARG3, [CARG4, #-24] // Save previous trace. - | subs RB, RB, #8 - | decode_RA8 RC, INS // Call base. - | beq >2 - |1: // Move results down. - | ldrd CARG12, [RA] - | add RA, RA, #8 - | subs RB, RB, #8 - | strd CARG12, [BASE, RC] - | add RC, RC, #8 - | bne <1 - |2: - | decode_RA8 RA, INS - | decode_RB8 RB, INS - | add RA, RA, RB - |3: - | cmp RA, RC - | mvn CARG2, #~LJ_TNIL - | bhi >9 // More results wanted? - | - | ldrh RA, TRACE:CARG3->traceno - | ldrh RC, TRACE:CARG3->link - | cmp RC, RA - | beq ->cont_nop // Blacklisted. - | cmp RC, #0 - | bne =>BC_JLOOP // Jump to stitched trace. - | - | // Stitch a new trace to the previous trace. - | str RA, [DISPATCH, #DISPATCH_J(exitno)] - | str L, [DISPATCH, #DISPATCH_J(L)] - | str BASE, L->base - | sub CARG1, DISPATCH, #-GG_DISP2J - | mov CARG2, PC - | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) - | ldr BASE, L->base - | b ->cont_nop - | - |9: // Fill up results with nil. - | strd CARG12, [BASE, RC] - | add RC, RC, #8 - | b <3 - |.endif - | - |->vm_profhook: // Dispatch target for profiler hook. -#if LJ_HASPROFILE - | mov CARG1, L - | str BASE, L->base - | mov CARG2, PC - | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) - | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. - | ldr BASE, L->base - | sub PC, PC, #4 - | b ->cont_nop -#endif - | - |//----------------------------------------------------------------------- - |//-- Trace exit handler ------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_exit_handler: - |.if JIT - | sub sp, sp, #12 - | push {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12} - | ldr CARG1, [sp, #64] // Load original value of lr. - | ldr DISPATCH, [lr] // Load DISPATCH. - | add CARG3, sp, #64 // Recompute original value of sp. - | mv_vmstate CARG4, EXIT - | str CARG3, [sp, #52] // Store sp in RID_SP - | st_vmstate CARG4 - | ldr CARG2, [CARG1, #-4]! // Get exit instruction. - | str CARG1, [sp, #56] // Store exit pc in RID_LR and RID_PC. - | str CARG1, [sp, #60] - |.if FPU - | vpush {d0-d15} - |.endif - | lsl CARG2, CARG2, #8 - | add CARG1, CARG1, CARG2, asr #6 - | ldr CARG2, [lr, #4] // Load exit stub group offset. - | sub CARG1, CARG1, lr - | ldr L, [DISPATCH, #DISPATCH_GL(cur_L)] - | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number. - | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)] - | str CARG1, [DISPATCH, #DISPATCH_J(exitno)] - | mov CARG4, #0 - | str BASE, L->base - | str L, [DISPATCH, #DISPATCH_J(L)] - | str CARG4, [DISPATCH, #DISPATCH_GL(jit_base)] - | sub CARG1, DISPATCH, #-GG_DISP2J - | mov CARG2, sp - | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) - | // Returns MULTRES (unscaled) or negated error code. - | ldr CARG2, L->cframe - | ldr BASE, L->base - | bic CARG2, CARG2, #~CFRAME_RAWMASK // Use two steps: bic sp is deprecated. - | mov sp, CARG2 - | ldr PC, SAVE_PC // Get SAVE_PC. - | str L, SAVE_L // Set SAVE_L (on-trace resume/yield). - | b >1 - |.endif - |->vm_exit_interp: - | // CARG1 = MULTRES or negated error code, BASE, PC and DISPATCH set. - |.if JIT - | ldr L, SAVE_L - |1: - | cmp CARG1, #0 - | blt >9 // Check for error from exit. - | lsl RC, CARG1, #3 - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | str RC, SAVE_MULTRES - | mov CARG3, #0 - | str BASE, L->base - | ldr CARG2, LFUNC:CARG2->field_pc - | str CARG3, [DISPATCH, #DISPATCH_GL(jit_base)] - | mv_vmstate CARG4, INTERP - | ldr KBASE, [CARG2, #PC2PROTO(k)] - | // Modified copy of ins_next which handles function header dispatch, too. - | ldrb OP, [PC] - | mov MASKR8, #255 - | ldr INS, [PC], #4 - | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. - | st_vmstate CARG4 - | cmp OP, #BC_FUNCC+2 // Fast function? - | bhs >4 - |2: - | cmp OP, #BC_FUNCF // Function header? - | ldr OP, [DISPATCH, OP, lsl #2] - | decode_RA8 RA, INS - | lsrlo RC, INS, #16 // No: Decode operands A*8 and D. - | subhs RC, RC, #8 - | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8 - | ldrhs CARG3, [BASE, FRAME_FUNC] - | bx OP - | - |4: // Check frame below fast function. - | ldr CARG1, [BASE, FRAME_PC] - | ands CARG2, CARG1, #FRAME_TYPE - | bne <2 // Trace stitching continuation? - | // Otherwise set KBASE for Lua function below fast function. - | ldr CARG3, [CARG1, #-4] - | decode_RA8 CARG1, CARG3 - | sub CARG2, BASE, CARG1 - | ldr LFUNC:CARG3, [CARG2, #-16] - | ldr CARG3, LFUNC:CARG3->field_pc - | ldr KBASE, [CARG3, #PC2PROTO(k)] - | b <2 - | - |9: // Rethrow error from the right C frame. - | rsb CARG2, CARG1, #0 - | mov CARG1, L - | bl extern lj_err_throw // (lua_State *L, int errcode) - |.endif - | - |//----------------------------------------------------------------------- - |//-- Math helper functions ---------------------------------------------- - |//----------------------------------------------------------------------- - | - |// FP value rounding. Called from JIT code. - |// - |// double lj_vm_floor/ceil/trunc(double x); - |.macro vm_round, func, hf - |.if hf == 1 - | vmov CARG1, CARG2, d0 - |.endif - | lsl CARG3, CARG2, #1 - | adds RB, CARG3, #0x00200000 - | bpl >2 // |x| < 1? - | mvn CARG4, #0x3cc - | subs RB, CARG4, RB, asr #21 // 2^0: RB = 51, 2^51: RB = 0. - | bxlo lr // |x| >= 2^52: done. - | mvn CARG4, #1 - | bic CARG3, CARG1, CARG4, lsl RB // ztest = lo & ~lomask - | and CARG1, CARG1, CARG4, lsl RB // lo &= lomask - | subs RB, RB, #32 - | bicpl CARG4, CARG2, CARG4, lsl RB // |x| <= 2^20: ztest |= hi & ~himask - | orrpl CARG3, CARG3, CARG4 - | mvnpl CARG4, #1 - | andpl CARG2, CARG2, CARG4, lsl RB // |x| <= 2^20: hi &= himask - |.if "func" == "floor" - | tst CARG3, CARG2, asr #31 // iszero = ((ztest & signmask) == 0) - |.else - | bics CARG3, CARG3, CARG2, asr #31 // iszero = ((ztest & ~signmask) == 0) - |.endif - |.if hf == 1 - | vmoveq d0, CARG1, CARG2 - |.endif - | bxeq lr // iszero: done. - | mvn CARG4, #1 - | cmp RB, #0 - | lslpl CARG3, CARG4, RB - | mvnmi CARG3, #0 - | add RB, RB, #32 - | subs CARG1, CARG1, CARG4, lsl RB // lo = lo-lomask - | sbc CARG2, CARG2, CARG3 // hi = hi-himask+carry - |.if hf == 1 - | vmov d0, CARG1, CARG2 - |.endif - | bx lr - | - |2: // |x| < 1: - | bxcs lr // |x| is not finite. - | orr CARG3, CARG3, CARG1 // ztest = (2*hi) | lo - |.if "func" == "floor" - | tst CARG3, CARG2, asr #31 // iszero = ((ztest & signmask) == 0) - |.else - | bics CARG3, CARG3, CARG2, asr #31 // iszero = ((ztest & ~signmask) == 0) - |.endif - | mov CARG1, #0 // lo = 0 - | and CARG2, CARG2, #0x80000000 - | ldrne CARG4, <9 // hi = sign(x) | (iszero ? 0.0 : 1.0) - | orrne CARG2, CARG2, CARG4 - |.if hf == 1 - | vmov d0, CARG1, CARG2 - |.endif - | bx lr - |.endmacro - | - |9: - | .long 0x3ff00000 // hiword(+1.0) - | - |->vm_floor: - |.if HFABI - | vm_round floor, 1 - |.endif - |->vm_floor_sf: - | vm_round floor, 0 - | - |->vm_ceil: - |.if HFABI - | vm_round ceil, 1 - |.endif - |->vm_ceil_sf: - | vm_round ceil, 0 - | - |.macro vm_trunc, hf - |.if JIT - |.if hf == 1 - | vmov CARG1, CARG2, d0 - |.endif - | lsl CARG3, CARG2, #1 - | adds RB, CARG3, #0x00200000 - | andpl CARG2, CARG2, #0x80000000 // |x| < 1? hi = sign(x), lo = 0. - | movpl CARG1, #0 - |.if hf == 1 - | vmovpl d0, CARG1, CARG2 - |.endif - | bxpl lr - | mvn CARG4, #0x3cc - | subs RB, CARG4, RB, asr #21 // 2^0: RB = 51, 2^51: RB = 0. - | bxlo lr // |x| >= 2^52: already done. - | mvn CARG4, #1 - | and CARG1, CARG1, CARG4, lsl RB // lo &= lomask - | subs RB, RB, #32 - | andpl CARG2, CARG2, CARG4, lsl RB // |x| <= 2^20: hi &= himask - |.if hf == 1 - | vmov d0, CARG1, CARG2 - |.endif - | bx lr - |.endif - |.endmacro - | - |->vm_trunc: - |.if HFABI - | vm_trunc 1 - |.endif - |->vm_trunc_sf: - | vm_trunc 0 - | - | // double lj_vm_mod(double dividend, double divisor); - |->vm_mod: - |.if FPU - | // Special calling convention. Also, RC (r11) is not preserved. - | vdiv.f64 d0, d6, d7 - | mov RC, lr - | vmov CARG1, CARG2, d0 - | bl ->vm_floor_sf - | vmov d0, CARG1, CARG2 - | vmul.f64 d0, d0, d7 - | mov lr, RC - | vsub.f64 d6, d6, d0 - | bx lr - |.else - | push {r0, r1, r2, r3, r4, lr} - | bl extern __aeabi_ddiv - | bl ->vm_floor_sf - | ldrd CARG34, [sp, #8] - | bl extern __aeabi_dmul - | ldrd CARG34, [sp] - | eor CARG2, CARG2, #0x80000000 - | bl extern __aeabi_dadd - | add sp, sp, #20 - | pop {pc} - |.endif - | - | // int lj_vm_modi(int dividend, int divisor); - |->vm_modi: - | ands RB, CARG1, #0x80000000 - | rsbmi CARG1, CARG1, #0 // a = |dividend| - | eor RB, RB, CARG2, asr #1 // Keep signdiff and sign(divisor). - | cmp CARG2, #0 - | rsbmi CARG2, CARG2, #0 // b = |divisor| - | subs CARG4, CARG2, #1 - | cmpne CARG1, CARG2 - | moveq CARG1, #0 // if (b == 1 || a == b) a = 0 - | tsthi CARG2, CARG4 - | andeq CARG1, CARG1, CARG4 // else if ((b & (b-1)) == 0) a &= b-1 - | bls >1 - | // Use repeated subtraction to get the remainder. - | clz CARG3, CARG1 - | clz CARG4, CARG2 - | sub CARG4, CARG4, CARG3 - | rsbs CARG3, CARG4, #31 // entry = (31-(clz(b)-clz(a)))*8 - | addne pc, pc, CARG3, lsl #3 // Duff's device. - | nop - { - int i; - for (i = 31; i >= 0; i--) { - | cmp CARG1, CARG2, lsl #i - | subhs CARG1, CARG1, CARG2, lsl #i - } - } - |1: - | cmp CARG1, #0 - | cmpne RB, #0 - | submi CARG1, CARG1, CARG2 // if (y != 0 && signdiff) y = y - b - | eors CARG2, CARG1, RB, lsl #1 - | rsbmi CARG1, CARG1, #0 // if (sign(divisor) != sign(y)) y = -y - | bx lr - | - |//----------------------------------------------------------------------- - |//-- Miscellaneous functions -------------------------------------------- - |//----------------------------------------------------------------------- - | - |//----------------------------------------------------------------------- - |//-- FFI helper functions ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Handler for callback functions. - |// Saveregs already performed. Callback slot number in [sp], g in r12. - |->vm_ffi_callback: - |.if FFI - |.type CTSTATE, CTState, PC - | ldr CTSTATE, GL:r12->ctype_state - | add DISPATCH, r12, #GG_G2DISP - |.if FPU - | str r4, SAVE_R4 - | add r4, sp, CFRAME_SPACE+4+8*8 - | vstmdb r4!, {d8-d15} - |.endif - |.if HFABI - | add r12, CTSTATE, #offsetof(CTState, cb.fpr[8]) - |.endif - | strd CARG34, CTSTATE->cb.gpr[2] - | strd CARG12, CTSTATE->cb.gpr[0] - |.if HFABI - | vstmdb r12!, {d0-d7} - |.endif - | ldr CARG4, [sp] - | add CARG3, sp, #CFRAME_SIZE - | mov CARG1, CTSTATE - | lsr CARG4, CARG4, #3 - | str CARG3, CTSTATE->cb.stack - | mov CARG2, sp - | str CARG4, CTSTATE->cb.slot - | str CTSTATE, SAVE_PC // Any value outside of bytecode is ok. - | bl extern lj_ccallback_enter // (CTState *cts, void *cf) - | // Returns lua_State *. - | ldr BASE, L:CRET1->base - | mv_vmstate CARG2, INTERP - | ldr RC, L:CRET1->top - | mov MASKR8, #255 - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | mov L, CRET1 - | sub RC, RC, BASE - | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. - | st_vmstate CARG2 - | ins_callt - |.endif - | - |->cont_ffi_callback: // Return from FFI callback. - |.if FFI - | ldr CTSTATE, [DISPATCH, #DISPATCH_GL(ctype_state)] - | str BASE, L->base - | str CARG4, L->top - | str L, CTSTATE->L - | mov CARG1, CTSTATE - | mov CARG2, RA - | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) - | ldrd CARG12, CTSTATE->cb.gpr[0] - |.if HFABI - | vldr d0, CTSTATE->cb.fpr[0] - |.endif - | b ->vm_leave_unw - |.endif - | - |->vm_ffi_call: // Call C function via FFI. - | // Caveat: needs special frame unwinding, see below. - |.if FFI - | .type CCSTATE, CCallState, r4 - | push {CCSTATE, r5, r11, lr} - | mov CCSTATE, CARG1 - | ldr CARG1, CCSTATE:CARG1->spadj - | ldrb CARG2, CCSTATE->nsp - | add CARG3, CCSTATE, #offsetof(CCallState, stack) - |.if HFABI - | add RB, CCSTATE, #offsetof(CCallState, fpr[0]) - |.endif - | mov r11, sp - | sub sp, sp, CARG1 // Readjust stack. - | subs CARG2, CARG2, #1 - |.if HFABI - | vldm RB, {d0-d7} - |.endif - | ldr RB, CCSTATE->func - | bmi >2 - |1: // Copy stack slots. - | ldr CARG4, [CARG3, CARG2, lsl #2] - | str CARG4, [sp, CARG2, lsl #2] - | subs CARG2, CARG2, #1 - | bpl <1 - |2: - | ldrd CARG12, CCSTATE->gpr[0] - | ldrd CARG34, CCSTATE->gpr[2] - | blx RB - | mov sp, r11 - |.if HFABI - | add r12, CCSTATE, #offsetof(CCallState, fpr[4]) - |.endif - | strd CRET1, CCSTATE->gpr[0] - |.if HFABI - | vstmdb r12!, {d0-d3} - |.endif - | pop {CCSTATE, r5, r11, pc} - |.endif - |// Note: vm_ffi_call must be the last function in this object file! - | - |//----------------------------------------------------------------------- -} - -/* Generate the code for a single instruction. */ -static void build_ins(BuildCtx *ctx, BCOp op, int defop) -{ - int vk = 0; - |=>defop: - - switch (op) { - - /* -- Comparison ops ---------------------------------------------------- */ - - /* Remember: all ops branch for a true comparison, fall through otherwise. */ - - case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: - | // RA = src1*8, RC = src2, JMP with RC = target - | lsl RC, RC, #3 - | ldrd CARG12, [RA, BASE]! - | ldrh RB, [PC, #2] - | ldrd CARG34, [RC, BASE]! - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | checktp CARG2, LJ_TISNUM - | bne >3 - | checktp CARG4, LJ_TISNUM - | bne >4 - | cmp CARG1, CARG3 - if (op == BC_ISLT) { - | sublt PC, RB, #0x20000 - } else if (op == BC_ISGE) { - | subge PC, RB, #0x20000 - } else if (op == BC_ISLE) { - | suble PC, RB, #0x20000 - } else { - | subgt PC, RB, #0x20000 - } - |1: - | ins_next - | - |3: // CARG12 is not an integer. - |.if FPU - | vldr d0, [RA] - | bhi ->vmeta_comp - | // d0 is a number. - | checktp CARG4, LJ_TISNUM - | vldr d1, [RC] - | blo >5 - | bhi ->vmeta_comp - | // d0 is a number, CARG3 is an integer. - | vmov s4, CARG3 - | vcvt.f64.s32 d1, s4 - | b >5 - |4: // CARG1 is an integer, CARG34 is not an integer. - | vldr d1, [RC] - | bhi ->vmeta_comp - | // CARG1 is an integer, d1 is a number. - | vmov s4, CARG1 - | vcvt.f64.s32 d0, s4 - |5: // d0 and d1 are numbers. - | vcmp.f64 d0, d1 - | vmrs - | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. - if (op == BC_ISLT) { - | sublo PC, RB, #0x20000 - } else if (op == BC_ISGE) { - | subhs PC, RB, #0x20000 - } else if (op == BC_ISLE) { - | subls PC, RB, #0x20000 - } else { - | subhi PC, RB, #0x20000 - } - | b <1 - |.else - | bhi ->vmeta_comp - | // CARG12 is a number. - | checktp CARG4, LJ_TISNUM - | movlo RA, RB // Save RB. - | blo >5 - | bhi ->vmeta_comp - | // CARG12 is a number, CARG3 is an integer. - | mov CARG1, CARG3 - | mov RC, RA - | mov RA, RB // Save RB. - | bl extern __aeabi_i2d - | mov CARG3, CARG1 - | mov CARG4, CARG2 - | ldrd CARG12, [RC] // Restore first operand. - | b >5 - |4: // CARG1 is an integer, CARG34 is not an integer. - | bhi ->vmeta_comp - | // CARG1 is an integer, CARG34 is a number. - | mov RA, RB // Save RB. - | bl extern __aeabi_i2d - | ldrd CARG34, [RC] // Restore second operand. - |5: // CARG12 and CARG34 are numbers. - | bl extern __aeabi_cdcmple - | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. - if (op == BC_ISLT) { - | sublo PC, RA, #0x20000 - } else if (op == BC_ISGE) { - | subhs PC, RA, #0x20000 - } else if (op == BC_ISLE) { - | subls PC, RA, #0x20000 - } else { - | subhi PC, RA, #0x20000 - } - | b <1 - |.endif - break; - - case BC_ISEQV: case BC_ISNEV: - vk = op == BC_ISEQV; - | // RA = src1*8, RC = src2, JMP with RC = target - | lsl RC, RC, #3 - | ldrd CARG12, [RA, BASE]! - | ldrh RB, [PC, #2] - | ldrd CARG34, [RC, BASE]! - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | checktp CARG2, LJ_TISNUM - | cmnls CARG4, #-LJ_TISNUM - if (vk) { - | bls ->BC_ISEQN_Z - } else { - | bls ->BC_ISNEN_Z - } - | // Either or both types are not numbers. - |.if FFI - | checktp CARG2, LJ_TCDATA - | checktpne CARG4, LJ_TCDATA - | beq ->vmeta_equal_cd - |.endif - | cmp CARG2, CARG4 // Compare types. - | bne >2 // Not the same type? - | checktp CARG2, LJ_TISPRI - | bhs >1 // Same type and primitive type? - | - | // Same types and not a primitive type. Compare GCobj or pvalue. - | cmp CARG1, CARG3 - if (vk) { - | bne >3 // Different GCobjs or pvalues? - |1: // Branch if same. - | sub PC, RB, #0x20000 - |2: // Different. - | ins_next - |3: - | checktp CARG2, LJ_TISTABUD - | bhi <2 // Different objects and not table/ud? - } else { - | beq >1 // Same GCobjs or pvalues? - | checktp CARG2, LJ_TISTABUD - | bhi >2 // Different objects and not table/ud? - } - | // Different tables or userdatas. Need to check __eq metamethod. - | // Field metatable must be at same offset for GCtab and GCudata! - | ldr TAB:RA, TAB:CARG1->metatable - | cmp TAB:RA, #0 - if (vk) { - | beq <2 // No metatable? - } else { - | beq >2 // No metatable? - } - | ldrb RA, TAB:RA->nomm - | mov CARG4, #1-vk // ne = 0 or 1. - | mov CARG2, CARG1 - | tst RA, #1<vmeta_equal // 'no __eq' flag not set? - if (vk) { - | b <2 - } else { - |2: // Branch if different. - | sub PC, RB, #0x20000 - |1: // Same. - | ins_next - } - break; - - case BC_ISEQS: case BC_ISNES: - vk = op == BC_ISEQS; - | // RA = src*8, RC = str_const (~), JMP with RC = target - | mvn RC, RC - | ldrd CARG12, [BASE, RA] - | ldrh RB, [PC, #2] - | ldr STR:CARG3, [KBASE, RC, lsl #2] - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | checktp CARG2, LJ_TSTR - |.if FFI - | bne >7 - | cmp CARG1, CARG3 - |.else - | cmpeq CARG1, CARG3 - |.endif - if (vk) { - | subeq PC, RB, #0x20000 - |1: - } else { - |1: - | subne PC, RB, #0x20000 - } - | ins_next - | - |.if FFI - |7: - | checktp CARG2, LJ_TCDATA - | bne <1 - | b ->vmeta_equal_cd - |.endif - break; - - case BC_ISEQN: case BC_ISNEN: - vk = op == BC_ISEQN; - | // RA = src*8, RC = num_const (~), JMP with RC = target - | lsl RC, RC, #3 - | ldrd CARG12, [RA, BASE]! - | ldrh RB, [PC, #2] - | ldrd CARG34, [RC, KBASE]! - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - if (vk) { - |->BC_ISEQN_Z: - } else { - |->BC_ISNEN_Z: - } - | checktp CARG2, LJ_TISNUM - | bne >3 - | checktp CARG4, LJ_TISNUM - | bne >4 - | cmp CARG1, CARG3 - if (vk) { - | subeq PC, RB, #0x20000 - |1: - } else { - |1: - | subne PC, RB, #0x20000 - } - |2: - | ins_next - | - |3: // CARG12 is not an integer. - |.if FFI - | bhi >7 - |.else - if (!vk) { - | subhi PC, RB, #0x20000 - } - | bhi <2 - |.endif - |.if FPU - | checktp CARG4, LJ_TISNUM - | vmov s4, CARG3 - | vldr d0, [RA] - | vldrlo d1, [RC] - | vcvths.f64.s32 d1, s4 - | b >5 - |4: // CARG1 is an integer, d1 is a number. - | vmov s4, CARG1 - | vldr d1, [RC] - | vcvt.f64.s32 d0, s4 - |5: // d0 and d1 are numbers. - | vcmp.f64 d0, d1 - | vmrs - if (vk) { - | subeq PC, RB, #0x20000 - } else { - | subne PC, RB, #0x20000 - } - | b <2 - |.else - | // CARG12 is a number. - | checktp CARG4, LJ_TISNUM - | movlo RA, RB // Save RB. - | blo >5 - | // CARG12 is a number, CARG3 is an integer. - | mov CARG1, CARG3 - | mov RC, RA - |4: // CARG1 is an integer, CARG34 is a number. - | mov RA, RB // Save RB. - | bl extern __aeabi_i2d - | ldrd CARG34, [RC] // Restore other operand. - |5: // CARG12 and CARG34 are numbers. - | bl extern __aeabi_cdcmpeq - if (vk) { - | subeq PC, RA, #0x20000 - } else { - | subne PC, RA, #0x20000 - } - | b <2 - |.endif - | - |.if FFI - |7: - | checktp CARG2, LJ_TCDATA - | bne <1 - | b ->vmeta_equal_cd - |.endif - break; - - case BC_ISEQP: case BC_ISNEP: - vk = op == BC_ISEQP; - | // RA = src*8, RC = primitive_type (~), JMP with RC = target - | ldrd CARG12, [BASE, RA] - | ldrh RB, [PC, #2] - | add PC, PC, #4 - | mvn RC, RC - | add RB, PC, RB, lsl #2 - |.if FFI - | checktp CARG2, LJ_TCDATA - | beq ->vmeta_equal_cd - |.endif - | cmp CARG2, RC - if (vk) { - | subeq PC, RB, #0x20000 - } else { - | subne PC, RB, #0x20000 - } - | ins_next - break; - - /* -- Unary test and copy ops ------------------------------------------- */ - - case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: - | // RA = dst*8 or unused, RC = src, JMP with RC = target - | add RC, BASE, RC, lsl #3 - | ldrh RB, [PC, #2] - | ldrd CARG12, [RC] - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | checktp CARG2, LJ_TTRUE - if (op == BC_ISTC || op == BC_IST) { - | subls PC, RB, #0x20000 - if (op == BC_ISTC) { - | strdls CARG12, [BASE, RA] - } - } else { - | subhi PC, RB, #0x20000 - if (op == BC_ISFC) { - | strdhi CARG12, [BASE, RA] - } - } - | ins_next - break; - - case BC_ISTYPE: - | // RA = src*8, RC = -type - | ldrd CARG12, [BASE, RA] - | ins_next1 - | cmn CARG2, RC - | ins_next2 - | bne ->vmeta_istype - | ins_next3 - break; - case BC_ISNUM: - | // RA = src*8, RC = -(TISNUM-1) - | ldrd CARG12, [BASE, RA] - | ins_next1 - | checktp CARG2, LJ_TISNUM - | ins_next2 - | bhs ->vmeta_istype - | ins_next3 - break; - - /* -- Unary ops --------------------------------------------------------- */ - - case BC_MOV: - | // RA = dst*8, RC = src - | lsl RC, RC, #3 - | ins_next1 - | ldrd CARG12, [BASE, RC] - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - break; - case BC_NOT: - | // RA = dst*8, RC = src - | add RC, BASE, RC, lsl #3 - | ins_next1 - | ldr CARG1, [RC, #4] - | add RA, BASE, RA - | ins_next2 - | checktp CARG1, LJ_TTRUE - | mvnls CARG2, #~LJ_TFALSE - | mvnhi CARG2, #~LJ_TTRUE - | str CARG2, [RA, #4] - | ins_next3 - break; - case BC_UNM: - | // RA = dst*8, RC = src - | lsl RC, RC, #3 - | ldrd CARG12, [BASE, RC] - | ins_next1 - | ins_next2 - | checktp CARG2, LJ_TISNUM - | bhi ->vmeta_unm - | eorne CARG2, CARG2, #0x80000000 - | bne >5 - | rsbseq CARG1, CARG1, #0 - | ldrdvs CARG12, >9 - |5: - | strd CARG12, [BASE, RA] - | ins_next3 - | - |.align 8 - |9: - | .long 0x00000000, 0x41e00000 // 2^31. - break; - case BC_LEN: - | // RA = dst*8, RC = src - | lsl RC, RC, #3 - | ldrd CARG12, [BASE, RC] - | checkstr CARG2, >2 - | ldr CARG1, STR:CARG1->len - |1: - | mvn CARG2, #~LJ_TISNUM - | ins_next1 - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - |2: - | checktab CARG2, ->vmeta_len -#if LJ_52 - | ldr TAB:CARG3, TAB:CARG1->metatable - | cmp TAB:CARG3, #0 - | bne >9 - |3: -#endif - |->BC_LEN_Z: - | .IOS mov RC, BASE - | bl extern lj_tab_len // (GCtab *t) - | // Returns uint32_t (but less than 2^31). - | .IOS mov BASE, RC - | b <1 -#if LJ_52 - |9: - | ldrb CARG4, TAB:CARG3->nomm - | tst CARG4, #1<vmeta_len -#endif - break; - - /* -- Binary ops -------------------------------------------------------- */ - - |.macro ins_arithcheck, cond, ncond, target - ||if (vk == 1) { - | cmn CARG4, #-LJ_TISNUM - | cmn..cond CARG2, #-LJ_TISNUM - ||} else { - | cmn CARG2, #-LJ_TISNUM - | cmn..cond CARG4, #-LJ_TISNUM - ||} - | b..ncond target - |.endmacro - |.macro ins_arithcheck_int, target - | ins_arithcheck eq, ne, target - |.endmacro - |.macro ins_arithcheck_num, target - | ins_arithcheck lo, hs, target - |.endmacro - | - |.macro ins_arithpre - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||switch (vk) { - ||case 0: - | .if FPU - | ldrd CARG12, [RB, BASE]! - | ldrd CARG34, [RC, KBASE]! - | .else - | ldrd CARG12, [BASE, RB] - | ldrd CARG34, [KBASE, RC] - | .endif - || break; - ||case 1: - | .if FPU - | ldrd CARG34, [RB, BASE]! - | ldrd CARG12, [RC, KBASE]! - | .else - | ldrd CARG34, [BASE, RB] - | ldrd CARG12, [KBASE, RC] - | .endif - || break; - ||default: - | .if FPU - | ldrd CARG12, [RB, BASE]! - | ldrd CARG34, [RC, BASE]! - | .else - | ldrd CARG12, [BASE, RB] - | ldrd CARG34, [BASE, RC] - | .endif - || break; - ||} - |.endmacro - | - |.macro ins_arithpre_fpu, reg1, reg2 - |.if FPU - ||if (vk == 1) { - | vldr reg2, [RB] - | vldr reg1, [RC] - ||} else { - | vldr reg1, [RB] - | vldr reg2, [RC] - ||} - |.endif - |.endmacro - | - |.macro ins_arithpost_fpu, reg - | ins_next1 - | add RA, BASE, RA - | ins_next2 - | vstr reg, [RA] - | ins_next3 - |.endmacro - | - |.macro ins_arithfallback, ins - ||switch (vk) { - ||case 0: - | ins ->vmeta_arith_vn - || break; - ||case 1: - | ins ->vmeta_arith_nv - || break; - ||default: - | ins ->vmeta_arith_vv - || break; - ||} - |.endmacro - | - |.macro ins_arithdn, intins, fpins, fpcall - | ins_arithpre - |.if "intins" ~= "vm_modi" and not FPU - | ins_next1 - |.endif - | ins_arithcheck_int >5 - |.if "intins" == "smull" - | smull CARG1, RC, CARG3, CARG1 - | cmp RC, CARG1, asr #31 - | ins_arithfallback bne - |.elif "intins" == "vm_modi" - | movs CARG2, CARG3 - | ins_arithfallback beq - | bl ->vm_modi - | mvn CARG2, #~LJ_TISNUM - |.else - | intins CARG1, CARG1, CARG3 - | ins_arithfallback bvs - |.endif - |4: - |.if "intins" == "vm_modi" or FPU - | ins_next1 - |.endif - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - |5: // FP variant. - | ins_arithpre_fpu d6, d7 - | ins_arithfallback ins_arithcheck_num - |.if FPU - |.if "intins" == "vm_modi" - | bl fpcall - |.else - | fpins d6, d6, d7 - |.endif - | ins_arithpost_fpu d6 - |.else - | bl fpcall - |.if "intins" ~= "vm_modi" - | ins_next1 - |.endif - | b <4 - |.endif - |.endmacro - | - |.macro ins_arithfp, fpins, fpcall - | ins_arithpre - |.if "fpins" ~= "extern" or HFABI - | ins_arithpre_fpu d0, d1 - |.endif - | ins_arithfallback ins_arithcheck_num - |.if "fpins" == "extern" - | .IOS mov RC, BASE - | bl fpcall - | .IOS mov BASE, RC - |.elif FPU - | fpins d0, d0, d1 - |.else - | bl fpcall - |.endif - |.if ("fpins" ~= "extern" or HFABI) and FPU - | ins_arithpost_fpu d0 - |.else - | ins_next1 - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - |.endif - |.endmacro - - case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - | ins_arithdn adds, vadd.f64, extern __aeabi_dadd - break; - case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - | ins_arithdn subs, vsub.f64, extern __aeabi_dsub - break; - case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arithdn smull, vmul.f64, extern __aeabi_dmul - break; - case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: - | ins_arithfp vdiv.f64, extern __aeabi_ddiv - break; - case BC_MODVN: case BC_MODNV: case BC_MODVV: - | ins_arithdn vm_modi, vm_mod, ->vm_mod - break; - case BC_POW: - | // NYI: (partial) integer arithmetic. - | ins_arithfp extern, extern pow - break; - - case BC_CAT: - | decode_RB8 RC, INS - | decode_RC8 RB, INS - | // RA = dst*8, RC = src_start*8, RB = src_end*8 (note: RB/RC swapped!) - | sub CARG3, RB, RC - | str BASE, L->base - | add CARG2, BASE, RB - |->BC_CAT_Z: - | // RA = dst*8, RC = src_start*8, CARG2 = top-1 - | mov CARG1, L - | str PC, SAVE_PC - | lsr CARG3, CARG3, #3 - | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) - | // Returns NULL (finished) or TValue * (metamethod). - | ldr BASE, L->base - | cmp CRET1, #0 - | bne ->vmeta_binop - | ldrd CARG34, [BASE, RC] - | ins_next1 - | ins_next2 - | strd CARG34, [BASE, RA] // Copy result to RA. - | ins_next3 - break; - - /* -- Constant ops ------------------------------------------------------ */ - - case BC_KSTR: - | // RA = dst*8, RC = str_const (~) - | mvn RC, RC - | ins_next1 - | ldr CARG1, [KBASE, RC, lsl #2] - | mvn CARG2, #~LJ_TSTR - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - break; - case BC_KCDATA: - |.if FFI - | // RA = dst*8, RC = cdata_const (~) - | mvn RC, RC - | ins_next1 - | ldr CARG1, [KBASE, RC, lsl #2] - | mvn CARG2, #~LJ_TCDATA - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - |.endif - break; - case BC_KSHORT: - | // RA = dst*8, (RC = int16_literal) - | mov CARG1, INS, asr #16 // Refetch sign-extended reg. - | mvn CARG2, #~LJ_TISNUM - | ins_next1 - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - break; - case BC_KNUM: - | // RA = dst*8, RC = num_const - | lsl RC, RC, #3 - | ins_next1 - | ldrd CARG12, [KBASE, RC] - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - break; - case BC_KPRI: - | // RA = dst*8, RC = primitive_type (~) - | add RA, BASE, RA - | mvn RC, RC - | ins_next1 - | ins_next2 - | str RC, [RA, #4] - | ins_next3 - break; - case BC_KNIL: - | // RA = base*8, RC = end - | add RA, BASE, RA - | add RC, BASE, RC, lsl #3 - | mvn CARG1, #~LJ_TNIL - | str CARG1, [RA, #4] - | add RA, RA, #8 - |1: - | str CARG1, [RA, #4] - | cmp RA, RC - | add RA, RA, #8 - | blt <1 - | ins_next_ - break; - - /* -- Upvalue and function ops ------------------------------------------ */ - - case BC_UGET: - | // RA = dst*8, RC = uvnum - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | lsl RC, RC, #2 - | add RC, RC, #offsetof(GCfuncL, uvptr) - | ldr UPVAL:CARG2, [LFUNC:CARG2, RC] - | ldr CARG2, UPVAL:CARG2->v - | ldrd CARG34, [CARG2] - | ins_next1 - | ins_next2 - | strd CARG34, [BASE, RA] - | ins_next3 - break; - case BC_USETV: - | // RA = uvnum*8, RC = src - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | lsr RA, RA, #1 - | add RA, RA, #offsetof(GCfuncL, uvptr) - | lsl RC, RC, #3 - | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] - | ldrd CARG34, [BASE, RC] - | ldrb RB, UPVAL:CARG2->marked - | ldrb RC, UPVAL:CARG2->closed - | ldr CARG2, UPVAL:CARG2->v - | tst RB, #LJ_GC_BLACK // isblack(uv) - | add RB, CARG4, #-LJ_TISGCV - | cmpne RC, #0 - | strd CARG34, [CARG2] - | bne >2 // Upvalue is closed and black? - |1: - | ins_next - | - |2: // Check if new value is collectable. - | cmn RB, #-(LJ_TNUMX - LJ_TISGCV) - | ldrbhi RC, GCOBJ:CARG3->gch.marked - | bls <1 // tvisgcv(v) - | sub CARG1, DISPATCH, #-GG_DISP2G - | tst RC, #LJ_GC_WHITES - | // Crossed a write barrier. Move the barrier forward. - |.if IOS - | beq <1 - | mov RC, BASE - | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) - | mov BASE, RC - |.else - | blne extern lj_gc_barrieruv // (global_State *g, TValue *tv) - |.endif - | b <1 - break; - case BC_USETS: - | // RA = uvnum*8, RC = str_const (~) - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | lsr RA, RA, #1 - | add RA, RA, #offsetof(GCfuncL, uvptr) - | mvn RC, RC - | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] - | ldr STR:CARG3, [KBASE, RC, lsl #2] - | ldrb RB, UPVAL:CARG2->marked - | ldrb RC, UPVAL:CARG2->closed - | ldr CARG2, UPVAL:CARG2->v - | mvn CARG4, #~LJ_TSTR - | tst RB, #LJ_GC_BLACK // isblack(uv) - | ldrb RB, STR:CARG3->marked - | strd CARG34, [CARG2] - | bne >2 - |1: - | ins_next - | - |2: // Check if string is white and ensure upvalue is closed. - | tst RB, #LJ_GC_WHITES // iswhite(str) - | cmpne RC, #0 - | sub CARG1, DISPATCH, #-GG_DISP2G - | // Crossed a write barrier. Move the barrier forward. - |.if IOS - | beq <1 - | mov RC, BASE - | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) - | mov BASE, RC - |.else - | blne extern lj_gc_barrieruv // (global_State *g, TValue *tv) - |.endif - | b <1 - break; - case BC_USETN: - | // RA = uvnum*8, RC = num_const - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | lsr RA, RA, #1 - | add RA, RA, #offsetof(GCfuncL, uvptr) - | lsl RC, RC, #3 - | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] - | ldrd CARG34, [KBASE, RC] - | ldr CARG2, UPVAL:CARG2->v - | ins_next1 - | ins_next2 - | strd CARG34, [CARG2] - | ins_next3 - break; - case BC_USETP: - | // RA = uvnum*8, RC = primitive_type (~) - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | lsr RA, RA, #1 - | add RA, RA, #offsetof(GCfuncL, uvptr) - | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] - | mvn RC, RC - | ldr CARG2, UPVAL:CARG2->v - | ins_next1 - | ins_next2 - | str RC, [CARG2, #4] - | ins_next3 - break; - - case BC_UCLO: - | // RA = level*8, RC = target - | ldr CARG3, L->openupval - | add RC, PC, RC, lsl #2 - | str BASE, L->base - | cmp CARG3, #0 - | sub PC, RC, #0x20000 - | beq >1 - | mov CARG1, L - | add CARG2, BASE, RA - | bl extern lj_func_closeuv // (lua_State *L, TValue *level) - | ldr BASE, L->base - |1: - | ins_next - break; - - case BC_FNEW: - | // RA = dst*8, RC = proto_const (~) (holding function prototype) - | mvn RC, RC - | str BASE, L->base - | ldr CARG2, [KBASE, RC, lsl #2] - | str PC, SAVE_PC - | ldr CARG3, [BASE, FRAME_FUNC] - | mov CARG1, L - | // (lua_State *L, GCproto *pt, GCfuncL *parent) - | bl extern lj_func_newL_gc - | // Returns GCfuncL *. - | ldr BASE, L->base - | mvn CARG2, #~LJ_TFUNC - | ins_next1 - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - break; - - /* -- Table ops --------------------------------------------------------- */ - - case BC_TNEW: - case BC_TDUP: - | // RA = dst*8, RC = (hbits|asize) | tab_const (~) - if (op == BC_TDUP) { - | mvn RC, RC - } - | ldr CARG3, [DISPATCH, #DISPATCH_GL(gc.total)] - | ldr CARG4, [DISPATCH, #DISPATCH_GL(gc.threshold)] - | str BASE, L->base - | str PC, SAVE_PC - | cmp CARG3, CARG4 - | mov CARG1, L - | bhs >5 - |1: - if (op == BC_TNEW) { - | lsl CARG2, RC, #21 - | lsr CARG3, RC, #11 - | asr RC, CARG2, #21 - | lsr CARG2, CARG2, #21 - | cmn RC, #1 - | addeq CARG2, CARG2, #2 - | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) - | // Returns GCtab *. - } else { - | ldr CARG2, [KBASE, RC, lsl #2] - | bl extern lj_tab_dup // (lua_State *L, Table *kt) - | // Returns GCtab *. - } - | ldr BASE, L->base - | mvn CARG2, #~LJ_TTAB - | ins_next1 - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - |5: - | bl extern lj_gc_step_fixtop // (lua_State *L) - | mov CARG1, L - | b <1 - break; - - case BC_GGET: - | // RA = dst*8, RC = str_const (~) - case BC_GSET: - | // RA = dst*8, RC = str_const (~) - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | mvn RC, RC - | ldr TAB:CARG1, LFUNC:CARG2->env - | ldr STR:RC, [KBASE, RC, lsl #2] - if (op == BC_GGET) { - | b ->BC_TGETS_Z - } else { - | b ->BC_TSETS_Z - } - break; - - case BC_TGETV: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | // RA = dst*8, RB = table*8, RC = key*8 - | ldrd TAB:CARG12, [BASE, RB] - | ldrd CARG34, [BASE, RC] - | checktab CARG2, ->vmeta_tgetv // STALL: load CARG12. - | checktp CARG4, LJ_TISNUM // Integer key? - | ldreq CARG4, TAB:CARG1->array - | ldreq CARG2, TAB:CARG1->asize - | bne >9 - | - | add CARG4, CARG4, CARG3, lsl #3 - | cmp CARG3, CARG2 // In array part? - | ldrdlo CARG34, [CARG4] - | bhs ->vmeta_tgetv - | ins_next1 // Overwrites RB! - | checktp CARG4, LJ_TNIL - | beq >5 - |1: - | ins_next2 - | strd CARG34, [BASE, RA] - | ins_next3 - | - |5: // Check for __index if table value is nil. - | ldr TAB:CARG2, TAB:CARG1->metatable - | cmp TAB:CARG2, #0 - | beq <1 // No metatable: done. - | ldrb CARG2, TAB:CARG2->nomm - | tst CARG2, #1<vmeta_tgetv - | - |9: - | checktp CARG4, LJ_TSTR // String key? - | moveq STR:RC, CARG3 - | beq ->BC_TGETS_Z - | b ->vmeta_tgetv - break; - case BC_TGETS: - | decode_RB8 RB, INS - | and RC, RC, #255 - | // RA = dst*8, RB = table*8, RC = str_const (~) - | ldrd CARG12, [BASE, RB] - | mvn RC, RC - | ldr STR:RC, [KBASE, RC, lsl #2] // STALL: early RC. - | checktab CARG2, ->vmeta_tgets1 - |->BC_TGETS_Z: - | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 - | ldr CARG3, TAB:CARG1->hmask - | ldr CARG4, STR:RC->hash - | ldr NODE:INS, TAB:CARG1->node - | mov TAB:RB, TAB:CARG1 - | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask - | add CARG3, CARG3, CARG3, lsl #1 - | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 - |1: - | ldrd CARG12, NODE:INS->key // STALL: early NODE:INS. - | ldrd CARG34, NODE:INS->val - | ldr NODE:INS, NODE:INS->next - | checktp CARG2, LJ_TSTR - | cmpeq CARG1, STR:RC - | bne >4 - | checktp CARG4, LJ_TNIL - | beq >5 - |3: - | ins_next1 - | ins_next2 - | strd CARG34, [BASE, RA] - | ins_next3 - | - |4: // Follow hash chain. - | cmp NODE:INS, #0 - | bne <1 - | // End of hash chain: key not found, nil result. - | - |5: // Check for __index if table value is nil. - | ldr TAB:CARG1, TAB:RB->metatable - | mov CARG3, #0 // Optional clear of undef. value (during load stall). - | mvn CARG4, #~LJ_TNIL - | cmp TAB:CARG1, #0 - | beq <3 // No metatable: done. - | ldrb CARG2, TAB:CARG1->nomm - | tst CARG2, #1<vmeta_tgets - break; - case BC_TGETB: - | decode_RB8 RB, INS - | and RC, RC, #255 - | // RA = dst*8, RB = table*8, RC = index - | ldrd CARG12, [BASE, RB] - | checktab CARG2, ->vmeta_tgetb // STALL: load CARG12. - | ldr CARG3, TAB:CARG1->asize - | ldr CARG4, TAB:CARG1->array - | lsl CARG2, RC, #3 - | cmp RC, CARG3 - | ldrdlo CARG34, [CARG4, CARG2] - | bhs ->vmeta_tgetb - | ins_next1 // Overwrites RB! - | checktp CARG4, LJ_TNIL - | beq >5 - |1: - | ins_next2 - | strd CARG34, [BASE, RA] - | ins_next3 - | - |5: // Check for __index if table value is nil. - | ldr TAB:CARG2, TAB:CARG1->metatable - | cmp TAB:CARG2, #0 - | beq <1 // No metatable: done. - | ldrb CARG2, TAB:CARG2->nomm - | tst CARG2, #1<vmeta_tgetb - break; - case BC_TGETR: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | // RA = dst*8, RB = table*8, RC = key*8 - | ldr TAB:CARG1, [BASE, RB] - | ldr CARG2, [BASE, RC] - | ldr CARG4, TAB:CARG1->array - | ldr CARG3, TAB:CARG1->asize - | add CARG4, CARG4, CARG2, lsl #3 - | cmp CARG2, CARG3 // In array part? - | bhs ->vmeta_tgetr - | ldrd CARG12, [CARG4] - |->BC_TGETR_Z: - | ins_next1 - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - break; - - case BC_TSETV: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | // RA = src*8, RB = table*8, RC = key*8 - | ldrd TAB:CARG12, [BASE, RB] - | ldrd CARG34, [BASE, RC] - | checktab CARG2, ->vmeta_tsetv // STALL: load CARG12. - | checktp CARG4, LJ_TISNUM // Integer key? - | ldreq CARG2, TAB:CARG1->array - | ldreq CARG4, TAB:CARG1->asize - | bne >9 - | - | add CARG2, CARG2, CARG3, lsl #3 - | cmp CARG3, CARG4 // In array part? - | ldrlo INS, [CARG2, #4] - | bhs ->vmeta_tsetv - | ins_next1 // Overwrites RB! - | checktp INS, LJ_TNIL - | ldrb INS, TAB:CARG1->marked - | ldrd CARG34, [BASE, RA] - | beq >5 - |1: - | tst INS, #LJ_GC_BLACK // isblack(table) - | strd CARG34, [CARG2] - | bne >7 - |2: - | ins_next2 - | ins_next3 - | - |5: // Check for __newindex if previous value is nil. - | ldr TAB:RA, TAB:CARG1->metatable - | cmp TAB:RA, #0 - | beq <1 // No metatable: done. - | ldrb RA, TAB:RA->nomm - | tst RA, #1<vmeta_tsetv - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG1, INS, CARG3 - | b <2 - | - |9: - | checktp CARG4, LJ_TSTR // String key? - | moveq STR:RC, CARG3 - | beq ->BC_TSETS_Z - | b ->vmeta_tsetv - break; - case BC_TSETS: - | decode_RB8 RB, INS - | and RC, RC, #255 - | // RA = src*8, RB = table*8, RC = str_const (~) - | ldrd CARG12, [BASE, RB] - | mvn RC, RC - | ldr STR:RC, [KBASE, RC, lsl #2] // STALL: early RC. - | checktab CARG2, ->vmeta_tsets1 - |->BC_TSETS_Z: - | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 - | ldr CARG3, TAB:CARG1->hmask - | ldr CARG4, STR:RC->hash - | ldr NODE:INS, TAB:CARG1->node - | mov TAB:RB, TAB:CARG1 - | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask - | add CARG3, CARG3, CARG3, lsl #1 - | mov CARG4, #0 - | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 - | strb CARG4, TAB:RB->nomm // Clear metamethod cache. - |1: - | ldrd CARG12, NODE:INS->key - | ldr CARG4, NODE:INS->val.it - | ldr NODE:CARG3, NODE:INS->next - | checktp CARG2, LJ_TSTR - | cmpeq CARG1, STR:RC - | bne >5 - | ldrb CARG2, TAB:RB->marked - | checktp CARG4, LJ_TNIL // Key found, but nil value? - | ldrd CARG34, [BASE, RA] - | beq >4 - |2: - | tst CARG2, #LJ_GC_BLACK // isblack(table) - | strd CARG34, NODE:INS->val - | bne >7 - |3: - | ins_next - | - |4: // Check for __newindex if previous value is nil. - | ldr TAB:CARG1, TAB:RB->metatable - | cmp TAB:CARG1, #0 - | beq <2 // No metatable: done. - | ldrb CARG1, TAB:CARG1->nomm - | tst CARG1, #1<vmeta_tsets - | - |5: // Follow hash chain. - | movs NODE:INS, NODE:CARG3 - | bne <1 - | // End of hash chain: key not found, add a new one. - | - | // But check for __newindex first. - | ldr TAB:CARG1, TAB:RB->metatable - | mov CARG3, TMPDp - | str PC, SAVE_PC - | cmp TAB:CARG1, #0 // No metatable: continue. - | str BASE, L->base - | ldrbne CARG2, TAB:CARG1->nomm - | mov CARG1, L - | beq >6 - | tst CARG2, #1<vmeta_tsets // 'no __newindex' flag NOT set: check. - |6: - | mvn CARG4, #~LJ_TSTR - | str STR:RC, TMPDlo - | mov CARG2, TAB:RB - | str CARG4, TMPDhi - | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) - | // Returns TValue *. - | ldr BASE, L->base - | ldrd CARG34, [BASE, RA] - | strd CARG34, [CRET1] - | b <3 // No 2nd write barrier needed. - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, CARG2, CARG3 - | b <3 - break; - case BC_TSETB: - | decode_RB8 RB, INS - | and RC, RC, #255 - | // RA = src*8, RB = table*8, RC = index - | ldrd CARG12, [BASE, RB] - | checktab CARG2, ->vmeta_tsetb // STALL: load CARG12. - | ldr CARG3, TAB:CARG1->asize - | ldr RB, TAB:CARG1->array - | lsl CARG2, RC, #3 - | cmp RC, CARG3 - | ldrdlo CARG34, [CARG2, RB]! - | bhs ->vmeta_tsetb - | ins_next1 // Overwrites RB! - | checktp CARG4, LJ_TNIL - | ldrb INS, TAB:CARG1->marked - | ldrd CARG34, [BASE, RA] - | beq >5 - |1: - | tst INS, #LJ_GC_BLACK // isblack(table) - | strd CARG34, [CARG2] - | bne >7 - |2: - | ins_next2 - | ins_next3 - | - |5: // Check for __newindex if previous value is nil. - | ldr TAB:RA, TAB:CARG1->metatable - | cmp TAB:RA, #0 - | beq <1 // No metatable: done. - | ldrb RA, TAB:RA->nomm - | tst RA, #1<vmeta_tsetb - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG1, INS, CARG3 - | b <2 - break; - case BC_TSETR: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | // RA = src*8, RB = table*8, RC = key*8 - | ldr TAB:CARG2, [BASE, RB] - | ldr CARG3, [BASE, RC] - | ldrb INS, TAB:CARG2->marked - | ldr CARG1, TAB:CARG2->array - | ldr CARG4, TAB:CARG2->asize - | tst INS, #LJ_GC_BLACK // isblack(table) - | add CARG1, CARG1, CARG3, lsl #3 - | bne >7 - |2: - | cmp CARG3, CARG4 // In array part? - | bhs ->vmeta_tsetr - |->BC_TSETR_Z: - | ldrd CARG34, [BASE, RA] - | ins_next1 - | ins_next2 - | strd CARG34, [CARG1] - | ins_next3 - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG2, INS, RB - | b <2 - break; - - case BC_TSETM: - | // RA = base*8 (table at base-1), RC = num_const (start index) - | add RA, BASE, RA - |1: - | ldr RB, SAVE_MULTRES - | ldr TAB:CARG2, [RA, #-8] // Guaranteed to be a table. - | ldr CARG1, [KBASE, RC, lsl #3] // Integer constant is in lo-word. - | subs RB, RB, #8 - | ldr CARG4, TAB:CARG2->asize - | beq >4 // Nothing to copy? - | add CARG3, CARG1, RB, lsr #3 - | cmp CARG3, CARG4 - | ldr CARG4, TAB:CARG2->array - | add RB, RA, RB - | bhi >5 - | add INS, CARG4, CARG1, lsl #3 - | ldrb CARG1, TAB:CARG2->marked - |3: // Copy result slots to table. - | ldrd CARG34, [RA], #8 - | strd CARG34, [INS], #8 - | cmp RA, RB - | blo <3 - | tst CARG1, #LJ_GC_BLACK // isblack(table) - | bne >7 - |4: - | ins_next - | - |5: // Need to resize array part. - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) - | // Must not reallocate the stack. - | .IOS ldr BASE, L->base - | b <1 - | - |7: // Possible table write barrier for any value. Skip valiswhite check. - | barrierback TAB:CARG2, CARG1, CARG3 - | b <4 - break; - - /* -- Calls and vararg handling ----------------------------------------- */ - - case BC_CALLM: - | // RA = base*8, (RB = nresults+1,) RC = extra_nargs - | ldr CARG1, SAVE_MULTRES - | decode_RC8 NARGS8:RC, INS - | add NARGS8:RC, NARGS8:RC, CARG1 - | b ->BC_CALL_Z - break; - case BC_CALL: - | decode_RC8 NARGS8:RC, INS - | // RA = base*8, (RB = nresults+1,) RC = (nargs+1)*8 - |->BC_CALL_Z: - | mov RB, BASE // Save old BASE for vmeta_call. - | ldrd CARG34, [BASE, RA]! - | sub NARGS8:RC, NARGS8:RC, #8 - | add BASE, BASE, #8 - | checkfunc CARG4, ->vmeta_call - | ins_call - break; - - case BC_CALLMT: - | // RA = base*8, (RB = 0,) RC = extra_nargs - | ldr CARG1, SAVE_MULTRES - | add NARGS8:RC, CARG1, RC, lsl #3 - | b ->BC_CALLT1_Z - break; - case BC_CALLT: - | lsl NARGS8:RC, RC, #3 - | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 - |->BC_CALLT1_Z: - | ldrd LFUNC:CARG34, [RA, BASE]! - | sub NARGS8:RC, NARGS8:RC, #8 - | add RA, RA, #8 - | checkfunc CARG4, ->vmeta_callt - | ldr PC, [BASE, FRAME_PC] - |->BC_CALLT2_Z: - | mov RB, #0 - | ldrb CARG4, LFUNC:CARG3->ffid - | tst PC, #FRAME_TYPE - | bne >7 - |1: - | str LFUNC:CARG3, [BASE, FRAME_FUNC] // Copy function down, but keep PC. - | cmp NARGS8:RC, #0 - | beq >3 - |2: - | ldrd CARG12, [RA, RB] - | add INS, RB, #8 - | cmp INS, NARGS8:RC - | strd CARG12, [BASE, RB] - | mov RB, INS - | bne <2 - |3: - | cmp CARG4, #1 // (> FF_C) Calling a fast function? - | bhi >5 - |4: - | ins_callt - | - |5: // Tailcall to a fast function with a Lua frame below. - | ldr INS, [PC, #-4] - | decode_RA8 RA, INS - | sub CARG1, BASE, RA - | ldr LFUNC:CARG1, [CARG1, #-16] - | ldr CARG1, LFUNC:CARG1->field_pc - | ldr KBASE, [CARG1, #PC2PROTO(k)] - | b <4 - | - |7: // Tailcall from a vararg function. - | eor PC, PC, #FRAME_VARG - | tst PC, #FRAME_TYPEP // Vararg frame below? - | movne CARG4, #0 // Clear ffid if no Lua function below. - | bne <1 - | sub BASE, BASE, PC - | ldr PC, [BASE, FRAME_PC] - | tst PC, #FRAME_TYPE - | movne CARG4, #0 // Clear ffid if no Lua function below. - | b <1 - break; - - case BC_ITERC: - | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) - | add RA, BASE, RA - | mov RB, BASE // Save old BASE for vmeta_call. - | ldrd CARG34, [RA, #-16] - | ldrd CARG12, [RA, #-8] - | add BASE, RA, #8 - | strd CARG34, [RA, #8] // Copy state. - | strd CARG12, [RA, #16] // Copy control var. - | // STALL: locked CARG34. - | ldrd LFUNC:CARG34, [RA, #-24] - | mov NARGS8:RC, #16 // Iterators get 2 arguments. - | // STALL: load CARG34. - | strd LFUNC:CARG34, [RA] // Copy callable. - | checkfunc CARG4, ->vmeta_call - | ins_call - break; - - case BC_ITERN: - | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. - |.endif - | add RA, BASE, RA - | ldr TAB:RB, [RA, #-16] - | ldr CARG1, [RA, #-8] // Get index from control var. - | ldr INS, TAB:RB->asize - | ldr CARG2, TAB:RB->array - | add PC, PC, #4 - |1: // Traverse array part. - | subs RC, CARG1, INS - | add CARG3, CARG2, CARG1, lsl #3 - | bhs >5 // Index points after array part? - | ldrd CARG34, [CARG3] - | checktp CARG4, LJ_TNIL - | addeq CARG1, CARG1, #1 // Skip holes in array part. - | beq <1 - | ldrh RC, [PC, #-2] - | mvn CARG2, #~LJ_TISNUM - | strd CARG34, [RA, #8] - | add RC, PC, RC, lsl #2 - | add RB, CARG1, #1 - | strd CARG12, [RA] - | sub PC, RC, #0x20000 - | str RB, [RA, #-8] // Update control var. - |3: - | ins_next - | - |5: // Traverse hash part. - | ldr CARG4, TAB:RB->hmask - | ldr NODE:RB, TAB:RB->node - |6: - | add CARG1, RC, RC, lsl #1 - | cmp RC, CARG4 // End of iteration? Branch to ITERL+1. - | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8 - | bhi <3 - | ldrd CARG12, NODE:CARG3->val - | checktp CARG2, LJ_TNIL - | add RC, RC, #1 - | beq <6 // Skip holes in hash part. - | ldrh RB, [PC, #-2] - | add RC, RC, INS - | ldrd CARG34, NODE:CARG3->key - | str RC, [RA, #-8] // Update control var. - | strd CARG12, [RA, #8] - | add RC, PC, RB, lsl #2 - | sub PC, RC, #0x20000 - | strd CARG34, [RA] - | b <3 - break; - - case BC_ISNEXT: - | // RA = base*8, RC = target (points to ITERN) - | add RA, BASE, RA - | add RC, PC, RC, lsl #2 - | ldrd CFUNC:CARG12, [RA, #-24] - | ldr CARG3, [RA, #-12] - | ldr CARG4, [RA, #-4] - | checktp CARG2, LJ_TFUNC - | ldrbeq CARG1, CFUNC:CARG1->ffid - | checktpeq CARG3, LJ_TTAB - | checktpeq CARG4, LJ_TNIL - | cmpeq CARG1, #FF_next_N - | subeq PC, RC, #0x20000 - | bne >5 - | ins_next1 - | ins_next2 - | mov CARG1, #0 - | mvn CARG2, #0x00018000 - | strd CARG1, [RA, #-8] // Initialize control var. - |1: - | ins_next3 - |5: // Despecialize bytecode if any of the checks fail. - | mov CARG1, #BC_JMP - | mov OP, #BC_ITERC - | strb CARG1, [PC, #-4] - | sub PC, RC, #0x20000 - | strb OP, [PC] // Subsumes ins_next1. - | ins_next2 - | b <1 - break; - - case BC_VARG: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 - | ldr CARG1, [BASE, FRAME_PC] - | add RC, BASE, RC - | add RA, BASE, RA - | add RC, RC, #FRAME_VARG - | add CARG4, RA, RB - | sub CARG3, BASE, #8 // CARG3 = vtop - | sub RC, RC, CARG1 // RC = vbase - | // Note: RC may now be even _above_ BASE if nargs was < numparams. - | cmp RB, #0 - | sub CARG1, CARG3, RC - | beq >5 // Copy all varargs? - | sub CARG4, CARG4, #16 - |1: // Copy vararg slots to destination slots. - | cmp RC, CARG3 - | ldrdlo CARG12, [RC], #8 - | mvnhs CARG2, #~LJ_TNIL - | cmp RA, CARG4 - | strd CARG12, [RA], #8 - | blo <1 - |2: - | ins_next - | - |5: // Copy all varargs. - | ldr CARG4, L->maxstack - | cmp CARG1, #0 - | movle RB, #8 // MULTRES = (0+1)*8 - | addgt RB, CARG1, #8 - | add CARG2, RA, CARG1 - | str RB, SAVE_MULTRES - | ble <2 - | cmp CARG2, CARG4 - | bhi >7 - |6: - | ldrd CARG12, [RC], #8 - | strd CARG12, [RA], #8 - | cmp RC, CARG3 - | blo <6 - | b <2 - | - |7: // Grow stack for varargs. - | lsr CARG2, CARG1, #3 - | str RA, L->top - | mov CARG1, L - | str BASE, L->base - | sub RC, RC, BASE // Need delta, because BASE may change. - | str PC, SAVE_PC - | sub RA, RA, BASE - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldr BASE, L->base - | add RA, BASE, RA - | add RC, BASE, RC - | sub CARG3, BASE, #8 - | b <6 - break; - - /* -- Returns ----------------------------------------------------------- */ - - case BC_RETM: - | // RA = results*8, RC = extra results - | ldr CARG1, SAVE_MULTRES - | ldr PC, [BASE, FRAME_PC] - | add RA, BASE, RA - | add RC, CARG1, RC, lsl #3 - | b ->BC_RETM_Z - break; - - case BC_RET: - | // RA = results*8, RC = nresults+1 - | ldr PC, [BASE, FRAME_PC] - | lsl RC, RC, #3 - | add RA, BASE, RA - |->BC_RETM_Z: - | str RC, SAVE_MULTRES - |1: - | ands CARG1, PC, #FRAME_TYPE - | eor CARG2, PC, #FRAME_VARG - | bne ->BC_RETV2_Z - | - |->BC_RET_Z: - | // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return - | ldr INS, [PC, #-4] - | subs CARG4, RC, #8 - | sub CARG3, BASE, #8 - | beq >3 - |2: - | ldrd CARG12, [RA], #8 - | add BASE, BASE, #8 - | subs CARG4, CARG4, #8 - | strd CARG12, [BASE, #-16] - | bne <2 - |3: - | decode_RA8 RA, INS - | sub CARG4, CARG3, RA - | decode_RB8 RB, INS - | ldr LFUNC:CARG1, [CARG4, FRAME_FUNC] - |5: - | cmp RB, RC // More results expected? - | bhi >6 - | mov BASE, CARG4 - | ldr CARG2, LFUNC:CARG1->field_pc - | ins_next1 - | ins_next2 - | ldr KBASE, [CARG2, #PC2PROTO(k)] - | ins_next3 - | - |6: // Fill up results with nil. - | mvn CARG2, #~LJ_TNIL - | add BASE, BASE, #8 - | add RC, RC, #8 - | str CARG2, [BASE, #-12] - | b <5 - | - |->BC_RETV1_Z: // Non-standard return case. - | add RA, BASE, RA - |->BC_RETV2_Z: - | tst CARG2, #FRAME_TYPEP - | bne ->vm_return - | // Return from vararg function: relocate BASE down. - | sub BASE, BASE, CARG2 - | ldr PC, [BASE, FRAME_PC] - | b <1 - break; - - case BC_RET0: case BC_RET1: - | // RA = results*8, RC = nresults+1 - | ldr PC, [BASE, FRAME_PC] - | lsl RC, RC, #3 - | str RC, SAVE_MULTRES - | ands CARG1, PC, #FRAME_TYPE - | eor CARG2, PC, #FRAME_VARG - | ldreq INS, [PC, #-4] - | bne ->BC_RETV1_Z - if (op == BC_RET1) { - | ldrd CARG12, [BASE, RA] - } - | sub CARG4, BASE, #8 - | decode_RA8 RA, INS - if (op == BC_RET1) { - | strd CARG12, [CARG4] - } - | sub BASE, CARG4, RA - | decode_RB8 RB, INS - | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] - |5: - | cmp RB, RC - | bhi >6 - | ldr CARG2, LFUNC:CARG1->field_pc - | ins_next1 - | ins_next2 - | ldr KBASE, [CARG2, #PC2PROTO(k)] - | ins_next3 - | - |6: // Fill up results with nil. - | sub CARG2, CARG4, #4 - | mvn CARG3, #~LJ_TNIL - | str CARG3, [CARG2, RC] - | add RC, RC, #8 - | b <5 - break; - - /* -- Loops and branches ------------------------------------------------ */ - - |.define FOR_IDX, [RA]; .define FOR_TIDX, [RA, #4] - |.define FOR_STOP, [RA, #8]; .define FOR_TSTOP, [RA, #12] - |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20] - |.define FOR_EXT, [RA, #24]; .define FOR_TEXT, [RA, #28] - - case BC_FORL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IFORL follows. - break; - - case BC_JFORI: - case BC_JFORL: -#if !LJ_HASJIT - break; -#endif - case BC_FORI: - case BC_IFORL: - | // RA = base*8, RC = target (after end of loop or start of loop) - vk = (op == BC_IFORL || op == BC_JFORL); - | ldrd CARG12, [RA, BASE]! - if (op != BC_JFORL) { - | add RC, PC, RC, lsl #2 - } - if (!vk) { - | ldrd CARG34, FOR_STOP - | checktp CARG2, LJ_TISNUM - | ldr RB, FOR_TSTEP - | bne >5 - | checktp CARG4, LJ_TISNUM - | ldr CARG4, FOR_STEP - | checktpeq RB, LJ_TISNUM - | bne ->vmeta_for - | cmp CARG4, #0 - | blt >4 - | cmp CARG1, CARG3 - } else { - | ldrd CARG34, FOR_STEP - | checktp CARG2, LJ_TISNUM - | bne >5 - | adds CARG1, CARG1, CARG3 - | ldr CARG4, FOR_STOP - if (op == BC_IFORL) { - | addvs RC, PC, #0x20000 // Overflow: prevent branch. - } else { - | bvs >2 // Overflow: do not enter mcode. - } - | cmp CARG3, #0 - | blt >4 - | cmp CARG1, CARG4 - } - |1: - if (op == BC_FORI) { - | subgt PC, RC, #0x20000 - } else if (op == BC_JFORI) { - | sub PC, RC, #0x20000 - | ldrhle RC, [PC, #-2] - } else if (op == BC_IFORL) { - | suble PC, RC, #0x20000 - } - if (vk) { - | strd CARG12, FOR_IDX - } - |2: - | ins_next1 - | ins_next2 - | strd CARG12, FOR_EXT - if (op == BC_JFORI || op == BC_JFORL) { - | ble =>BC_JLOOP - } - |3: - | ins_next3 - | - |4: // Invert check for negative step. - if (!vk) { - | cmp CARG3, CARG1 - } else { - | cmp CARG4, CARG1 - } - | b <1 - | - |5: // FP loop. - if (!vk) { - | cmnlo CARG4, #-LJ_TISNUM - | cmnlo RB, #-LJ_TISNUM - | bhs ->vmeta_for - |.if FPU - | vldr d0, FOR_IDX - | vldr d1, FOR_STOP - | cmp RB, #0 - | vstr d0, FOR_EXT - |.else - | cmp RB, #0 - | strd CARG12, FOR_EXT - | blt >8 - |.endif - } else { - |.if FPU - | vldr d0, FOR_IDX - | vldr d2, FOR_STEP - | vldr d1, FOR_STOP - | cmp CARG4, #0 - | vadd.f64 d0, d0, d2 - |.else - | cmp CARG4, #0 - | blt >8 - | bl extern __aeabi_dadd - | strd CARG12, FOR_IDX - | ldrd CARG34, FOR_STOP - | strd CARG12, FOR_EXT - |.endif - } - |6: - |.if FPU - | vcmpge.f64 d0, d1 - | vcmplt.f64 d1, d0 - | vmrs - |.else - | bl extern __aeabi_cdcmple - |.endif - if (vk) { - |.if FPU - | vstr d0, FOR_IDX - | vstr d0, FOR_EXT - |.endif - } - if (op == BC_FORI) { - | subhi PC, RC, #0x20000 - } else if (op == BC_JFORI) { - | sub PC, RC, #0x20000 - | ldrhls RC, [PC, #-2] - | bls =>BC_JLOOP - } else if (op == BC_IFORL) { - | subls PC, RC, #0x20000 - } else { - | bls =>BC_JLOOP - } - | ins_next1 - | ins_next2 - | b <3 - | - |.if not FPU - |8: // Invert check for negative step. - if (vk) { - | bl extern __aeabi_dadd - | strd CARG12, FOR_IDX - | strd CARG12, FOR_EXT - } - | mov CARG3, CARG1 - | mov CARG4, CARG2 - | ldrd CARG12, FOR_STOP - | b <6 - |.endif - break; - - case BC_ITERL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IITERL follows. - break; - - case BC_JITERL: -#if !LJ_HASJIT - break; -#endif - case BC_IITERL: - | // RA = base*8, RC = target - | ldrd CARG12, [RA, BASE]! - if (op == BC_JITERL) { - | cmn CARG2, #-LJ_TNIL // Stop if iterator returned nil. - | strdne CARG12, [RA, #-8] - | bne =>BC_JLOOP - } else { - | add RC, PC, RC, lsl #2 - | // STALL: load CARG12. - | cmn CARG2, #-LJ_TNIL // Stop if iterator returned nil. - | subne PC, RC, #0x20000 // Otherwise save control var + branch. - | strdne CARG12, [RA, #-8] - } - | ins_next - break; - - case BC_LOOP: - | // RA = base*8, RC = target (loop extent) - | // Note: RA/RC is only used by trace recorder to determine scope/extent - | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_ILOOP follows. - break; - - case BC_ILOOP: - | // RA = base*8, RC = target (loop extent) - | ins_next - break; - - case BC_JLOOP: - |.if JIT - | // RA = base (ignored), RC = traceno - | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)] - | mov CARG2, #0 // Traces on ARM don't store the trace number, so use 0. - | ldr TRACE:RC, [CARG1, RC, lsl #2] - | st_vmstate CARG2 - | ldr RA, TRACE:RC->mcode - | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)] - | str L, [DISPATCH, #DISPATCH_GL(tmpbuf.L)] - | bx RA - |.endif - break; - - case BC_JMP: - | // RA = base*8 (only used by trace recorder), RC = target - | add RC, PC, RC, lsl #2 - | sub PC, RC, #0x20000 - | ins_next - break; - - /* -- Function headers -------------------------------------------------- */ - - case BC_FUNCF: - |.if JIT - | hotcall - |.endif - case BC_FUNCV: /* NYI: compiled vararg functions. */ - | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. - break; - - case BC_JFUNCF: -#if !LJ_HASJIT - break; -#endif - case BC_IFUNCF: - | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 - | ldr CARG1, L->maxstack - | ldrb CARG2, [PC, #-4+PC2PROTO(numparams)] - | ldr KBASE, [PC, #-4+PC2PROTO(k)] - | cmp RA, CARG1 - | bhi ->vm_growstack_l - if (op != BC_JFUNCF) { - | ins_next1 - | ins_next2 - } - |2: - | cmp NARGS8:RC, CARG2, lsl #3 // Check for missing parameters. - | mvn CARG4, #~LJ_TNIL - | blo >3 - if (op == BC_JFUNCF) { - | decode_RD RC, INS - | b =>BC_JLOOP - } else { - | ins_next3 - } - | - |3: // Clear missing parameters. - | strd CARG34, [BASE, NARGS8:RC] - | add NARGS8:RC, NARGS8:RC, #8 - | b <2 - break; - - case BC_JFUNCV: -#if !LJ_HASJIT - break; -#endif - | NYI // NYI: compiled vararg functions - break; /* NYI: compiled vararg functions. */ - - case BC_IFUNCV: - | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 - | ldr CARG1, L->maxstack - | add CARG4, BASE, RC - | add RA, RA, RC - | str LFUNC:CARG3, [CARG4] // Store copy of LFUNC. - | add CARG2, RC, #8+FRAME_VARG - | ldr KBASE, [PC, #-4+PC2PROTO(k)] - | cmp RA, CARG1 - | str CARG2, [CARG4, #4] // Store delta + FRAME_VARG. - | bhs ->vm_growstack_l - | ldrb RB, [PC, #-4+PC2PROTO(numparams)] - | mov RA, BASE - | mov RC, CARG4 - | cmp RB, #0 - | add BASE, CARG4, #8 - | beq >3 - | mvn CARG3, #~LJ_TNIL - |1: - | cmp RA, RC // Less args than parameters? - | ldrdlo CARG12, [RA], #8 - | movhs CARG2, CARG3 - | strlo CARG3, [RA, #-4] // Clear old fixarg slot (help the GC). - |2: - | subs RB, RB, #1 - | strd CARG12, [CARG4, #8]! - | bne <1 - |3: - | ins_next - break; - - case BC_FUNCC: - case BC_FUNCCW: - | // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8 - if (op == BC_FUNCC) { - | ldr CARG4, CFUNC:CARG3->f - } else { - | ldr CARG4, [DISPATCH, #DISPATCH_GL(wrapf)] - } - | add CARG2, RA, NARGS8:RC - | ldr CARG1, L->maxstack - | add RC, BASE, NARGS8:RC - | str BASE, L->base - | cmp CARG2, CARG1 - | str RC, L->top - if (op == BC_FUNCCW) { - | ldr CARG2, CFUNC:CARG3->f - } - | mv_vmstate CARG3, C - | mov CARG1, L - | bhi ->vm_growstack_c // Need to grow stack. - | st_vmstate CARG3 - | blx CARG4 // (lua_State *L [, lua_CFunction f]) - | // Returns nresults. - | ldr BASE, L->base - | mv_vmstate CARG3, INTERP - | ldr CRET2, L->top - | str L, [DISPATCH, #DISPATCH_GL(cur_L)] - | lsl RC, CRET1, #3 - | st_vmstate CARG3 - | ldr PC, [BASE, FRAME_PC] - | sub RA, CRET2, RC // RA = L->top - nresults*8 - | b ->vm_returnc - break; - - /* ---------------------------------------------------------------------- */ - - default: - fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); - exit(2); - break; - } -} - -static int build_backend(BuildCtx *ctx) -{ - int op; - - dasm_growpc(Dst, BC__MAX); - - build_subroutines(ctx); - - |.code_op - for (op = 0; op < BC__MAX; op++) - build_ins(ctx, (BCOp)op, op); - - return BC__MAX; -} - -/* Emit pseudo frame-info for all assembler functions. */ -static void emit_asm_debug(BuildCtx *ctx) -{ - int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); - int i; - switch (ctx->mode) { - case BUILD_elfasm: - fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n"); - fprintf(ctx->fp, - ".Lframe0:\n" - "\t.long .LECIE0-.LSCIE0\n" - ".LSCIE0:\n" - "\t.long 0xffffffff\n" - "\t.byte 0x1\n" - "\t.string \"\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 0xe\n" /* Return address is in lr. */ - "\t.byte 0xc\n\t.uleb128 0xd\n\t.uleb128 0\n" /* def_cfa sp */ - "\t.align 2\n" - ".LECIE0:\n\n"); - fprintf(ctx->fp, - ".LSFDE0:\n" - "\t.long .LEFDE0-.LASFDE0\n" - ".LASFDE0:\n" - "\t.long .Lframe0\n" - "\t.long .Lbegin\n" - "\t.long %d\n" - "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ - "\t.byte 0x8e\n\t.uleb128 1\n", /* offset lr */ - fcofs, CFRAME_SIZE); - for (i = 11; i >= (LJ_ARCH_HASFPU ? 5 : 4); i--) /* offset r4-r11 */ - fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2+(11-i)); -#if LJ_ARCH_HASFPU - for (i = 15; i >= 8; i--) /* offset d8-d15 */ - fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 %d, %d\n", - 64+2*i, 10+2*(15-i)); - fprintf(ctx->fp, "\t.byte 0x84\n\t.uleb128 %d\n", 25); /* offset r4 */ -#endif - fprintf(ctx->fp, - "\t.align 2\n" - ".LEFDE0:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".LSFDE1:\n" - "\t.long .LEFDE1-.LASFDE1\n" - ".LASFDE1:\n" - "\t.long .Lframe0\n" - "\t.long lj_vm_ffi_call\n" - "\t.long %d\n" - "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ - "\t.byte 0x8e\n\t.uleb128 1\n" /* offset lr */ - "\t.byte 0x8b\n\t.uleb128 2\n" /* offset r11 */ - "\t.byte 0x85\n\t.uleb128 3\n" /* offset r5 */ - "\t.byte 0x84\n\t.uleb128 4\n" /* offset r4 */ - "\t.byte 0xd\n\t.uleb128 0xb\n" /* def_cfa_register r11 */ - "\t.align 2\n" - ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); -#endif - break; - default: - break; - } -} - diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc deleted file mode 100644 index bb2496ab18..0000000000 --- a/src/vm_arm64.dasc +++ /dev/null @@ -1,3964 +0,0 @@ -|// Low-level VM code for ARM64 CPUs. -|// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -| -|.arch arm64 -|.section code_op, code_sub -| -|.actionlist build_actionlist -|.globals GLOB_ -|.globalnames globnames -|.externnames extnames -| -|// Note: The ragged indentation of the instructions is intentional. -|// The starting columns indicate data dependencies. -| -|//----------------------------------------------------------------------- -| -|// ARM64 registers and the AAPCS64 ABI 1.0 at a glance: -|// -|// x0-x17 temp, x19-x28 callee-saved, x29 fp, x30 lr -|// x18 is reserved on most platforms. Don't use it, save it or restore it. -|// x31 doesn't exist. Register number 31 either means xzr/wzr (zero) or sp, -|// depending on the instruction. -|// v0-v7 temp, v8-v15 callee-saved (only d8-d15 preserved), v16-v31 temp -|// -|// x0-x7/v0-v7 hold parameters and results. -| -|// Fixed register assignments for the interpreter. -| -|// The following must be C callee-save. -|.define BASE, x19 // Base of current Lua stack frame. -|.define KBASE, x20 // Constants of current Lua function. -|.define PC, x21 // Next PC. -|.define GLREG, x22 // Global state. -|.define LREG, x23 // Register holding lua_State (also in SAVE_L). -|.define TISNUM, x24 // Constant LJ_TISNUM << 47. -|.define TISNUMhi, x25 // Constant LJ_TISNUM << 15. -|.define TISNIL, x26 // Constant -1LL. -|.define fp, x29 // Yes, we have to maintain a frame pointer. -| -|.define ST_INTERP, w26 // Constant -1. -| -|// The following temporaries are not saved across C calls, except for RA/RC. -|.define RA, x27 -|.define RC, x28 -|.define RB, x17 -|.define RAw, w27 -|.define RCw, w28 -|.define RBw, w17 -|.define INS, x16 -|.define INSw, w16 -|.define ITYPE, x15 -|.define TMP0, x8 -|.define TMP1, x9 -|.define TMP2, x10 -|.define TMP3, x11 -|.define TMP0w, w8 -|.define TMP1w, w9 -|.define TMP2w, w10 -|.define TMP3w, w11 -| -|// Calling conventions. Also used as temporaries. -|.define CARG1, x0 -|.define CARG2, x1 -|.define CARG3, x2 -|.define CARG4, x3 -|.define CARG5, x4 -|.define CARG1w, w0 -|.define CARG2w, w1 -|.define CARG3w, w2 -|.define CARG4w, w3 -|.define CARG5w, w4 -| -|.define FARG1, d0 -|.define FARG2, d1 -| -|.define CRET1, x0 -|.define CRET1w, w0 -| -|// Stack layout while in interpreter. Must match with lj_frame.h. -| -|.define CFRAME_SPACE, 208 -|//----- 16 byte aligned, <-- sp entering interpreter -|// Unused [sp, #204] // 32 bit values -|.define SAVE_NRES, [sp, #200] -|.define SAVE_ERRF, [sp, #196] -|.define SAVE_MULTRES, [sp, #192] -|.define TMPD, [sp, #184] // 64 bit values -|.define SAVE_L, [sp, #176] -|.define SAVE_PC, [sp, #168] -|.define SAVE_CFRAME, [sp, #160] -|.define SAVE_FPR_, 96 // 96+8*8: 64 bit FPR saves -|.define SAVE_GPR_, 16 // 16+10*8: 64 bit GPR saves -|.define SAVE_LR, [sp, #8] -|.define SAVE_FP, [sp] -|//----- 16 byte aligned, <-- sp while in interpreter. -| -|.define TMPDofs, #184 -| -|.macro save_, gpr1, gpr2, fpr1, fpr2 -| stp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8] -| stp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8] -|.endmacro -|.macro rest_, gpr1, gpr2, fpr1, fpr2 -| ldp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8] -| ldp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8] -|.endmacro -| -|.macro saveregs -| stp fp, lr, [sp, #-CFRAME_SPACE]! -| add fp, sp, #0 -| stp x19, x20, [sp, # SAVE_GPR_] -| save_ 21, 22, 8, 9 -| save_ 23, 24, 10, 11 -| save_ 25, 26, 12, 13 -| save_ 27, 28, 14, 15 -|.endmacro -|.macro restoreregs -| ldp x19, x20, [sp, # SAVE_GPR_] -| rest_ 21, 22, 8, 9 -| rest_ 23, 24, 10, 11 -| rest_ 25, 26, 12, 13 -| rest_ 27, 28, 14, 15 -| ldp fp, lr, [sp], # CFRAME_SPACE -|.endmacro -| -|// Type definitions. Some of these are only used for documentation. -|.type L, lua_State, LREG -|.type GL, global_State, GLREG -|.type TVALUE, TValue -|.type GCOBJ, GCobj -|.type STR, GCstr -|.type TAB, GCtab -|.type LFUNC, GCfuncL -|.type CFUNC, GCfuncC -|.type PROTO, GCproto -|.type UPVAL, GCupval -|.type NODE, Node -|.type NARGS8, int -|.type TRACE, GCtrace -|.type SBUF, SBuf -| -|//----------------------------------------------------------------------- -| -|// Trap for not-yet-implemented parts. -|.macro NYI; brk; .endmacro -| -|//----------------------------------------------------------------------- -| -|// Access to frame relative to BASE. -|.define FRAME_FUNC, #-16 -|.define FRAME_PC, #-8 -| -|.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro -|.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro -|.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro -|.macro decode_RD, dst, ins; ubfx dst, ins, #16, #16; .endmacro -|.macro decode_RC8RD, dst, src; ubfiz dst, src, #3, #8; .endmacro -| -|// Instruction decode+dispatch. -|.macro ins_NEXT -| ldr INSw, [PC], #4 -| add TMP1, GL, INS, uxtb #3 -| decode_RA RA, INS -| ldr TMP0, [TMP1, #GG_G2DISP] -| decode_RD RC, INS -| br TMP0 -|.endmacro -| -|// Instruction footer. -|.if 1 -| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. -| .define ins_next, ins_NEXT -| .define ins_next_, ins_NEXT -|.else -| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. -| // Affects only certain kinds of benchmarks (and only with -j off). -| .macro ins_next -| b ->ins_next -| .endmacro -| .macro ins_next_ -| ->ins_next: -| ins_NEXT -| .endmacro -|.endif -| -|// Call decode and dispatch. -|.macro ins_callt -| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC -| ldr PC, LFUNC:CARG3->pc -| ldr INSw, [PC], #4 -| add TMP1, GL, INS, uxtb #3 -| decode_RA RA, INS -| ldr TMP0, [TMP1, #GG_G2DISP] -| add RA, BASE, RA, lsl #3 -| br TMP0 -|.endmacro -| -|.macro ins_call -| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC -| str PC, [BASE, FRAME_PC] -| ins_callt -|.endmacro -| -|//----------------------------------------------------------------------- -| -|// Macros to check the TValue type and extract the GCobj. Branch on failure. -|.macro checktp, reg, tp, target -| asr ITYPE, reg, #47 -| cmn ITYPE, #-tp -| and reg, reg, #LJ_GCVMASK -| bne target -|.endmacro -|.macro checktp, dst, reg, tp, target -| asr ITYPE, reg, #47 -| cmn ITYPE, #-tp -| and dst, reg, #LJ_GCVMASK -| bne target -|.endmacro -|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro -|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro -|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro -|.macro checkint, reg, target -| cmp TISNUMhi, reg, lsr #32 -| bne target -|.endmacro -|.macro checknum, reg, target -| cmp TISNUMhi, reg, lsr #32 -| bls target -|.endmacro -|.macro checknumber, reg, target -| cmp TISNUMhi, reg, lsr #32 -| blo target -|.endmacro -| -|.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro -|.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro -| -#define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field)) -| -#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) -| -|.macro hotcheck, delta -| lsr CARG1, PC, #1 -| and CARG1, CARG1, #126 -| add CARG1, CARG1, #GG_G2DISP+GG_DISP2HOT -| ldrh CARG2w, [GL, CARG1] -| subs CARG2, CARG2, #delta -| strh CARG2w, [GL, CARG1] -|.endmacro -| -|.macro hotloop -| hotcheck HOTCOUNT_LOOP -| blo ->vm_hotloop -|.endmacro -| -|.macro hotcall -| hotcheck HOTCOUNT_CALL -| blo ->vm_hotcall -|.endmacro -| -|// Set current VM state. -|.macro mv_vmstate, reg, st; movn reg, #LJ_VMST_..st; .endmacro -|.macro st_vmstate, reg; str reg, GL->vmstate; .endmacro -| -|// Move table write barrier back. Overwrites mark and tmp. -|.macro barrierback, tab, mark, tmp -| ldr tmp, GL->gc.grayagain -| and mark, mark, #~LJ_GC_BLACK // black2gray(tab) -| str tab, GL->gc.grayagain -| strb mark, tab->marked -| str tmp, tab->gclist -|.endmacro -| -|//----------------------------------------------------------------------- - -#if !LJ_DUALNUM -#error "Only dual-number mode supported for ARM64 target" -#endif - -/* Generate subroutines used by opcodes and other parts of the VM. */ -/* The .code_sub section should be last to help static branch prediction. */ -static void build_subroutines(BuildCtx *ctx) -{ - |.code_sub - | - |//----------------------------------------------------------------------- - |//-- Return handling ---------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_returnp: - | // See vm_return. Also: RB = previous base. - | tbz PC, #2, ->cont_dispatch // (PC & FRAME_P) == 0? - | - | // Return from pcall or xpcall fast func. - | ldr PC, [RB, FRAME_PC] // Fetch PC of previous frame. - | mov_true TMP0 - | mov BASE, RB - | // Prepending may overwrite the pcall frame, so do it at the end. - | str TMP0, [RA, #-8]! // Prepend true to results. - | - |->vm_returnc: - | adds RC, RC, #8 // RC = (nresults+1)*8. - | mov CRET1, #LUA_YIELD - | beq ->vm_unwind_c_eh - | str RCw, SAVE_MULTRES - | ands CARG1, PC, #FRAME_TYPE - | beq ->BC_RET_Z // Handle regular return to Lua. - | - |->vm_return: - | // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return - | // CARG1 = PC & FRAME_TYPE - | and RB, PC, #~FRAME_TYPEP - | cmp CARG1, #FRAME_C - | sub RB, BASE, RB // RB = previous base. - | bne ->vm_returnp - | - | str RB, L->base - | ldrsw CARG2, SAVE_NRES // CARG2 = nresults+1. - | mv_vmstate TMP0w, C - | sub BASE, BASE, #16 - | subs TMP2, RC, #8 - | st_vmstate TMP0w - | beq >2 - |1: - | subs TMP2, TMP2, #8 - | ldr TMP0, [RA], #8 - | str TMP0, [BASE], #8 - | bne <1 - |2: - | cmp RC, CARG2, lsl #3 // More/less results wanted? - | bne >6 - |3: - | str BASE, L->top // Store new top. - | - |->vm_leave_cp: - | ldr RC, SAVE_CFRAME // Restore previous C frame. - | mov CRET1, #0 // Ok return status for vm_pcall. - | str RC, L->cframe - | - |->vm_leave_unw: - | restoreregs - | ret - | - |6: - | bgt >7 // Less results wanted? - | // More results wanted. Check stack size and fill up results with nil. - | ldr CARG3, L->maxstack - | cmp BASE, CARG3 - | bhs >8 - | str TISNIL, [BASE], #8 - | add RC, RC, #8 - | b <2 - | - |7: // Less results wanted. - | cbz CARG2, <3 // LUA_MULTRET+1 case? - | sub CARG1, RC, CARG2, lsl #3 - | sub BASE, BASE, CARG1 // Shrink top. - | b <3 - | - |8: // Corner case: need to grow stack for filling up results. - | // This can happen if: - | // - A C function grows the stack (a lot). - | // - The GC shrinks the stack in between. - | // - A return back from a lua_call() with (high) nresults adjustment. - | str BASE, L->top // Save current top held in BASE (yes). - | mov CARG1, L - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldr BASE, L->top // Need the (realloced) L->top in BASE. - | ldrsw CARG2, SAVE_NRES - | b <2 - | - |->vm_unwind_c: // Unwind C stack, return from vm_pcall. - | // (void *cframe, int errcode) - | mov sp, CARG1 - | mov CRET1, CARG2 - |->vm_unwind_c_eh: // Landing pad for external unwinder. - | ldr L, SAVE_L - | mv_vmstate TMP0w, C - | ldr GL, L->glref - | st_vmstate TMP0w - | b ->vm_leave_unw - | - |->vm_unwind_ff: // Unwind C stack, return from ff pcall. - | // (void *cframe) - | and sp, CARG1, #CFRAME_RAWMASK - |->vm_unwind_ff_eh: // Landing pad for external unwinder. - | ldr L, SAVE_L - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | movn TISNIL, #0 - | mov RC, #16 // 2 results: false + error message. - | ldr BASE, L->base - | ldr GL, L->glref // Setup pointer to global state. - | mov_false TMP0 - | sub RA, BASE, #8 // Results start at BASE-8. - | ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame. - | str TMP0, [BASE, #-8] // Prepend false to error message. - | st_vmstate ST_INTERP - | b ->vm_returnc - | - |//----------------------------------------------------------------------- - |//-- Grow stack for calls ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_growstack_c: // Grow stack for C function. - | // CARG1 = L - | mov CARG2, #LUA_MINSTACK - | b >2 - | - |->vm_growstack_l: // Grow stack for Lua function. - | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC - | add RC, BASE, RC - | sub RA, RA, BASE - | mov CARG1, L - | stp BASE, RC, L->base - | add PC, PC, #4 // Must point after first instruction. - | lsr CARG2, RA, #3 - |2: - | // L->base = new base, L->top = top - | str PC, SAVE_PC - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldp BASE, RC, L->base - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | sub NARGS8:RC, RC, BASE - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC - | ins_callt // Just retry the call. - | - |//----------------------------------------------------------------------- - |//-- Entry points into the assembler VM --------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_resume: // Setup C frame and resume thread. - | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) - | saveregs - | mov L, CARG1 - | ldr GL, L->glref // Setup pointer to global state. - | mov BASE, CARG2 - | str L, SAVE_L - | mov PC, #FRAME_CP - | str wzr, SAVE_NRES - | add TMP0, sp, #CFRAME_RESUME - | ldrb TMP1w, L->status - | str wzr, SAVE_ERRF - | str L, SAVE_PC // Any value outside of bytecode is ok. - | str xzr, SAVE_CFRAME - | str TMP0, L->cframe - | cbz TMP1w, >3 - | - | // Resume after yield (like a return). - | str L, GL->cur_L - | mov RA, BASE - | ldp BASE, CARG1, L->base - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | ldr PC, [BASE, FRAME_PC] - | strb wzr, L->status - | movn TISNIL, #0 - | sub RC, CARG1, BASE - | ands CARG1, PC, #FRAME_TYPE - | add RC, RC, #8 - | st_vmstate ST_INTERP - | str RCw, SAVE_MULTRES - | beq ->BC_RET_Z - | b ->vm_return - | - |->vm_pcall: // Setup protected C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) - | saveregs - | mov PC, #FRAME_CP - | str CARG4w, SAVE_ERRF - | b >1 - | - |->vm_call: // Setup C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1) - | saveregs - | mov PC, #FRAME_C - | - |1: // Entry point for vm_pcall above (PC = ftype). - | ldr RC, L:CARG1->cframe - | str CARG3w, SAVE_NRES - | mov L, CARG1 - | str CARG1, SAVE_L - | ldr GL, L->glref // Setup pointer to global state. - | mov BASE, CARG2 - | str CARG1, SAVE_PC // Any value outside of bytecode is ok. - | str RC, SAVE_CFRAME - | str fp, L->cframe // Add our C frame to cframe chain. - | - |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). - | str L, GL->cur_L - | ldp RB, CARG1, L->base // RB = old base (for vmeta_call). - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | add PC, PC, BASE - | movn TISNIL, #0 - | sub PC, PC, RB // PC = frame delta + frame type - | sub NARGS8:RC, CARG1, BASE - | st_vmstate ST_INTERP - | - |->vm_call_dispatch: - | // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC - | ldr CARG3, [BASE, FRAME_FUNC] - | checkfunc CARG3, ->vmeta_call - | - |->vm_call_dispatch_f: - | ins_call - | // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC - | - |->vm_cpcall: // Setup protected C frame, call C. - | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) - | saveregs - | mov L, CARG1 - | ldr RA, L:CARG1->stack - | str CARG1, SAVE_L - | ldr GL, L->glref // Setup pointer to global state. - | ldr RB, L->top - | str CARG1, SAVE_PC // Any value outside of bytecode is ok. - | ldr RC, L->cframe - | sub RA, RA, RB // Compute -savestack(L, L->top). - | str RAw, SAVE_NRES // Neg. delta means cframe w/o frame. - | str wzr, SAVE_ERRF // No error function. - | str RC, SAVE_CFRAME - | str fp, L->cframe // Add our C frame to cframe chain. - | str L, GL->cur_L - | blr CARG4 // (lua_State *L, lua_CFunction func, void *ud) - | mov BASE, CRET1 - | mov PC, #FRAME_CP - | cbnz BASE, <3 // Else continue with the call. - | b ->vm_leave_cp // No base? Just remove C frame. - | - |//----------------------------------------------------------------------- - |//-- Metamethod handling ------------------------------------------------ - |//----------------------------------------------------------------------- - | - |//-- Continuation dispatch ---------------------------------------------- - | - |->cont_dispatch: - | // BASE = meta base, RA = resultptr, RC = (nresults+1)*8 - | ldr LFUNC:CARG3, [RB, FRAME_FUNC] - | ldr CARG1, [BASE, #-32] // Get continuation. - | mov CARG4, BASE - | mov BASE, RB // Restore caller BASE. - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - |.if FFI - | cmp CARG1, #1 - |.endif - | ldr PC, [CARG4, #-24] // Restore PC from [cont|PC]. - | ldr CARG3, LFUNC:CARG3->pc - | add TMP0, RA, RC - | str TISNIL, [TMP0, #-8] // Ensure one valid arg. - |.if FFI - | bls >1 - |.endif - | ldr KBASE, [CARG3, #PC2PROTO(k)] - | // BASE = base, RA = resultptr, CARG4 = meta base - | br CARG1 - | - |.if FFI - |1: - | beq ->cont_ffi_callback // cont = 1: return from FFI callback. - | // cont = 0: tailcall from C function. - | sub CARG4, CARG4, #32 - | sub RC, CARG4, BASE - | b ->vm_call_tail - |.endif - | - |->cont_cat: // RA = resultptr, CARG4 = meta base - | ldr INSw, [PC, #-4] - | sub CARG2, CARG4, #32 - | ldr TMP0, [RA] - | str BASE, L->base - | decode_RB RB, INS - | decode_RA RA, INS - | add TMP1, BASE, RB, lsl #3 - | subs TMP1, CARG2, TMP1 - | beq >1 - | str TMP0, [CARG2] - | lsr CARG3, TMP1, #3 - | b ->BC_CAT_Z - | - |1: - | str TMP0, [BASE, RA, lsl #3] - | b ->cont_nop - | - |//-- Table indexing metamethods ----------------------------------------- - | - |->vmeta_tgets1: - | movn CARG4, #~LJ_TSTR - | add CARG2, BASE, RB, lsl #3 - | add CARG4, STR:RC, CARG4, lsl #47 - | b >2 - | - |->vmeta_tgets: - | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48 - | str CARG2, GL->tmptv - | add CARG2, GL, #offsetof(global_State, tmptv) - |2: - | add CARG3, sp, TMPDofs - | str CARG4, TMPD - | b >1 - | - |->vmeta_tgetb: // RB = table, RC = index - | add RC, RC, TISNUM - | add CARG2, BASE, RB, lsl #3 - | add CARG3, sp, TMPDofs - | str RC, TMPD - | b >1 - | - |->vmeta_tgetv: // RB = table, RC = key - | add CARG2, BASE, RB, lsl #3 - | add CARG3, BASE, RC, lsl #3 - |1: - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) - | // Returns TValue * (finished) or NULL (metamethod). - | cbz CRET1, >3 - | ldr TMP0, [CRET1] - | str TMP0, [BASE, RA, lsl #3] - | ins_next - | - |3: // Call __index metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k - | sub TMP1, BASE, #FRAME_CONT - | ldr BASE, L->top - | mov NARGS8:RC, #16 // 2 args for func(t, k). - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. - | str PC, [BASE, #-24] // [cont|PC] - | sub PC, BASE, TMP1 - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | b ->vm_call_dispatch_f - | - |->vmeta_tgetr: - | sxtw CARG2, TMP1w - | bl extern lj_tab_getinth // (GCtab *t, int32_t key) - | // Returns cTValue * or NULL. - | mov TMP0, TISNIL - | cbz CRET1, ->BC_TGETR_Z - | ldr TMP0, [CRET1] - | b ->BC_TGETR_Z - | - |//----------------------------------------------------------------------- - | - |->vmeta_tsets1: - | movn CARG4, #~LJ_TSTR - | add CARG2, BASE, RB, lsl #3 - | add CARG4, STR:RC, CARG4, lsl #47 - | b >2 - | - |->vmeta_tsets: - | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48 - | str CARG2, GL->tmptv - | add CARG2, GL, #offsetof(global_State, tmptv) - |2: - | add CARG3, sp, TMPDofs - | str CARG4, TMPD - | b >1 - | - |->vmeta_tsetb: // RB = table, RC = index - | add RC, RC, TISNUM - | add CARG2, BASE, RB, lsl #3 - | add CARG3, sp, TMPDofs - | str RC, TMPD - | b >1 - | - |->vmeta_tsetv: - | add CARG2, BASE, RB, lsl #3 - | add CARG3, BASE, RC, lsl #3 - |1: - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) - | // Returns TValue * (finished) or NULL (metamethod). - | ldr TMP0, [BASE, RA, lsl #3] - | cbz CRET1, >3 - | // NOBARRIER: lj_meta_tset ensures the table is not black. - | str TMP0, [CRET1] - | ins_next - | - |3: // Call __newindex metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) - | sub TMP1, BASE, #FRAME_CONT - | ldr BASE, L->top - | mov NARGS8:RC, #24 // 3 args for func(t, k, v). - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. - | str TMP0, [BASE, #16] // Copy value to third argument. - | str PC, [BASE, #-24] // [cont|PC] - | sub PC, BASE, TMP1 - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | b ->vm_call_dispatch_f - | - |->vmeta_tsetr: - | sxtw CARG3, TMP1w - | str BASE, L->base - | str PC, SAVE_PC - | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) - | // Returns TValue *. - | b ->BC_TSETR_Z - | - |//-- Comparison metamethods --------------------------------------------- - | - |->vmeta_comp: - | add CARG2, BASE, RA, lsl #3 - | sub PC, PC, #4 - | add CARG3, BASE, RC, lsl #3 - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | uxtb CARG4w, INSw - | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) - | // Returns 0/1 or TValue * (metamethod). - |3: - | cmp CRET1, #1 - | bhi ->vmeta_binop - |4: - | ldrh RBw, [PC, #2] - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | sub RB, RB, #0x20000 - | csel PC, PC, RB, lo - |->cont_nop: - | ins_next - | - |->cont_ra: // RA = resultptr - | ldr INSw, [PC, #-4] - | ldr TMP0, [RA] - | decode_RA TMP1, INS - | str TMP0, [BASE, TMP1, lsl #3] - | b ->cont_nop - | - |->cont_condt: // RA = resultptr - | ldr TMP0, [RA] - | mov_true TMP1 - | cmp TMP1, TMP0 // Branch if result is true. - | b <4 - | - |->cont_condf: // RA = resultptr - | ldr TMP0, [RA] - | mov_false TMP1 - | cmp TMP0, TMP1 // Branch if result is false. - | b <4 - | - |->vmeta_equal: - | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. - | and TAB:CARG3, CARG3, #LJ_GCVMASK - | sub PC, PC, #4 - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) - | // Returns 0/1 or TValue * (metamethod). - | b <3 - | - |->vmeta_equal_cd: - |.if FFI - | sub PC, PC, #4 - | str BASE, L->base - | mov CARG1, L - | mov CARG2, INS - | str PC, SAVE_PC - | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op) - | // Returns 0/1 or TValue * (metamethod). - | b <3 - |.endif - | - |->vmeta_istype: - | sub PC, PC, #4 - | str BASE, L->base - | mov CARG1, L - | mov CARG2, RA - | mov CARG3, RC - | str PC, SAVE_PC - | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) - | b ->cont_nop - | - |//-- Arithmetic metamethods --------------------------------------------- - | - |->vmeta_arith_vn: - | add CARG3, BASE, RB, lsl #3 - | add CARG4, KBASE, RC, lsl #3 - | b >1 - | - |->vmeta_arith_nv: - | add CARG4, BASE, RB, lsl #3 - | add CARG3, KBASE, RC, lsl #3 - | b >1 - | - |->vmeta_unm: - | add CARG3, BASE, RC, lsl #3 - | mov CARG4, CARG3 - | b >1 - | - |->vmeta_arith_vv: - | add CARG3, BASE, RB, lsl #3 - | add CARG4, BASE, RC, lsl #3 - |1: - | uxtb CARG5w, INSw - | add CARG2, BASE, RA, lsl #3 - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) - | // Returns NULL (finished) or TValue * (metamethod). - | cbz CRET1, ->cont_nop - | - | // Call metamethod for binary op. - |->vmeta_binop: - | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 - | sub TMP1, CRET1, BASE - | str PC, [CRET1, #-24] // [cont|PC] - | add PC, TMP1, #FRAME_CONT - | mov BASE, CRET1 - | mov NARGS8:RC, #16 // 2 args for func(o1, o2). - | b ->vm_call_dispatch - | - |->vmeta_len: - | add CARG2, BASE, RC, lsl #3 -#if LJ_52 - | mov TAB:RC, TAB:CARG1 // Save table (ignored for other types). -#endif - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_len // (lua_State *L, TValue *o) - | // Returns NULL (retry) or TValue * (metamethod base). -#if LJ_52 - | cbnz CRET1, ->vmeta_binop // Binop call for compatibility. - | mov TAB:CARG1, TAB:RC - | b ->BC_LEN_Z -#else - | b ->vmeta_binop // Binop call for compatibility. -#endif - | - |//-- Call metamethod ---------------------------------------------------- - | - |->vmeta_call: // Resolve and call __call metamethod. - | // RB = old base, BASE = new base, RC = nargs*8 - | mov CARG1, L - | str RB, L->base // This is the callers base! - | sub CARG2, BASE, #16 - | str PC, SAVE_PC - | add CARG3, BASE, NARGS8:RC - | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. - | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | ins_call - | - |->vmeta_callt: // Resolve __call for BC_CALLT. - | // BASE = old base, RA = new base, RC = nargs*8 - | mov CARG1, L - | str BASE, L->base - | sub CARG2, RA, #16 - | str PC, SAVE_PC - | add CARG3, RA, NARGS8:RC - | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | ldr TMP1, [RA, FRAME_FUNC] // Guaranteed to be a function here. - | ldr PC, [BASE, FRAME_PC] - | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. - | and LFUNC:CARG3, TMP1, #LJ_GCVMASK - | b ->BC_CALLT2_Z - | - |//-- Argument coercion for 'for' statement ------------------------------ - | - |->vmeta_for: - | mov CARG1, L - | str BASE, L->base - | mov CARG2, RA - | str PC, SAVE_PC - | bl extern lj_meta_for // (lua_State *L, TValue *base) - | ldr INSw, [PC, #-4] - |.if JIT - | uxtb TMP0w, INSw - |.endif - | decode_RA RA, INS - | decode_RD RC, INS - |.if JIT - | cmp TMP0, #BC_JFORI - | beq =>BC_JFORI - |.endif - | b =>BC_FORI - | - |//----------------------------------------------------------------------- - |//-- Fast functions ----------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro .ffunc, name - |->ff_ .. name: - |.endmacro - | - |.macro .ffunc_1, name - |->ff_ .. name: - | ldr CARG1, [BASE] - | cmp NARGS8:RC, #8 - | blo ->fff_fallback - |.endmacro - | - |.macro .ffunc_2, name - |->ff_ .. name: - | ldp CARG1, CARG2, [BASE] - | cmp NARGS8:RC, #16 - | blo ->fff_fallback - |.endmacro - | - |.macro .ffunc_n, name - | .ffunc name - | ldr CARG1, [BASE] - | cmp NARGS8:RC, #8 - | ldr FARG1, [BASE] - | blo ->fff_fallback - | checknum CARG1, ->fff_fallback - |.endmacro - | - |.macro .ffunc_nn, name - | .ffunc name - | ldp CARG1, CARG2, [BASE] - | cmp NARGS8:RC, #16 - | ldp FARG1, FARG2, [BASE] - | blo ->fff_fallback - | checknum CARG1, ->fff_fallback - | checknum CARG2, ->fff_fallback - |.endmacro - | - |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2. - |.macro ffgccheck - | ldp CARG1, CARG2, GL->gc.total // Assumes threshold follows total. - | cmp CARG1, CARG2 - | blt >1 - | bl ->fff_gcstep - |1: - |.endmacro - | - |//-- Base library: checks ----------------------------------------------- - | - |.ffunc_1 assert - | ldr PC, [BASE, FRAME_PC] - | mov_false TMP1 - | cmp CARG1, TMP1 - | bhs ->fff_fallback - | str CARG1, [BASE, #-16] - | sub RB, BASE, #8 - | subs RA, NARGS8:RC, #8 - | add RC, NARGS8:RC, #8 // Compute (nresults+1)*8. - | cbz RA, ->fff_res // Done if exactly 1 argument. - |1: - | ldr CARG1, [RB, #16] - | sub RA, RA, #8 - | str CARG1, [RB], #8 - | cbnz RA, <1 - | b ->fff_res - | - |.ffunc_1 type - | mov TMP0, #~LJ_TISNUM - | asr ITYPE, CARG1, #47 - | cmn ITYPE, #~LJ_TISNUM - | csinv TMP1, TMP0, ITYPE, lo - | add TMP1, TMP1, #offsetof(GCfuncC, upvalue)/8 - | ldr CARG1, [CFUNC:CARG3, TMP1, lsl #3] - | b ->fff_restv - | - |//-- Base library: getters and setters --------------------------------- - | - |.ffunc_1 getmetatable - | asr ITYPE, CARG1, #47 - | cmn ITYPE, #-LJ_TTAB - | ccmn ITYPE, #-LJ_TUDATA, #4, ne - | and TAB:CARG1, CARG1, #LJ_GCVMASK - | bne >6 - |1: // Field metatable must be at same offset for GCtab and GCudata! - | ldr TAB:RB, TAB:CARG1->metatable - |2: - | mov CARG1, TISNIL - | ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable] - | cbz TAB:RB, ->fff_restv - | ldr TMP1w, TAB:RB->hmask - | ldr TMP2w, STR:RC->hash - | ldr NODE:CARG3, TAB:RB->node - | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask - | add TMP1, TMP1, TMP1, lsl #1 - | movn CARG4, #~LJ_TSTR - | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 - | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for. - |3: // Rearranged logic, because we expect _not_ to find the key. - | ldp CARG1, TMP0, NODE:CARG3->val - | ldr NODE:CARG3, NODE:CARG3->next - | cmp TMP0, CARG4 - | beq >5 - | cbnz NODE:CARG3, <3 - |4: - | mov CARG1, RB // Use metatable as default result. - | movk CARG1, #(LJ_TTAB>>1)&0xffff, lsl #48 - | b ->fff_restv - |5: - | cmp TMP0, TISNIL - | bne ->fff_restv - | b <4 - | - |6: - | movn TMP0, #~LJ_TISNUM - | cmp ITYPE, TMP0 - | csel ITYPE, ITYPE, TMP0, hs - | sub TMP1, GL, ITYPE, lsl #3 - | ldr TAB:RB, [TMP1, #offsetof(global_State, gcroot[GCROOT_BASEMT])-8] - | b <2 - | - |.ffunc_2 setmetatable - | // Fast path: no mt for table yet and not clearing the mt. - | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback - | ldr TAB:TMP0, TAB:TMP1->metatable - | asr ITYPE, CARG2, #47 - | ldrb TMP2w, TAB:TMP1->marked - | cmn ITYPE, #-LJ_TTAB - | and TAB:CARG2, CARG2, #LJ_GCVMASK - | ccmp TAB:TMP0, #0, #0, eq - | bne ->fff_fallback - | str TAB:CARG2, TAB:TMP1->metatable - | tbz TMP2w, #2, ->fff_restv // isblack(table) - | barrierback TAB:TMP1, TMP2w, TMP0 - | b ->fff_restv - | - |.ffunc rawget - | ldr CARG2, [BASE] - | cmp NARGS8:RC, #16 - | blo ->fff_fallback - | checktab CARG2, ->fff_fallback - | mov CARG1, L - | add CARG3, BASE, #8 - | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) - | // Returns cTValue *. - | ldr CARG1, [CRET1] - | b ->fff_restv - | - |//-- Base library: conversions ------------------------------------------ - | - |.ffunc tonumber - | // Only handles the number case inline (without a base argument). - | ldr CARG1, [BASE] - | cmp NARGS8:RC, #8 - | bne ->fff_fallback - | checknumber CARG1, ->fff_fallback - | b ->fff_restv - | - |.ffunc_1 tostring - | // Only handles the string or number case inline. - | asr ITYPE, CARG1, #47 - | cmn ITYPE, #-LJ_TSTR - | // A __tostring method in the string base metatable is ignored. - | beq ->fff_restv - | // Handle numbers inline, unless a number base metatable is present. - | ldr TMP1, GL->gcroot[GCROOT_BASEMT_NUM] - | str BASE, L->base - | cmn ITYPE, #-LJ_TISNUM - | ccmp TMP1, #0, #0, ls - | str PC, SAVE_PC // Redundant (but a defined value). - | bne ->fff_fallback - | ffgccheck - | mov CARG1, L - | mov CARG2, BASE - | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) - | // Returns GCstr *. - | movn TMP1, #~LJ_TSTR - | ldr BASE, L->base - | add CARG1, CARG1, TMP1, lsl #47 - | b ->fff_restv - | - |//-- Base library: iterators ------------------------------------------- - | - |.ffunc_1 next - | checktp CARG2, CARG1, LJ_TTAB, ->fff_fallback - | str TISNIL, [BASE, NARGS8:RC] // Set missing 2nd arg to nil. - | ldr PC, [BASE, FRAME_PC] - | stp BASE, BASE, L->base // Add frame since C call can throw. - | mov CARG1, L - | add CARG3, BASE, #8 - | str PC, SAVE_PC - | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - | // Returns 0 at end of traversal. - | str TISNIL, [BASE, #-16] - | cbz CRET1, ->fff_res1 // End of traversal: return nil. - | ldp CARG1, CARG2, [BASE, #8] // Copy key and value to results. - | mov RC, #(2+1)*8 - | stp CARG1, CARG2, [BASE, #-16] - | b ->fff_res - | - |.ffunc_1 pairs - | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback -#if LJ_52 - | ldr TAB:CARG2, TAB:TMP1->metatable -#endif - | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0] - | ldr PC, [BASE, FRAME_PC] -#if LJ_52 - | cbnz TAB:CARG2, ->fff_fallback -#endif - | mov RC, #(3+1)*8 - | stp CARG1, TISNIL, [BASE, #-8] - | str CFUNC:CARG4, [BASE, #-16] - | b ->fff_res - | - |.ffunc_2 ipairs_aux - | checktab CARG1, ->fff_fallback - | checkint CARG2, ->fff_fallback - | ldr TMP1w, TAB:CARG1->asize - | ldr CARG3, TAB:CARG1->array - | ldr TMP0w, TAB:CARG1->hmask - | add CARG2w, CARG2w, #1 - | cmp CARG2w, TMP1w - | ldr PC, [BASE, FRAME_PC] - | add TMP2, CARG2, TISNUM - | mov RC, #(0+1)*8 - | str TMP2, [BASE, #-16] - | bhs >2 // Not in array part? - | ldr TMP0, [CARG3, CARG2, lsl #3] - |1: - | mov TMP1, #(2+1)*8 - | cmp TMP0, TISNIL - | str TMP0, [BASE, #-8] - | csel RC, RC, TMP1, eq - | b ->fff_res - |2: // Check for empty hash part first. Otherwise call C function. - | cbz TMP0w, ->fff_res - | bl extern lj_tab_getinth // (GCtab *t, int32_t key) - | // Returns cTValue * or NULL. - | cbz CRET1, ->fff_res - | ldr TMP0, [CRET1] - | b <1 - | - |.ffunc_1 ipairs - | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback -#if LJ_52 - | ldr TAB:CARG2, TAB:TMP1->metatable -#endif - | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0] - | ldr PC, [BASE, FRAME_PC] -#if LJ_52 - | cbnz TAB:CARG2, ->fff_fallback -#endif - | mov RC, #(3+1)*8 - | stp CARG1, TISNUM, [BASE, #-8] - | str CFUNC:CARG4, [BASE, #-16] - | b ->fff_res - | - |//-- Base library: catch errors ---------------------------------------- - | - |.ffunc pcall - | ldrb TMP0w, GL->hookmask - | subs NARGS8:RC, NARGS8:RC, #8 - | blo ->fff_fallback - | mov RB, BASE - | add BASE, BASE, #16 - | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 - | add PC, TMP0, #16+FRAME_PCALL - | beq ->vm_call_dispatch - |1: - | add TMP2, BASE, NARGS8:RC - |2: - | ldr TMP0, [TMP2, #-16] - | str TMP0, [TMP2, #-8]! - | cmp TMP2, BASE - | bne <2 - | b ->vm_call_dispatch - | - |.ffunc xpcall - | ldp CARG1, CARG2, [BASE] - | ldrb TMP0w, GL->hookmask - | subs NARGS8:RC, NARGS8:RC, #16 - | blo ->fff_fallback - | mov RB, BASE - | add BASE, BASE, #24 - | asr ITYPE, CARG2, #47 - | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 - | cmn ITYPE, #-LJ_TFUNC - | add PC, TMP0, #24+FRAME_PCALL - | bne ->fff_fallback // Traceback must be a function. - | stp CARG2, CARG1, [RB] // Swap function and traceback. - | cbz NARGS8:RC, ->vm_call_dispatch - | b <1 - | - |//-- Coroutine library -------------------------------------------------- - | - |.macro coroutine_resume_wrap, resume - |.if resume - |.ffunc_1 coroutine_resume - | checktp CARG1, LJ_TTHREAD, ->fff_fallback - |.else - |.ffunc coroutine_wrap_aux - | ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr - | and L:CARG1, CARG1, #LJ_GCVMASK - |.endif - | ldr PC, [BASE, FRAME_PC] - | str BASE, L->base - | ldp RB, CARG2, L:CARG1->base - | ldrb TMP1w, L:CARG1->status - | add TMP0, CARG2, TMP1 - | str PC, SAVE_PC - | cmp TMP0, RB - | beq ->fff_fallback - | cmp TMP1, #LUA_YIELD - | add TMP0, CARG2, #8 - | csel CARG2, CARG2, TMP0, hs - | ldr CARG4, L:CARG1->maxstack - | add CARG3, CARG2, NARGS8:RC - | ldr RB, L:CARG1->cframe - | ccmp CARG3, CARG4, #2, ls - | ccmp RB, #0, #2, ls - | bhi ->fff_fallback - |.if resume - | sub CARG3, CARG3, #8 // Keep resumed thread in stack for GC. - | add BASE, BASE, #8 - | sub NARGS8:RC, NARGS8:RC, #8 - |.endif - | str CARG3, L:CARG1->top - | str BASE, L->top - | cbz NARGS8:RC, >3 - |2: // Move args to coroutine. - | ldr TMP0, [BASE, RB] - | cmp RB, NARGS8:RC - | str TMP0, [CARG2, RB] - | add RB, RB, #8 - | bne <2 - |3: - | mov CARG3, #0 - | mov L:RA, L:CARG1 - | mov CARG4, #0 - | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) - | // Returns thread status. - |4: - | ldp CARG3, CARG4, L:RA->base - | cmp CRET1, #LUA_YIELD - | ldr BASE, L->base - | str L, GL->cur_L - | st_vmstate ST_INTERP - | bhi >8 - | sub RC, CARG4, CARG3 - | ldr CARG1, L->maxstack - | add CARG2, BASE, RC - | cbz RC, >6 // No results? - | cmp CARG2, CARG1 - | mov RB, #0 - | bhi >9 // Need to grow stack? - | - | sub CARG4, RC, #8 - | str CARG3, L:RA->top // Clear coroutine stack. - |5: // Move results from coroutine. - | ldr TMP0, [CARG3, RB] - | cmp RB, CARG4 - | str TMP0, [BASE, RB] - | add RB, RB, #8 - | bne <5 - |6: - |.if resume - | mov_true TMP1 - | add RC, RC, #16 - |7: - | str TMP1, [BASE, #-8] // Prepend true/false to results. - | sub RA, BASE, #8 - |.else - | mov RA, BASE - | add RC, RC, #8 - |.endif - | ands CARG1, PC, #FRAME_TYPE - | str PC, SAVE_PC - | str RCw, SAVE_MULTRES - | beq ->BC_RET_Z - | b ->vm_return - | - |8: // Coroutine returned with error (at co->top-1). - |.if resume - | ldr TMP0, [CARG4, #-8]! - | mov_false TMP1 - | mov RC, #(2+1)*8 - | str CARG4, L:RA->top // Remove error from coroutine stack. - | str TMP0, [BASE] // Copy error message. - | b <7 - |.else - | mov CARG1, L - | mov CARG2, L:RA - | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) - | // Never returns. - |.endif - | - |9: // Handle stack expansion on return from yield. - | mov CARG1, L - | lsr CARG2, RC, #3 - | bl extern lj_state_growstack // (lua_State *L, int n) - | mov CRET1, #0 - | b <4 - |.endmacro - | - | coroutine_resume_wrap 1 // coroutine.resume - | coroutine_resume_wrap 0 // coroutine.wrap - | - |.ffunc coroutine_yield - | ldr TMP0, L->cframe - | add TMP1, BASE, NARGS8:RC - | mov CRET1, #LUA_YIELD - | stp BASE, TMP1, L->base - | tbz TMP0, #0, ->fff_fallback - | str xzr, L->cframe - | strb CRET1w, L->status - | b ->vm_leave_unw - | - |//-- Math library ------------------------------------------------------- - | - |.macro math_round, func, round - | .ffunc math_ .. func - | ldr CARG1, [BASE] - | cmp NARGS8:RC, #8 - | ldr d0, [BASE] - | blo ->fff_fallback - | cmp TISNUMhi, CARG1, lsr #32 - | beq ->fff_restv - | blo ->fff_fallback - | round d0, d0 - | b ->fff_resn - |.endmacro - | - | math_round floor, frintm - | math_round ceil, frintp - | - |.ffunc_1 math_abs - | checknumber CARG1, ->fff_fallback - | and CARG1, CARG1, #U64x(7fffffff,ffffffff) - | bne ->fff_restv - | eor CARG2w, CARG1w, CARG1w, asr #31 - | movz CARG3, #0x41e0, lsl #48 // 2^31. - | subs CARG1w, CARG2w, CARG1w, asr #31 - | add CARG1, CARG1, TISNUM - | csel CARG1, CARG1, CARG3, pl - | // Fallthrough. - | - |->fff_restv: - | // CARG1 = TValue result. - | ldr PC, [BASE, FRAME_PC] - | str CARG1, [BASE, #-16] - |->fff_res1: - | // PC = return. - | mov RC, #(1+1)*8 - |->fff_res: - | // RC = (nresults+1)*8, PC = return. - | ands CARG1, PC, #FRAME_TYPE - | str RCw, SAVE_MULTRES - | sub RA, BASE, #16 - | bne ->vm_return - | ldr INSw, [PC, #-4] - | decode_RB RB, INS - |5: - | cmp RC, RB, lsl #3 // More results expected? - | blo >6 - | decode_RA TMP1, INS - | // Adjust BASE. KBASE is assumed to be set for the calling frame. - | sub BASE, RA, TMP1, lsl #3 - | ins_next - | - |6: // Fill up results with nil. - | add TMP1, RA, RC - | add RC, RC, #8 - | str TISNIL, [TMP1, #-8] - | b <5 - | - |.macro math_extern, func - | .ffunc_n math_ .. func - | bl extern func - | b ->fff_resn - |.endmacro - | - |.macro math_extern2, func - | .ffunc_nn math_ .. func - | bl extern func - | b ->fff_resn - |.endmacro - | - |.ffunc_n math_sqrt - | fsqrt d0, d0 - |->fff_resn: - | ldr PC, [BASE, FRAME_PC] - | str d0, [BASE, #-16] - | b ->fff_res1 - | - |.ffunc math_log - | ldr CARG1, [BASE] - | cmp NARGS8:RC, #8 - | ldr FARG1, [BASE] - | bne ->fff_fallback // Need exactly 1 argument. - | checknum CARG1, ->fff_fallback - | bl extern log - | b ->fff_resn - | - | math_extern log10 - | math_extern exp - | math_extern sin - | math_extern cos - | math_extern tan - | math_extern asin - | math_extern acos - | math_extern atan - | math_extern sinh - | math_extern cosh - | math_extern tanh - | math_extern2 pow - | math_extern2 atan2 - | math_extern2 fmod - | - |.ffunc_2 math_ldexp - | ldr FARG1, [BASE] - | checknum CARG1, ->fff_fallback - | checkint CARG2, ->fff_fallback - | sxtw CARG1, CARG2w - | bl extern ldexp // (double x, int exp) - | b ->fff_resn - | - |.ffunc_n math_frexp - | add CARG1, sp, TMPDofs - | bl extern frexp - | ldr CARG2w, TMPD - | ldr PC, [BASE, FRAME_PC] - | str d0, [BASE, #-16] - | mov RC, #(2+1)*8 - | add CARG2, CARG2, TISNUM - | str CARG2, [BASE, #-8] - | b ->fff_res - | - |.ffunc_n math_modf - | sub CARG1, BASE, #16 - | ldr PC, [BASE, FRAME_PC] - | bl extern modf - | mov RC, #(2+1)*8 - | str d0, [BASE, #-8] - | b ->fff_res - | - |.macro math_minmax, name, cond, fcond - | .ffunc_1 name - | add RB, BASE, RC - | add RA, BASE, #8 - | checkint CARG1, >4 - |1: // Handle integers. - | ldr CARG2, [RA] - | cmp RA, RB - | bhs ->fff_restv - | checkint CARG2, >3 - | cmp CARG1w, CARG2w - | add RA, RA, #8 - | csel CARG1, CARG2, CARG1, cond - | b <1 - |3: // Convert intermediate result to number and continue below. - | scvtf d0, CARG1w - | blo ->fff_fallback - | ldr d1, [RA] - | b >6 - | - |4: - | ldr d0, [BASE] - | blo ->fff_fallback - |5: // Handle numbers. - | ldr CARG2, [RA] - | ldr d1, [RA] - | cmp RA, RB - | bhs ->fff_resn - | checknum CARG2, >7 - |6: - | fcmp d0, d1 - | add RA, RA, #8 - | fcsel d0, d1, d0, fcond - | b <5 - |7: // Convert integer to number and continue above. - | scvtf d1, CARG2w - | blo ->fff_fallback - | b <6 - |.endmacro - | - | math_minmax math_min, gt, hi - | math_minmax math_max, lt, lo - | - |//-- String library ----------------------------------------------------- - | - |.ffunc string_byte // Only handle the 1-arg case here. - | ldp PC, CARG1, [BASE, FRAME_PC] - | cmp NARGS8:RC, #8 - | asr ITYPE, CARG1, #47 - | ccmn ITYPE, #-LJ_TSTR, #0, eq - | and STR:CARG1, CARG1, #LJ_GCVMASK - | bne ->fff_fallback - | ldrb TMP0w, STR:CARG1[1] // Access is always ok (NUL at end). - | ldr CARG3w, STR:CARG1->len - | add TMP0, TMP0, TISNUM - | str TMP0, [BASE, #-16] - | mov RC, #(0+1)*8 - | cbz CARG3, ->fff_res - | b ->fff_res1 - | - |.ffunc string_char // Only handle the 1-arg case here. - | ffgccheck - | ldp PC, CARG1, [BASE, FRAME_PC] - | cmp CARG1w, #255 - | ccmp NARGS8:RC, #8, #0, ls // Need exactly 1 argument. - | bne ->fff_fallback - | checkint CARG1, ->fff_fallback - | mov CARG3, #1 - | mov CARG2, BASE // Points to stack. Little-endian. - |->fff_newstr: - | // CARG2 = str, CARG3 = len. - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_str_new // (lua_State *L, char *str, size_t l) - |->fff_resstr: - | // Returns GCstr *. - | ldr BASE, L->base - | movn TMP1, #~LJ_TSTR - | add CARG1, CARG1, TMP1, lsl #47 - | b ->fff_restv - | - |.ffunc string_sub - | ffgccheck - | ldr CARG1, [BASE] - | ldr CARG3, [BASE, #16] - | cmp NARGS8:RC, #16 - | movn RB, #0 - | beq >1 - | blo ->fff_fallback - | checkint CARG3, ->fff_fallback - | sxtw RB, CARG3w - |1: - | ldr CARG2, [BASE, #8] - | checkstr CARG1, ->fff_fallback - | ldr TMP1w, STR:CARG1->len - | checkint CARG2, ->fff_fallback - | sxtw CARG2, CARG2w - | // CARG1 = str, TMP1 = str->len, CARG2 = start, RB = end - | add TMP2, RB, TMP1 - | cmp RB, #0 - | add TMP0, CARG2, TMP1 - | csinc RB, RB, TMP2, ge // if (end < 0) end += len+1 - | cmp CARG2, #0 - | csinc CARG2, CARG2, TMP0, ge // if (start < 0) start += len+1 - | cmp RB, #0 - | csel RB, RB, xzr, ge // if (end < 0) end = 0 - | cmp CARG2, #1 - | csinc CARG2, CARG2, xzr, ge // if (start < 1) start = 1 - | cmp RB, TMP1 - | csel RB, RB, TMP1, le // if (end > len) end = len - | add CARG1, STR:CARG1, #sizeof(GCstr)-1 - | subs CARG3, RB, CARG2 // len = end - start - | add CARG2, CARG1, CARG2 - | add CARG3, CARG3, #1 // len += 1 - | bge ->fff_newstr - | add STR:CARG1, GL, #offsetof(global_State, strempty) - | movn TMP1, #~LJ_TSTR - | add CARG1, CARG1, TMP1, lsl #47 - | b ->fff_restv - | - |.macro ffstring_op, name - | .ffunc string_ .. name - | ffgccheck - | ldr CARG2, [BASE] - | cmp NARGS8:RC, #8 - | asr ITYPE, CARG2, #47 - | ccmn ITYPE, #-LJ_TSTR, #0, hs - | and STR:CARG2, CARG2, #LJ_GCVMASK - | bne ->fff_fallback - | ldr TMP0, GL->tmpbuf.b - | add SBUF:CARG1, GL, #offsetof(global_State, tmpbuf) - | str BASE, L->base - | str PC, SAVE_PC - | str L, GL->tmpbuf.L - | str TMP0, GL->tmpbuf.p - | bl extern lj_buf_putstr_ .. name - | bl extern lj_buf_tostr - | b ->fff_resstr - |.endmacro - | - |ffstring_op reverse - |ffstring_op lower - |ffstring_op upper - | - |//-- Bit library -------------------------------------------------------- - | - |// FP number to bit conversion for soft-float. Clobbers CARG1-CARG3 - |->vm_tobit_fb: - | bls ->fff_fallback - | add CARG2, CARG1, CARG1 - | mov CARG3, #1076 - | sub CARG3, CARG3, CARG2, lsr #53 - | cmp CARG3, #53 - | bhi >1 - | and CARG2, CARG2, #U64x(001fffff,ffffffff) - | orr CARG2, CARG2, #U64x(00200000,00000000) - | cmp CARG1, #0 - | lsr CARG2, CARG2, CARG3 - | cneg CARG1w, CARG2w, mi - | br lr - |1: - | mov CARG1w, #0 - | br lr - | - |.macro .ffunc_bit, name - | .ffunc_1 bit_..name - | adr lr, >1 - | checkint CARG1, ->vm_tobit_fb - |1: - |.endmacro - | - |.macro .ffunc_bit_op, name, ins - | .ffunc_bit name - | mov RA, #8 - | mov TMP0w, CARG1w - | adr lr, >2 - |1: - | ldr CARG1, [BASE, RA] - | cmp RA, NARGS8:RC - | add RA, RA, #8 - | bge >9 - | checkint CARG1, ->vm_tobit_fb - |2: - | ins TMP0w, TMP0w, CARG1w - | b <1 - |.endmacro - | - |.ffunc_bit_op band, and - |.ffunc_bit_op bor, orr - |.ffunc_bit_op bxor, eor - | - |.ffunc_bit tobit - | mov TMP0w, CARG1w - |9: // Label reused by .ffunc_bit_op users. - | add CARG1, TMP0, TISNUM - | b ->fff_restv - | - |.ffunc_bit bswap - | rev TMP0w, CARG1w - | add CARG1, TMP0, TISNUM - | b ->fff_restv - | - |.ffunc_bit bnot - | mvn TMP0w, CARG1w - | add CARG1, TMP0, TISNUM - | b ->fff_restv - | - |.macro .ffunc_bit_sh, name, ins, shmod - | .ffunc bit_..name - | ldp TMP0, CARG1, [BASE] - | cmp NARGS8:RC, #16 - | blo ->fff_fallback - | adr lr, >1 - | checkint CARG1, ->vm_tobit_fb - |1: - |.if shmod == 0 - | mov TMP1, CARG1 - |.else - | neg TMP1, CARG1 - |.endif - | mov CARG1, TMP0 - | adr lr, >2 - | checkint CARG1, ->vm_tobit_fb - |2: - | ins TMP0w, CARG1w, TMP1w - | add CARG1, TMP0, TISNUM - | b ->fff_restv - |.endmacro - | - |.ffunc_bit_sh lshift, lsl, 0 - |.ffunc_bit_sh rshift, lsr, 0 - |.ffunc_bit_sh arshift, asr, 0 - |.ffunc_bit_sh rol, ror, 1 - |.ffunc_bit_sh ror, ror, 0 - | - |//----------------------------------------------------------------------- - | - |->fff_fallback: // Call fast function fallback handler. - | // BASE = new base, RC = nargs*8 - | ldp CFUNC:CARG3, PC, [BASE, FRAME_FUNC] // Fallback may overwrite PC. - | ldr TMP2, L->maxstack - | add TMP1, BASE, NARGS8:RC - | stp BASE, TMP1, L->base - | and CFUNC:CARG3, CARG3, #LJ_GCVMASK - | add TMP1, TMP1, #8*LUA_MINSTACK - | ldr CARG3, CFUNC:CARG3->f - | str PC, SAVE_PC // Redundant (but a defined value). - | cmp TMP1, TMP2 - | mov CARG1, L - | bhi >5 // Need to grow stack. - | blr CARG3 // (lua_State *L) - | // Either throws an error, or recovers and returns -1, 0 or nresults+1. - | ldr BASE, L->base - | cmp CRET1w, #0 - | lsl RC, CRET1, #3 - | sub RA, BASE, #16 - | bgt ->fff_res // Returned nresults+1? - |1: // Returned 0 or -1: retry fast path. - | ldr CARG1, L->top - | ldr CFUNC:CARG3, [BASE, FRAME_FUNC] - | sub NARGS8:RC, CARG1, BASE - | bne ->vm_call_tail // Returned -1? - | and CFUNC:CARG3, CARG3, #LJ_GCVMASK - | ins_callt // Returned 0: retry fast path. - | - |// Reconstruct previous base for vmeta_call during tailcall. - |->vm_call_tail: - | ands TMP0, PC, #FRAME_TYPE - | and TMP1, PC, #~FRAME_TYPEP - | bne >3 - | ldrb RAw, [PC, #-3] - | lsl RA, RA, #3 - | add TMP1, RA, #16 - |3: - | sub RB, BASE, TMP1 - | b ->vm_call_dispatch // Resolve again for tailcall. - | - |5: // Grow stack for fallback handler. - | mov CARG2, #LUA_MINSTACK - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldr BASE, L->base - | cmp CARG1, CARG1 // Set zero-flag to force retry. - | b <1 - | - |->fff_gcstep: // Call GC step function. - | // BASE = new base, RC = nargs*8 - | add CARG2, BASE, NARGS8:RC // Calculate L->top. - | mov RA, lr - | stp BASE, CARG2, L->base - | str PC, SAVE_PC // Redundant (but a defined value). - | mov CARG1, L - | bl extern lj_gc_step // (lua_State *L) - | ldp BASE, CARG2, L->base - | ldr CFUNC:CARG3, [BASE, FRAME_FUNC] - | mov lr, RA // Help return address predictor. - | sub NARGS8:RC, CARG2, BASE // Calculate nargs*8. - | and CFUNC:CARG3, CARG3, #LJ_GCVMASK - | ret - | - |//----------------------------------------------------------------------- - |//-- Special dispatch targets ------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_record: // Dispatch target for recording phase. - |.if JIT - | ldrb CARG1w, GL->hookmask - | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent. - | bne >5 - | // Decrement the hookcount for consistency, but always do the call. - | ldr CARG2w, GL->hookcount - | tst CARG1, #HOOK_ACTIVE - | bne >1 - | sub CARG2w, CARG2w, #1 - | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT - | beq >1 - | str CARG2w, GL->hookcount - | b >1 - |.endif - | - |->vm_rethook: // Dispatch target for return hooks. - | ldrb TMP2w, GL->hookmask - | tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1 // Hook already active? - |5: // Re-dispatch to static ins. - | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC] - | br TMP0 - | - |->vm_inshook: // Dispatch target for instr/line hooks. - | ldrb TMP2w, GL->hookmask - | ldr TMP3w, GL->hookcount - | tbnz TMP2w, #HOOK_ACTIVE_SHIFT, <5 // Hook already active? - | tst TMP2w, #LUA_MASKLINE|LUA_MASKCOUNT - | beq <5 - | sub TMP3w, TMP3w, #1 - | str TMP3w, GL->hookcount - | cbz TMP3w, >1 - | tbz TMP2w, #LUA_HOOKLINE, <5 - |1: - | mov CARG1, L - | str BASE, L->base - | mov CARG2, PC - | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. - | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) - |3: - | ldr BASE, L->base - |4: // Re-dispatch to static ins. - | ldr INSw, [PC, #-4] - | add TMP1, GL, INS, uxtb #3 - | decode_RA RA, INS - | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC] - | decode_RD RC, INS - | br TMP0 - | - |->cont_hook: // Continue from hook yield. - | ldr CARG1, [CARG4, #-40] - | add PC, PC, #4 - | str CARG1w, SAVE_MULTRES // Restore MULTRES for *M ins. - | b <4 - | - |->vm_hotloop: // Hot loop counter underflow. - |.if JIT - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L). - | add CARG1, GL, #GG_G2DISP+GG_DISP2J - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | str PC, SAVE_PC - | ldr CARG3, LFUNC:CARG3->pc - | mov CARG2, PC - | str L, [GL, #GL_J(L)] - | ldrb CARG3w, [CARG3, #PC2PROTO(framesize)] - | str BASE, L->base - | add CARG3, BASE, CARG3, lsl #3 - | str CARG3, L->top - | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) - | b <3 - |.endif - | - |->vm_callhook: // Dispatch target for call hooks. - | mov CARG2, PC - |.if JIT - | b >1 - |.endif - | - |->vm_hotcall: // Hot call counter underflow. - |.if JIT - | orr CARG2, PC, #1 - |1: - |.endif - | add TMP1, BASE, NARGS8:RC - | str PC, SAVE_PC - | mov CARG1, L - | sub RA, RA, BASE - | stp BASE, TMP1, L->base - | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) - | // Returns ASMFunction. - | ldp BASE, TMP1, L->base - | str xzr, SAVE_PC // Invalidate for subsequent line hook. - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | add RA, BASE, RA - | sub NARGS8:RC, TMP1, BASE - | ldr INSw, [PC, #-4] - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | br CRET1 - | - |->cont_stitch: // Trace stitching. - |.if JIT - | // RA = resultptr, CARG4 = meta base - | ldr RB, SAVE_MULTRES - | ldr INSw, [PC, #-4] - | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace. - | subs RB, RB, #8 - | decode_RA RC, INS // Call base. - | and CARG3, CARG3, #LJ_GCVMASK - | beq >2 - |1: // Move results down. - | ldr CARG1, [RA] - | add RA, RA, #8 - | subs RB, RB, #8 - | str CARG1, [BASE, RC, lsl #3] - | add RC, RC, #1 - | bne <1 - |2: - | decode_RA RA, INS - | decode_RB RB, INS - | add RA, RA, RB - |3: - | cmp RA, RC - | bhi >9 // More results wanted? - | - | ldrh RAw, TRACE:CARG3->traceno - | ldrh RCw, TRACE:CARG3->link - | cmp RCw, RAw - | beq ->cont_nop // Blacklisted. - | cmp RCw, #0 - | bne =>BC_JLOOP // Jump to stitched trace. - | - | // Stitch a new trace to the previous trace. - | mov CARG1, #GL_J(exitno) - | str RA, [GL, CARG1] - | mov CARG1, #GL_J(L) - | str L, [GL, CARG1] - | str BASE, L->base - | add CARG1, GL, #GG_G2J - | mov CARG2, PC - | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) - | ldr BASE, L->base - | b ->cont_nop - | - |9: // Fill up results with nil. - | str TISNIL, [BASE, RC, lsl #3] - | add RC, RC, #1 - | b <3 - |.endif - | - |->vm_profhook: // Dispatch target for profiler hook. -#if LJ_HASPROFILE - | mov CARG1, L - | str BASE, L->base - | mov CARG2, PC - | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) - | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. - | ldr BASE, L->base - | sub PC, PC, #4 - | b ->cont_nop -#endif - | - |//----------------------------------------------------------------------- - |//-- Trace exit handler ------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro savex_, a, b - | stp d..a, d..b, [sp, #a*8] - | stp x..a, x..b, [sp, #32*8+a*8] - |.endmacro - | - |->vm_exit_handler: - |.if JIT - | sub sp, sp, #(64*8) - | savex_, 0, 1 - | savex_, 2, 3 - | savex_, 4, 5 - | savex_, 6, 7 - | savex_, 8, 9 - | savex_, 10, 11 - | savex_, 12, 13 - | savex_, 14, 15 - | savex_, 16, 17 - | savex_, 18, 19 - | savex_, 20, 21 - | savex_, 22, 23 - | savex_, 24, 25 - | savex_, 26, 27 - | savex_, 28, 29 - | stp d30, d31, [sp, #30*8] - | ldr CARG1, [sp, #64*8] // Load original value of lr. - | add CARG3, sp, #64*8 // Recompute original value of sp. - | mv_vmstate CARG4, EXIT - | stp xzr, CARG3, [sp, #62*8] // Store 0/sp in RID_LR/RID_SP. - | sub CARG1, CARG1, lr - | ldr L, GL->cur_L - | lsr CARG1, CARG1, #2 - | ldr BASE, GL->jit_base - | sub CARG1, CARG1, #2 - | ldr CARG2w, [lr] // Load trace number. - | st_vmstate CARG4 - | str BASE, L->base - | ubfx CARG2w, CARG2w, #5, #16 - | str CARG1w, [GL, #GL_J(exitno)] - | str CARG2w, [GL, #GL_J(parent)] - | str L, [GL, #GL_J(L)] - | str xzr, GL->jit_base - | add CARG1, GL, #GG_G2J - | mov CARG2, sp - | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) - | // Returns MULTRES (unscaled) or negated error code. - | ldr CARG2, L->cframe - | ldr BASE, L->base - | and sp, CARG2, #CFRAME_RAWMASK - | ldr PC, SAVE_PC // Get SAVE_PC. - | str L, SAVE_L // Set SAVE_L (on-trace resume/yield). - | b >1 - |.endif - | - |->vm_exit_interp: - | // CARG1 = MULTRES or negated error code, BASE, PC and GL set. - |.if JIT - | ldr L, SAVE_L - |1: - | cmp CARG1w, #0 - | blt >9 // Check for error from exit. - | lsl RC, CARG1, #3 - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | movn TISNIL, #0 - | and LFUNC:CARG2, CARG2, #LJ_GCVMASK - | str RC, SAVE_MULTRES - | str BASE, L->base - | ldr CARG2, LFUNC:CARG2->pc - | str xzr, GL->jit_base - | mv_vmstate CARG4, INTERP - | ldr KBASE, [CARG2, #PC2PROTO(k)] - | // Modified copy of ins_next which handles function header dispatch, too. - | ldrb RBw, [PC] - | ldr INSw, [PC], #4 - | st_vmstate CARG4 - | cmp RBw, #BC_FUNCC+2 // Fast function? - | add TMP1, GL, INS, uxtb #3 - | bhs >4 - |2: - | cmp RBw, #BC_FUNCF // Function header? - | add TMP0, GL, RB, uxtb #3 - | ldr RB, [TMP0, #GG_G2DISP] - | decode_RA RA, INS - | lsr TMP0, INS, #16 - | csel RC, TMP0, RC, lo - | blo >5 - | ldr CARG3, [BASE, FRAME_FUNC] - | sub RC, RC, #8 - | add RA, BASE, RA, lsl #3 // Yes: RA = BASE+framesize*8, RC = nargs*8 - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - |5: - | br RB - | - |4: // Check frame below fast function. - | ldr CARG1, [BASE, FRAME_PC] - | ands CARG2, CARG1, #FRAME_TYPE - | bne <2 // Trace stitching continuation? - | // Otherwise set KBASE for Lua function below fast function. - | ldr CARG3, [CARG1, #-4] - | decode_RA CARG1, CARG3 - | sub CARG2, BASE, CARG1, lsl #3 - | ldr LFUNC:CARG3, [CARG2, #-32] - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | ldr CARG3, LFUNC:CARG3->pc - | ldr KBASE, [CARG3, #PC2PROTO(k)] - | b <2 - | - |9: // Rethrow error from the right C frame. - | neg CARG2, CARG1 - | mov CARG1, L - | bl extern lj_err_throw // (lua_State *L, int errcode) - |.endif - | - |//----------------------------------------------------------------------- - |//-- Math helper functions ---------------------------------------------- - |//----------------------------------------------------------------------- - | - | // int lj_vm_modi(int dividend, int divisor); - |->vm_modi: - | eor CARG4w, CARG1w, CARG2w - | cmp CARG4w, #0 - | eor CARG3w, CARG1w, CARG1w, asr #31 - | eor CARG4w, CARG2w, CARG2w, asr #31 - | sub CARG3w, CARG3w, CARG1w, asr #31 - | sub CARG4w, CARG4w, CARG2w, asr #31 - | udiv CARG1w, CARG3w, CARG4w - | msub CARG1w, CARG1w, CARG4w, CARG3w - | ccmp CARG1w, #0, #4, mi - | sub CARG3w, CARG1w, CARG4w - | csel CARG1w, CARG1w, CARG3w, eq - | eor CARG3w, CARG1w, CARG2w - | cmp CARG3w, #0 - | cneg CARG1w, CARG1w, mi - | ret - | - |//----------------------------------------------------------------------- - |//-- Miscellaneous functions -------------------------------------------- - |//----------------------------------------------------------------------- - | - |//----------------------------------------------------------------------- - |//-- FFI helper functions ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Handler for callback functions. - |// Saveregs already performed. Callback slot number in [sp], g in r12. - |->vm_ffi_callback: - |.if FFI - |.type CTSTATE, CTState, PC - | saveregs - | ldr CTSTATE, GL:x10->ctype_state - | mov GL, x10 - | add x10, sp, # CFRAME_SPACE - | str w9, CTSTATE->cb.slot - | stp x0, x1, CTSTATE->cb.gpr[0] - | stp d0, d1, CTSTATE->cb.fpr[0] - | stp x2, x3, CTSTATE->cb.gpr[2] - | stp d2, d3, CTSTATE->cb.fpr[2] - | stp x4, x5, CTSTATE->cb.gpr[4] - | stp d4, d5, CTSTATE->cb.fpr[4] - | stp x6, x7, CTSTATE->cb.gpr[6] - | stp d6, d7, CTSTATE->cb.fpr[6] - | str x10, CTSTATE->cb.stack - | mov CARG1, CTSTATE - | str CTSTATE, SAVE_PC // Any value outside of bytecode is ok. - | mov CARG2, sp - | bl extern lj_ccallback_enter // (CTState *cts, void *cf) - | // Returns lua_State *. - | ldp BASE, RC, L:CRET1->base - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | movn TISNIL, #0 - | mov L, CRET1 - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | sub RC, RC, BASE - | st_vmstate ST_INTERP - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | ins_callt - |.endif - | - |->cont_ffi_callback: // Return from FFI callback. - |.if FFI - | ldr CTSTATE, GL->ctype_state - | stp BASE, CARG4, L->base - | str L, CTSTATE->L - | mov CARG1, CTSTATE - | mov CARG2, RA - | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) - | ldp x0, x1, CTSTATE->cb.gpr[0] - | ldp d0, d1, CTSTATE->cb.fpr[0] - | b ->vm_leave_unw - |.endif - | - |->vm_ffi_call: // Call C function via FFI. - | // Caveat: needs special frame unwinding, see below. - |.if FFI - | .type CCSTATE, CCallState, x19 - | stp fp, lr, [sp, #-32]! - | add fp, sp, #0 - | str CCSTATE, [sp, #16] - | mov CCSTATE, x0 - | ldr TMP0w, CCSTATE:x0->spadj - | ldrb TMP1w, CCSTATE->nsp - | add TMP2, CCSTATE, #offsetof(CCallState, stack) - | subs TMP1, TMP1, #1 - | ldr TMP3, CCSTATE->func - | sub sp, fp, TMP0 - | bmi >2 - |1: // Copy stack slots - | ldr TMP0, [TMP2, TMP1, lsl #3] - | str TMP0, [sp, TMP1, lsl #3] - | subs TMP1, TMP1, #1 - | bpl <1 - |2: - | ldp x0, x1, CCSTATE->gpr[0] - | ldp d0, d1, CCSTATE->fpr[0] - | ldp x2, x3, CCSTATE->gpr[2] - | ldp d2, d3, CCSTATE->fpr[2] - | ldp x4, x5, CCSTATE->gpr[4] - | ldp d4, d5, CCSTATE->fpr[4] - | ldp x6, x7, CCSTATE->gpr[6] - | ldp d6, d7, CCSTATE->fpr[6] - | ldr x8, CCSTATE->retp - | blr TMP3 - | mov sp, fp - | stp x0, x1, CCSTATE->gpr[0] - | stp d0, d1, CCSTATE->fpr[0] - | stp d2, d3, CCSTATE->fpr[2] - | ldr CCSTATE, [sp, #16] - | ldp fp, lr, [sp], #32 - | ret - |.endif - |// Note: vm_ffi_call must be the last function in this object file! - | - |//----------------------------------------------------------------------- -} - -/* Generate the code for a single instruction. */ -static void build_ins(BuildCtx *ctx, BCOp op, int defop) -{ - int vk = 0; - |=>defop: - - switch (op) { - - /* -- Comparison ops ---------------------------------------------------- */ - - /* Remember: all ops branch for a true comparison, fall through otherwise. */ - - case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: - | // RA = src1, RC = src2, JMP with RC = target - | ldr CARG1, [BASE, RA, lsl #3] - | ldrh RBw, [PC, #2] - | ldr CARG2, [BASE, RC, lsl #3] - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | sub RB, RB, #0x20000 - | checkint CARG1, >3 - | checkint CARG2, >4 - | cmp CARG1w, CARG2w - if (op == BC_ISLT) { - | csel PC, RB, PC, lt - } else if (op == BC_ISGE) { - | csel PC, RB, PC, ge - } else if (op == BC_ISLE) { - | csel PC, RB, PC, le - } else { - | csel PC, RB, PC, gt - } - |1: - | ins_next - | - |3: // RA not int. - | ldr FARG1, [BASE, RA, lsl #3] - | blo ->vmeta_comp - | ldr FARG2, [BASE, RC, lsl #3] - | cmp TISNUMhi, CARG2, lsr #32 - | bhi >5 - | bne ->vmeta_comp - | // RA number, RC int. - | scvtf FARG2, CARG2w - | b >5 - | - |4: // RA int, RC not int - | ldr FARG2, [BASE, RC, lsl #3] - | blo ->vmeta_comp - | // RA int, RC number. - | scvtf FARG1, CARG1w - | - |5: // RA number, RC number - | fcmp FARG1, FARG2 - | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. - if (op == BC_ISLT) { - | csel PC, RB, PC, lo - } else if (op == BC_ISGE) { - | csel PC, RB, PC, hs - } else if (op == BC_ISLE) { - | csel PC, RB, PC, ls - } else { - | csel PC, RB, PC, hi - } - | b <1 - break; - - case BC_ISEQV: case BC_ISNEV: - vk = op == BC_ISEQV; - | // RA = src1, RC = src2, JMP with RC = target - | ldr CARG1, [BASE, RA, lsl #3] - | add RC, BASE, RC, lsl #3 - | ldrh RBw, [PC, #2] - | ldr CARG3, [RC] - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | sub RB, RB, #0x20000 - | asr ITYPE, CARG3, #47 - | cmn ITYPE, #-LJ_TISNUM - if (vk) { - | bls ->BC_ISEQN_Z - } else { - | bls ->BC_ISNEN_Z - } - | // RC is not a number. - | asr TMP0, CARG1, #47 - |.if FFI - | // Check if RC or RA is a cdata. - | cmn ITYPE, #-LJ_TCDATA - | ccmn TMP0, #-LJ_TCDATA, #4, ne - | beq ->vmeta_equal_cd - |.endif - | cmp CARG1, CARG3 - | bne >2 - | // Tag and value are equal. - if (vk) { - |->BC_ISEQV_Z: - | mov PC, RB // Perform branch. - } - |1: - | ins_next - | - |2: // Check if the tags are the same and it's a table or userdata. - | cmp ITYPE, TMP0 - | ccmn ITYPE, #-LJ_TISTABUD, #2, eq - if (vk) { - | bhi <1 - } else { - | bhi ->BC_ISEQV_Z // Reuse code from opposite instruction. - } - | // Different tables or userdatas. Need to check __eq metamethod. - | // Field metatable must be at same offset for GCtab and GCudata! - | and TAB:CARG2, CARG1, #LJ_GCVMASK - | ldr TAB:TMP2, TAB:CARG2->metatable - if (vk) { - | cbz TAB:TMP2, <1 // No metatable? - | ldrb TMP1w, TAB:TMP2->nomm - | mov CARG4, #0 // ne = 0 - | tbnz TMP1w, #MM_eq, <1 // 'no __eq' flag set: done. - } else { - | cbz TAB:TMP2, ->BC_ISEQV_Z // No metatable? - | ldrb TMP1w, TAB:TMP2->nomm - | mov CARG4, #1 // ne = 1. - | tbnz TMP1w, #MM_eq, ->BC_ISEQV_Z // 'no __eq' flag set: done. - } - | b ->vmeta_equal - break; - - case BC_ISEQS: case BC_ISNES: - vk = op == BC_ISEQS; - | // RA = src, RC = str_const (~), JMP with RC = target - | ldr CARG1, [BASE, RA, lsl #3] - | mvn RC, RC - | ldrh RBw, [PC, #2] - | ldr CARG2, [KBASE, RC, lsl #3] - | add PC, PC, #4 - | movn TMP0, #~LJ_TSTR - |.if FFI - | asr ITYPE, CARG1, #47 - |.endif - | add RB, PC, RB, lsl #2 - | add CARG2, CARG2, TMP0, lsl #47 - | sub RB, RB, #0x20000 - |.if FFI - | cmn ITYPE, #-LJ_TCDATA - | beq ->vmeta_equal_cd - |.endif - | cmp CARG1, CARG2 - if (vk) { - | csel PC, RB, PC, eq - } else { - | csel PC, RB, PC, ne - } - | ins_next - break; - - case BC_ISEQN: case BC_ISNEN: - vk = op == BC_ISEQN; - | // RA = src, RC = num_const (~), JMP with RC = target - | ldr CARG1, [BASE, RA, lsl #3] - | add RC, KBASE, RC, lsl #3 - | ldrh RBw, [PC, #2] - | ldr CARG3, [RC] - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | sub RB, RB, #0x20000 - if (vk) { - |->BC_ISEQN_Z: - } else { - |->BC_ISNEN_Z: - } - | checkint CARG1, >4 - | checkint CARG3, >6 - | cmp CARG1w, CARG3w - |1: - if (vk) { - | csel PC, RB, PC, eq - |2: - } else { - |2: - | csel PC, RB, PC, ne - } - |3: - | ins_next - | - |4: // RA not int. - |.if FFI - | blo >7 - |.else - | blo <2 - |.endif - | ldr FARG1, [BASE, RA, lsl #3] - | ldr FARG2, [RC] - | cmp TISNUMhi, CARG3, lsr #32 - | bne >5 - | // RA number, RC int. - | scvtf FARG2, CARG3w - |5: - | // RA number, RC number. - | fcmp FARG1, FARG2 - | b <1 - | - |6: // RA int, RC number - | ldr FARG2, [RC] - | scvtf FARG1, CARG1w - | fcmp FARG1, FARG2 - | b <1 - | - |.if FFI - |7: - | asr ITYPE, CARG1, #47 - | cmn ITYPE, #-LJ_TCDATA - | bne <2 - | b ->vmeta_equal_cd - |.endif - break; - - case BC_ISEQP: case BC_ISNEP: - vk = op == BC_ISEQP; - | // RA = src, RC = primitive_type (~), JMP with RC = target - | ldr TMP0, [BASE, RA, lsl #3] - | ldrh RBw, [PC, #2] - | add PC, PC, #4 - | add RC, RC, #1 - | add RB, PC, RB, lsl #2 - |.if FFI - | asr ITYPE, TMP0, #47 - | cmn ITYPE, #-LJ_TCDATA - | beq ->vmeta_equal_cd - | cmn RC, ITYPE - |.else - | cmn RC, TMP0, asr #47 - |.endif - | sub RB, RB, #0x20000 - if (vk) { - | csel PC, RB, PC, eq - } else { - | csel PC, RB, PC, ne - } - | ins_next - break; - - /* -- Unary test and copy ops ------------------------------------------- */ - - case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: - | // RA = dst or unused, RC = src, JMP with RC = target - | ldrh RBw, [PC, #2] - | ldr TMP0, [BASE, RC, lsl #3] - | add PC, PC, #4 - | mov_false TMP1 - | add RB, PC, RB, lsl #2 - | cmp TMP0, TMP1 - | sub RB, RB, #0x20000 - if (op == BC_ISTC || op == BC_IST) { - if (op == BC_ISTC) { - | csel RA, RA, RC, lo - } - | csel PC, RB, PC, lo - } else { - if (op == BC_ISFC) { - | csel RA, RA, RC, hs - } - | csel PC, RB, PC, hs - } - if (op == BC_ISTC || op == BC_ISFC) { - | str TMP0, [BASE, RA, lsl #3] - } - | ins_next - break; - - case BC_ISTYPE: - | // RA = src, RC = -type - | ldr TMP0, [BASE, RA, lsl #3] - | cmn RC, TMP0, asr #47 - | bne ->vmeta_istype - | ins_next - break; - case BC_ISNUM: - | // RA = src, RC = -(TISNUM-1) - | ldr TMP0, [BASE, RA] - | checknum TMP0, ->vmeta_istype - | ins_next - break; - - /* -- Unary ops --------------------------------------------------------- */ - - case BC_MOV: - | // RA = dst, RC = src - | ldr TMP0, [BASE, RC, lsl #3] - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_NOT: - | // RA = dst, RC = src - | ldr TMP0, [BASE, RC, lsl #3] - | mov_false TMP1 - | mov_true TMP2 - | cmp TMP0, TMP1 - | csel TMP0, TMP1, TMP2, lo - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_UNM: - | // RA = dst, RC = src - | ldr TMP0, [BASE, RC, lsl #3] - | asr ITYPE, TMP0, #47 - | cmn ITYPE, #-LJ_TISNUM - | bhi ->vmeta_unm - | eor TMP0, TMP0, #U64x(80000000,00000000) - | bne >5 - | negs TMP0w, TMP0w - | movz CARG3, #0x41e0, lsl #48 // 2^31. - | add TMP0, TMP0, TISNUM - | csel TMP0, TMP0, CARG3, vc - |5: - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_LEN: - | // RA = dst, RC = src - | ldr CARG1, [BASE, RC, lsl #3] - | asr ITYPE, CARG1, #47 - | cmn ITYPE, #-LJ_TSTR - | and CARG1, CARG1, #LJ_GCVMASK - | bne >2 - | ldr CARG1w, STR:CARG1->len - |1: - | add CARG1, CARG1, TISNUM - | str CARG1, [BASE, RA, lsl #3] - | ins_next - | - |2: - | cmn ITYPE, #-LJ_TTAB - | bne ->vmeta_len -#if LJ_52 - | ldr TAB:CARG2, TAB:CARG1->metatable - | cbnz TAB:CARG2, >9 - |3: -#endif - |->BC_LEN_Z: - | bl extern lj_tab_len // (GCtab *t) - | // Returns uint32_t (but less than 2^31). - | b <1 - | -#if LJ_52 - |9: - | ldrb TMP1w, TAB:CARG2->nomm - | tbnz TMP1w, #MM_len, <3 // 'no __len' flag set: done. - | b ->vmeta_len -#endif - break; - - /* -- Binary ops -------------------------------------------------------- */ - - |.macro ins_arithcheck_int, target - | checkint CARG1, target - | checkint CARG2, target - |.endmacro - | - |.macro ins_arithcheck_num, target - | checknum CARG1, target - | checknum CARG2, target - |.endmacro - | - |.macro ins_arithcheck_nzdiv, target - | cbz CARG2w, target - |.endmacro - | - |.macro ins_arithhead - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||if (vk == 1) { - | and RC, RC, #255 - | decode_RB RB, INS - ||} else { - | decode_RB RB, INS - | and RC, RC, #255 - ||} - |.endmacro - | - |.macro ins_arithload, reg1, reg2 - | // RA = dst, RB = src1, RC = src2 | num_const - ||switch (vk) { - ||case 0: - | ldr reg1, [BASE, RB, lsl #3] - | ldr reg2, [KBASE, RC, lsl #3] - || break; - ||case 1: - | ldr reg1, [KBASE, RC, lsl #3] - | ldr reg2, [BASE, RB, lsl #3] - || break; - ||default: - | ldr reg1, [BASE, RB, lsl #3] - | ldr reg2, [BASE, RC, lsl #3] - || break; - ||} - |.endmacro - | - |.macro ins_arithfallback, ins - ||switch (vk) { - ||case 0: - | ins ->vmeta_arith_vn - || break; - ||case 1: - | ins ->vmeta_arith_nv - || break; - ||default: - | ins ->vmeta_arith_vv - || break; - ||} - |.endmacro - | - |.macro ins_arithmod, res, reg1, reg2 - | fdiv d2, reg1, reg2 - | frintm d2, d2 - | fmsub res, d2, reg2, reg1 - |.endmacro - | - |.macro ins_arithdn, intins, fpins - | ins_arithhead - | ins_arithload CARG1, CARG2 - | ins_arithcheck_int >5 - |.if "intins" == "smull" - | smull CARG1, CARG1w, CARG2w - | cmp CARG1, CARG1, sxtw - | mov CARG1w, CARG1w - | ins_arithfallback bne - |.elif "intins" == "ins_arithmodi" - | ins_arithfallback ins_arithcheck_nzdiv - | bl ->vm_modi - |.else - | intins CARG1w, CARG1w, CARG2w - | ins_arithfallback bvs - |.endif - | add CARG1, CARG1, TISNUM - | str CARG1, [BASE, RA, lsl #3] - |4: - | ins_next - | - |5: // FP variant. - | ins_arithload FARG1, FARG2 - | ins_arithfallback ins_arithcheck_num - | fpins FARG1, FARG1, FARG2 - | str FARG1, [BASE, RA, lsl #3] - | b <4 - |.endmacro - | - |.macro ins_arithfp, fpins - | ins_arithhead - | ins_arithload CARG1, CARG2 - | ins_arithload FARG1, FARG2 - | ins_arithfallback ins_arithcheck_num - |.if "fpins" == "fpow" - | bl extern pow - |.else - | fpins FARG1, FARG1, FARG2 - |.endif - | str FARG1, [BASE, RA, lsl #3] - | ins_next - |.endmacro - - case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - | ins_arithdn adds, fadd - break; - case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - | ins_arithdn subs, fsub - break; - case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arithdn smull, fmul - break; - case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: - | ins_arithfp fdiv - break; - case BC_MODVN: case BC_MODNV: case BC_MODVV: - | ins_arithdn ins_arithmodi, ins_arithmod - break; - case BC_POW: - | // NYI: (partial) integer arithmetic. - | ins_arithfp fpow - break; - - case BC_CAT: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = dst, RB = src_start, RC = src_end - | str BASE, L->base - | sub CARG3, RC, RB - | add CARG2, BASE, RC, lsl #3 - |->BC_CAT_Z: - | // RA = dst, CARG2 = top-1, CARG3 = left - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) - | // Returns NULL (finished) or TValue * (metamethod). - | ldrb RBw, [PC, #-1] - | ldr BASE, L->base - | cbnz CRET1, ->vmeta_binop - | ldr TMP0, [BASE, RB, lsl #3] - | str TMP0, [BASE, RA, lsl #3] // Copy result to RA. - | ins_next - break; - - /* -- Constant ops ------------------------------------------------------ */ - - case BC_KSTR: - | // RA = dst, RC = str_const (~) - | mvn RC, RC - | ldr TMP0, [KBASE, RC, lsl #3] - | movn TMP1, #~LJ_TSTR - | add TMP0, TMP0, TMP1, lsl #47 - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_KCDATA: - |.if FFI - | // RA = dst, RC = cdata_const (~) - | mvn RC, RC - | ldr TMP0, [KBASE, RC, lsl #3] - | movn TMP1, #~LJ_TCDATA - | add TMP0, TMP0, TMP1, lsl #47 - | str TMP0, [BASE, RA, lsl #3] - | ins_next - |.endif - break; - case BC_KSHORT: - | // RA = dst, RC = int16_literal - | sxth RCw, RCw - | add TMP0, RC, TISNUM - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_KNUM: - | // RA = dst, RC = num_const - | ldr TMP0, [KBASE, RC, lsl #3] - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_KPRI: - | // RA = dst, RC = primitive_type (~) - | mvn TMP0, RC, lsl #47 - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_KNIL: - | // RA = base, RC = end - | add RA, BASE, RA, lsl #3 - | add RC, BASE, RC, lsl #3 - | str TISNIL, [RA], #8 - |1: - | cmp RA, RC - | str TISNIL, [RA], #8 - | blt <1 - | ins_next_ - break; - - /* -- Upvalue and function ops ------------------------------------------ */ - - case BC_UGET: - | // RA = dst, RC = uvnum - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | add RC, RC, #offsetof(GCfuncL, uvptr)/8 - | and LFUNC:CARG2, CARG2, #LJ_GCVMASK - | ldr UPVAL:CARG2, [LFUNC:CARG2, RC, lsl #3] - | ldr CARG2, UPVAL:CARG2->v - | ldr TMP0, [CARG2] - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_USETV: - | // RA = uvnum, RC = src - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | add RA, RA, #offsetof(GCfuncL, uvptr)/8 - | and LFUNC:CARG2, CARG2, #LJ_GCVMASK - | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3] - | ldr CARG3, [BASE, RC, lsl #3] - | ldr CARG2, UPVAL:CARG1->v - | ldrb TMP2w, UPVAL:CARG1->marked - | ldrb TMP0w, UPVAL:CARG1->closed - | asr ITYPE, CARG3, #47 - | str CARG3, [CARG2] - | add ITYPE, ITYPE, #-LJ_TISGCV - | tst TMP2w, #LJ_GC_BLACK // isblack(uv) - | ccmp TMP0w, #0, #4, ne // && uv->closed - | ccmn ITYPE, #-(LJ_TNUMX - LJ_TISGCV), #0, ne // && tvisgcv(v) - | bhi >2 - |1: - | ins_next - | - |2: // Check if new value is white. - | and GCOBJ:CARG3, CARG3, #LJ_GCVMASK - | ldrb TMP1w, GCOBJ:CARG3->gch.marked - | tst TMP1w, #LJ_GC_WHITES // iswhite(str) - | beq <1 - | // Crossed a write barrier. Move the barrier forward. - | mov CARG1, GL - | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) - | b <1 - break; - case BC_USETS: - | // RA = uvnum, RC = str_const (~) - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | add RA, RA, #offsetof(GCfuncL, uvptr)/8 - | mvn RC, RC - | and LFUNC:CARG2, CARG2, #LJ_GCVMASK - | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3] - | ldr STR:CARG3, [KBASE, RC, lsl #3] - | movn TMP0, #~LJ_TSTR - | ldr CARG2, UPVAL:CARG1->v - | ldrb TMP2w, UPVAL:CARG1->marked - | add TMP0, STR:CARG3, TMP0, lsl #47 - | ldrb TMP1w, STR:CARG3->marked - | str TMP0, [CARG2] - | tbnz TMP2w, #2, >2 // isblack(uv) - |1: - | ins_next - | - |2: // Check if string is white and ensure upvalue is closed. - | ldrb TMP0w, UPVAL:CARG1->closed - | tst TMP1w, #LJ_GC_WHITES // iswhite(str) - | ccmp TMP0w, #0, #0, ne - | beq <1 - | // Crossed a write barrier. Move the barrier forward. - | mov CARG1, GL - | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) - | b <1 - break; - case BC_USETN: - | // RA = uvnum, RC = num_const - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | add RA, RA, #offsetof(GCfuncL, uvptr)/8 - | and LFUNC:CARG2, CARG2, #LJ_GCVMASK - | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3] - | ldr TMP0, [KBASE, RC, lsl #3] - | ldr CARG2, UPVAL:CARG2->v - | str TMP0, [CARG2] - | ins_next - break; - case BC_USETP: - | // RA = uvnum, RC = primitive_type (~) - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | add RA, RA, #offsetof(GCfuncL, uvptr)/8 - | and LFUNC:CARG2, CARG2, #LJ_GCVMASK - | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3] - | mvn TMP0, RC, lsl #47 - | ldr CARG2, UPVAL:CARG2->v - | str TMP0, [CARG2] - | ins_next - break; - - case BC_UCLO: - | // RA = level, RC = target - | ldr CARG3, L->openupval - | add RC, PC, RC, lsl #2 - | str BASE, L->base - | sub PC, RC, #0x20000 - | cbz CARG3, >1 - | mov CARG1, L - | add CARG2, BASE, RA, lsl #3 - | bl extern lj_func_closeuv // (lua_State *L, TValue *level) - | ldr BASE, L->base - |1: - | ins_next - break; - - case BC_FNEW: - | // RA = dst, RC = proto_const (~) (holding function prototype) - | mvn RC, RC - | str BASE, L->base - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | str PC, SAVE_PC - | ldr CARG2, [KBASE, RC, lsl #3] - | mov CARG1, L - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | // (lua_State *L, GCproto *pt, GCfuncL *parent) - | bl extern lj_func_newL_gc - | // Returns GCfuncL *. - | ldr BASE, L->base - | movn TMP0, #~LJ_TFUNC - | add CRET1, CRET1, TMP0, lsl #47 - | str CRET1, [BASE, RA, lsl #3] - | ins_next - break; - - /* -- Table ops --------------------------------------------------------- */ - - case BC_TNEW: - case BC_TDUP: - | // RA = dst, RC = (hbits|asize) | tab_const (~) - | ldp CARG3, CARG4, GL->gc.total // Assumes threshold follows total. - | str BASE, L->base - | str PC, SAVE_PC - | mov CARG1, L - | cmp CARG3, CARG4 - | bhs >5 - |1: - if (op == BC_TNEW) { - | and CARG2, RC, #0x7ff - | lsr CARG3, RC, #11 - | cmp CARG2, #0x7ff - | mov TMP0, #0x801 - | csel CARG2, CARG2, TMP0, ne - | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) - | // Returns GCtab *. - } else { - | mvn RC, RC - | ldr CARG2, [KBASE, RC, lsl #3] - | bl extern lj_tab_dup // (lua_State *L, Table *kt) - | // Returns GCtab *. - } - | ldr BASE, L->base - | movk CRET1, #(LJ_TTAB>>1)&0xffff, lsl #48 - | str CRET1, [BASE, RA, lsl #3] - | ins_next - | - |5: - | bl extern lj_gc_step_fixtop // (lua_State *L) - | mov CARG1, L - | b <1 - break; - - case BC_GGET: - | // RA = dst, RC = str_const (~) - case BC_GSET: - | // RA = dst, RC = str_const (~) - | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] - | mvn RC, RC - | and LFUNC:CARG1, CARG1, #LJ_GCVMASK - | ldr TAB:CARG2, LFUNC:CARG1->env - | ldr STR:RC, [KBASE, RC, lsl #3] - if (op == BC_GGET) { - | b ->BC_TGETS_Z - } else { - | b ->BC_TSETS_Z - } - break; - - case BC_TGETV: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = dst, RB = table, RC = key - | ldr CARG2, [BASE, RB, lsl #3] - | ldr TMP1, [BASE, RC, lsl #3] - | checktab CARG2, ->vmeta_tgetv - | checkint TMP1, >9 // Integer key? - | ldr CARG3, TAB:CARG2->array - | ldr CARG1w, TAB:CARG2->asize - | add CARG3, CARG3, TMP1, uxtw #3 - | cmp TMP1w, CARG1w // In array part? - | bhs ->vmeta_tgetv - | ldr TMP0, [CARG3] - | cmp TMP0, TISNIL - | beq >5 - |1: - | str TMP0, [BASE, RA, lsl #3] - | ins_next - | - |5: // Check for __index if table value is nil. - | ldr TAB:CARG1, TAB:CARG2->metatable - | cbz TAB:CARG1, <1 // No metatable: done. - | ldrb TMP1w, TAB:CARG1->nomm - | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done. - | b ->vmeta_tgetv - | - |9: - | asr ITYPE, TMP1, #47 - | cmn ITYPE, #-LJ_TSTR // String key? - | bne ->vmeta_tgetv - | and STR:RC, TMP1, #LJ_GCVMASK - | b ->BC_TGETS_Z - break; - case BC_TGETS: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = dst, RB = table, RC = str_const (~) - | ldr CARG2, [BASE, RB, lsl #3] - | mvn RC, RC - | ldr STR:RC, [KBASE, RC, lsl #3] - | checktab CARG2, ->vmeta_tgets1 - |->BC_TGETS_Z: - | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = dst - | ldr TMP1w, TAB:CARG2->hmask - | ldr TMP2w, STR:RC->hash - | ldr NODE:CARG3, TAB:CARG2->node - | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask - | add TMP1, TMP1, TMP1, lsl #1 - | movn CARG4, #~LJ_TSTR - | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 - | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for. - |1: - | ldp TMP0, CARG1, NODE:CARG3->val - | ldr NODE:CARG3, NODE:CARG3->next - | cmp CARG1, CARG4 - | bne >4 - | cmp TMP0, TISNIL - | beq >5 - |3: - | str TMP0, [BASE, RA, lsl #3] - | ins_next - | - |4: // Follow hash chain. - | cbnz NODE:CARG3, <1 - | // End of hash chain: key not found, nil result. - | mov TMP0, TISNIL - | - |5: // Check for __index if table value is nil. - | ldr TAB:CARG1, TAB:CARG2->metatable - | cbz TAB:CARG1, <3 // No metatable: done. - | ldrb TMP1w, TAB:CARG1->nomm - | tbnz TMP1w, #MM_index, <3 // 'no __index' flag set: done. - | b ->vmeta_tgets - break; - case BC_TGETB: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = dst, RB = table, RC = index - | ldr CARG2, [BASE, RB, lsl #3] - | checktab CARG2, ->vmeta_tgetb - | ldr CARG3, TAB:CARG2->array - | ldr CARG1w, TAB:CARG2->asize - | add CARG3, CARG3, RC, lsl #3 - | cmp RCw, CARG1w // In array part? - | bhs ->vmeta_tgetb - | ldr TMP0, [CARG3] - | cmp TMP0, TISNIL - | beq >5 - |1: - | str TMP0, [BASE, RA, lsl #3] - | ins_next - | - |5: // Check for __index if table value is nil. - | ldr TAB:CARG1, TAB:CARG2->metatable - | cbz TAB:CARG1, <1 // No metatable: done. - | ldrb TMP1w, TAB:CARG1->nomm - | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done. - | b ->vmeta_tgetb - break; - case BC_TGETR: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = dst, RB = table, RC = key - | ldr CARG1, [BASE, RB, lsl #3] - | ldr TMP1, [BASE, RC, lsl #3] - | and TAB:CARG1, CARG1, #LJ_GCVMASK - | ldr CARG3, TAB:CARG1->array - | ldr TMP2w, TAB:CARG1->asize - | add CARG3, CARG3, TMP1w, uxtw #3 - | cmp TMP1w, TMP2w // In array part? - | bhs ->vmeta_tgetr - | ldr TMP0, [CARG3] - |->BC_TGETR_Z: - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - - case BC_TSETV: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = src, RB = table, RC = key - | ldr CARG2, [BASE, RB, lsl #3] - | ldr TMP1, [BASE, RC, lsl #3] - | checktab CARG2, ->vmeta_tsetv - | checkint TMP1, >9 // Integer key? - | ldr CARG3, TAB:CARG2->array - | ldr CARG1w, TAB:CARG2->asize - | add CARG3, CARG3, TMP1, uxtw #3 - | cmp TMP1w, CARG1w // In array part? - | bhs ->vmeta_tsetv - | ldr TMP1, [CARG3] - | ldr TMP0, [BASE, RA, lsl #3] - | ldrb TMP2w, TAB:CARG2->marked - | cmp TMP1, TISNIL // Previous value is nil? - | beq >5 - |1: - | str TMP0, [CARG3] - | tbnz TMP2w, #2, >7 // isblack(table) - |2: - | ins_next - | - |5: // Check for __newindex if previous value is nil. - | ldr TAB:CARG1, TAB:CARG2->metatable - | cbz TAB:CARG1, <1 // No metatable: done. - | ldrb TMP1w, TAB:CARG1->nomm - | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done. - | b ->vmeta_tsetv - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP2w, TMP1 - | b <2 - | - |9: - | asr ITYPE, TMP1, #47 - | cmn ITYPE, #-LJ_TSTR // String key? - | bne ->vmeta_tsetv - | and STR:RC, TMP1, #LJ_GCVMASK - | b ->BC_TSETS_Z - break; - case BC_TSETS: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = dst, RB = table, RC = str_const (~) - | ldr CARG2, [BASE, RB, lsl #3] - | mvn RC, RC - | ldr STR:RC, [KBASE, RC, lsl #3] - | checktab CARG2, ->vmeta_tsets1 - |->BC_TSETS_Z: - | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = src - | ldr TMP1w, TAB:CARG2->hmask - | ldr TMP2w, STR:RC->hash - | ldr NODE:CARG3, TAB:CARG2->node - | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask - | add TMP1, TMP1, TMP1, lsl #1 - | movn CARG4, #~LJ_TSTR - | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 - | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for. - | strb wzr, TAB:CARG2->nomm // Clear metamethod cache. - |1: - | ldp TMP1, CARG1, NODE:CARG3->val - | ldr NODE:TMP3, NODE:CARG3->next - | ldrb TMP2w, TAB:CARG2->marked - | cmp CARG1, CARG4 - | bne >5 - | ldr TMP0, [BASE, RA, lsl #3] - | cmp TMP1, TISNIL // Previous value is nil? - | beq >4 - |2: - | str TMP0, NODE:CARG3->val - | tbnz TMP2w, #2, >7 // isblack(table) - |3: - | ins_next - | - |4: // Check for __newindex if previous value is nil. - | ldr TAB:CARG1, TAB:CARG2->metatable - | cbz TAB:CARG1, <2 // No metatable: done. - | ldrb TMP1w, TAB:CARG1->nomm - | tbnz TMP1w, #MM_newindex, <2 // 'no __newindex' flag set: done. - | b ->vmeta_tsets - | - |5: // Follow hash chain. - | mov NODE:CARG3, NODE:TMP3 - | cbnz NODE:TMP3, <1 - | // End of hash chain: key not found, add a new one. - | - | // But check for __newindex first. - | ldr TAB:CARG1, TAB:CARG2->metatable - | cbz TAB:CARG1, >6 // No metatable: continue. - | ldrb TMP1w, TAB:CARG1->nomm - | // 'no __newindex' flag NOT set: check. - | tbz TMP1w, #MM_newindex, ->vmeta_tsets - |6: - | movn TMP1, #~LJ_TSTR - | str PC, SAVE_PC - | add TMP0, STR:RC, TMP1, lsl #47 - | str BASE, L->base - | mov CARG1, L - | str TMP0, TMPD - | add CARG3, sp, TMPDofs - | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) - | // Returns TValue *. - | ldr BASE, L->base - | ldr TMP0, [BASE, RA, lsl #3] - | str TMP0, [CRET1] - | b <3 // No 2nd write barrier needed. - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP2w, TMP1 - | b <3 - break; - case BC_TSETB: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = src, RB = table, RC = index - | ldr CARG2, [BASE, RB, lsl #3] - | checktab CARG2, ->vmeta_tsetb - | ldr CARG3, TAB:CARG2->array - | ldr CARG1w, TAB:CARG2->asize - | add CARG3, CARG3, RC, lsl #3 - | cmp RCw, CARG1w // In array part? - | bhs ->vmeta_tsetb - | ldr TMP1, [CARG3] - | ldr TMP0, [BASE, RA, lsl #3] - | ldrb TMP2w, TAB:CARG2->marked - | cmp TMP1, TISNIL // Previous value is nil? - | beq >5 - |1: - | str TMP0, [CARG3] - | tbnz TMP2w, #2, >7 // isblack(table) - |2: - | ins_next - | - |5: // Check for __newindex if previous value is nil. - | ldr TAB:CARG1, TAB:CARG2->metatable - | cbz TAB:CARG1, <1 // No metatable: done. - | ldrb TMP1w, TAB:CARG1->nomm - | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done. - | b ->vmeta_tsetb - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP2w, TMP1 - | b <2 - break; - case BC_TSETR: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = src, RB = table, RC = key - | ldr CARG2, [BASE, RB, lsl #3] - | ldr TMP1, [BASE, RC, lsl #3] - | and TAB:CARG2, CARG2, #LJ_GCVMASK - | ldr CARG1, TAB:CARG2->array - | ldrb TMP2w, TAB:CARG2->marked - | ldr CARG4w, TAB:CARG2->asize - | add CARG1, CARG1, TMP1, uxtw #3 - | tbnz TMP2w, #2, >7 // isblack(table) - |2: - | cmp TMP1w, CARG4w // In array part? - | bhs ->vmeta_tsetr - |->BC_TSETR_Z: - | ldr TMP0, [BASE, RA, lsl #3] - | str TMP0, [CARG1] - | ins_next - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP2w, TMP0 - | b <2 - break; - - case BC_TSETM: - | // RA = base (table at base-1), RC = num_const (start index) - | add RA, BASE, RA, lsl #3 - |1: - | ldr RBw, SAVE_MULTRES - | ldr TAB:CARG2, [RA, #-8] // Guaranteed to be a table. - | ldr TMP1, [KBASE, RC, lsl #3] // Integer constant is in lo-word. - | sub RB, RB, #8 - | cbz RB, >4 // Nothing to copy? - | and TAB:CARG2, CARG2, #LJ_GCVMASK - | ldr CARG1w, TAB:CARG2->asize - | add CARG3w, TMP1w, RBw, lsr #3 - | ldr CARG4, TAB:CARG2->array - | cmp CARG3, CARG1 - | add RB, RA, RB - | bhi >5 - | add TMP1, CARG4, TMP1w, uxtw #3 - | ldrb TMP2w, TAB:CARG2->marked - |3: // Copy result slots to table. - | ldr TMP0, [RA], #8 - | str TMP0, [TMP1], #8 - | cmp RA, RB - | blo <3 - | tbnz TMP2w, #2, >7 // isblack(table) - |4: - | ins_next - | - |5: // Need to resize array part. - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) - | // Must not reallocate the stack. - | b <1 - | - |7: // Possible table write barrier for any value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP2w, TMP1 - | b <4 - break; - - /* -- Calls and vararg handling ----------------------------------------- */ - - case BC_CALLM: - | // RA = base, (RB = nresults+1,) RC = extra_nargs - | ldr TMP0w, SAVE_MULTRES - | decode_RC8RD NARGS8:RC, RC - | add NARGS8:RC, NARGS8:RC, TMP0 - | b ->BC_CALL_Z - break; - case BC_CALL: - | decode_RC8RD NARGS8:RC, RC - | // RA = base, (RB = nresults+1,) RC = (nargs+1)*8 - |->BC_CALL_Z: - | mov RB, BASE // Save old BASE for vmeta_call. - | add BASE, BASE, RA, lsl #3 - | ldr CARG3, [BASE] - | sub NARGS8:RC, NARGS8:RC, #8 - | add BASE, BASE, #16 - | checkfunc CARG3, ->vmeta_call - | ins_call - break; - - case BC_CALLMT: - | // RA = base, (RB = 0,) RC = extra_nargs - | ldr TMP0w, SAVE_MULTRES - | add NARGS8:RC, TMP0, RC, lsl #3 - | b ->BC_CALLT1_Z - break; - case BC_CALLT: - | lsl NARGS8:RC, RC, #3 - | // RA = base, (RB = 0,) RC = (nargs+1)*8 - |->BC_CALLT1_Z: - | add RA, BASE, RA, lsl #3 - | ldr TMP1, [RA] - | sub NARGS8:RC, NARGS8:RC, #8 - | add RA, RA, #16 - | checktp CARG3, TMP1, LJ_TFUNC, ->vmeta_callt - | ldr PC, [BASE, FRAME_PC] - |->BC_CALLT2_Z: - | mov RB, #0 - | ldrb TMP2w, LFUNC:CARG3->ffid - | tst PC, #FRAME_TYPE - | bne >7 - |1: - | str TMP1, [BASE, FRAME_FUNC] // Copy function down, but keep PC. - | cbz NARGS8:RC, >3 - |2: - | ldr TMP0, [RA, RB] - | add TMP1, RB, #8 - | cmp TMP1, NARGS8:RC - | str TMP0, [BASE, RB] - | mov RB, TMP1 - | bne <2 - |3: - | cmp TMP2, #1 // (> FF_C) Calling a fast function? - | bhi >5 - |4: - | ins_callt - | - |5: // Tailcall to a fast function with a Lua frame below. - | ldrb RAw, [PC, #-3] - | sub CARG1, BASE, RA, lsl #3 - | ldr LFUNC:CARG1, [CARG1, #-32] - | and LFUNC:CARG1, CARG1, #LJ_GCVMASK - | ldr CARG1, LFUNC:CARG1->pc - | ldr KBASE, [CARG1, #PC2PROTO(k)] - | b <4 - | - |7: // Tailcall from a vararg function. - | eor PC, PC, #FRAME_VARG - | tst PC, #FRAME_TYPEP // Vararg frame below? - | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below. - | bne <1 - | sub BASE, BASE, PC - | ldr PC, [BASE, FRAME_PC] - | tst PC, #FRAME_TYPE - | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below. - | b <1 - break; - - case BC_ITERC: - | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) - | add RA, BASE, RA, lsl #3 - | ldr CARG3, [RA, #-24] - | mov RB, BASE // Save old BASE for vmeta_call. - | ldp CARG1, CARG2, [RA, #-16] - | add BASE, RA, #16 - | mov NARGS8:RC, #16 // Iterators get 2 arguments. - | str CARG3, [RA] // Copy callable. - | stp CARG1, CARG2, [RA, #16] // Copy state and control var. - | checkfunc CARG3, ->vmeta_call - | ins_call - break; - - case BC_ITERN: - | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. - |.endif - | add RA, BASE, RA, lsl #3 - | ldr TAB:RB, [RA, #-16] - | ldrh TMP3w, [PC, #2] - | ldr CARG1w, [RA, #-8] // Get index from control var. - | add PC, PC, #4 - | add TMP3, PC, TMP3, lsl #2 - | and TAB:RB, RB, #LJ_GCVMASK - | sub TMP3, TMP3, #0x20000 - | ldr TMP1w, TAB:RB->asize - | ldr CARG2, TAB:RB->array - |1: // Traverse array part. - | subs RC, CARG1, TMP1 - | add CARG3, CARG2, CARG1, lsl #3 - | bhs >5 // Index points after array part? - | ldr TMP0, [CARG3] - | cmp TMP0, TISNIL - | cinc CARG1, CARG1, eq // Skip holes in array part. - | beq <1 - | add CARG1, CARG1, TISNUM - | stp CARG1, TMP0, [RA] - | add CARG1, CARG1, #1 - |3: - | str CARG1w, [RA, #-8] // Update control var. - | mov PC, TMP3 - |4: - | ins_next - | - |5: // Traverse hash part. - | ldr TMP2w, TAB:RB->hmask - | ldr NODE:RB, TAB:RB->node - |6: - | add CARG1, RC, RC, lsl #1 - | cmp RC, TMP2 // End of iteration? Branch to ITERN+1. - | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8 - | bhi <4 - | ldp TMP0, CARG1, NODE:CARG3->val - | cmp TMP0, TISNIL - | add RC, RC, #1 - | beq <6 // Skip holes in hash part. - | stp CARG1, TMP0, [RA] - | add CARG1, RC, TMP1 - | b <3 - break; - - case BC_ISNEXT: - | // RA = base, RC = target (points to ITERN) - | add RA, BASE, RA, lsl #3 - | ldr CFUNC:CARG1, [RA, #-24] - | add RC, PC, RC, lsl #2 - | ldp TAB:CARG3, CARG4, [RA, #-16] - | sub RC, RC, #0x20000 - | checkfunc CFUNC:CARG1, >5 - | asr TMP0, TAB:CARG3, #47 - | ldrb TMP1w, CFUNC:CARG1->ffid - | cmn TMP0, #-LJ_TTAB - | ccmp CARG4, TISNIL, #0, eq - | ccmp TMP1w, #FF_next_N, #0, eq - | bne >5 - | mov TMP0w, #0xfffe7fff - | lsl TMP0, TMP0, #32 - | str TMP0, [RA, #-8] // Initialize control var. - |1: - | mov PC, RC - | ins_next - | - |5: // Despecialize bytecode if any of the checks fail. - | mov TMP0, #BC_JMP - | mov TMP1, #BC_ITERC - | strb TMP0w, [PC, #-4] - | strb TMP1w, [RC] - | b <1 - break; - - case BC_VARG: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = base, RB = (nresults+1), RC = numparams - | ldr TMP1, [BASE, FRAME_PC] - | add RC, BASE, RC, lsl #3 - | add RA, BASE, RA, lsl #3 - | add RC, RC, #FRAME_VARG - | add TMP2, RA, RB, lsl #3 - | sub RC, RC, TMP1 // RC = vbase - | // Note: RC may now be even _above_ BASE if nargs was < numparams. - | sub TMP3, BASE, #16 // TMP3 = vtop - | cbz RB, >5 - | sub TMP2, TMP2, #16 - |1: // Copy vararg slots to destination slots. - | cmp RC, TMP3 - | ldr TMP0, [RC], #8 - | csel TMP0, TMP0, TISNIL, lo - | cmp RA, TMP2 - | str TMP0, [RA], #8 - | blo <1 - |2: - | ins_next - | - |5: // Copy all varargs. - | ldr TMP0, L->maxstack - | subs TMP2, TMP3, RC - | csel RB, xzr, TMP2, le // MULTRES = (max(vtop-vbase,0)+1)*8 - | add RB, RB, #8 - | add TMP1, RA, TMP2 - | str RBw, SAVE_MULTRES - | ble <2 // Nothing to copy. - | cmp TMP1, TMP0 - | bhi >7 - |6: - | ldr TMP0, [RC], #8 - | str TMP0, [RA], #8 - | cmp RC, TMP3 - | blo <6 - | b <2 - | - |7: // Grow stack for varargs. - | lsr CARG2, TMP2, #3 - | stp BASE, RA, L->base - | mov CARG1, L - | sub RC, RC, BASE // Need delta, because BASE may change. - | str PC, SAVE_PC - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldp BASE, RA, L->base - | add RC, BASE, RC - | sub TMP3, BASE, #16 - | b <6 - break; - - /* -- Returns ----------------------------------------------------------- */ - - case BC_RETM: - | // RA = results, RC = extra results - | ldr TMP0w, SAVE_MULTRES - | ldr PC, [BASE, FRAME_PC] - | add RA, BASE, RA, lsl #3 - | add RC, TMP0, RC, lsl #3 - | b ->BC_RETM_Z - break; - - case BC_RET: - | // RA = results, RC = nresults+1 - | ldr PC, [BASE, FRAME_PC] - | lsl RC, RC, #3 - | add RA, BASE, RA, lsl #3 - |->BC_RETM_Z: - | str RCw, SAVE_MULTRES - |1: - | ands CARG1, PC, #FRAME_TYPE - | eor CARG2, PC, #FRAME_VARG - | bne ->BC_RETV2_Z - | - |->BC_RET_Z: - | // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return - | ldr INSw, [PC, #-4] - | subs TMP1, RC, #8 - | sub CARG3, BASE, #16 - | beq >3 - |2: - | ldr TMP0, [RA], #8 - | add BASE, BASE, #8 - | sub TMP1, TMP1, #8 - | str TMP0, [BASE, #-24] - | cbnz TMP1, <2 - |3: - | decode_RA RA, INS - | sub CARG4, CARG3, RA, lsl #3 - | decode_RB RB, INS - | ldr LFUNC:CARG1, [CARG4, FRAME_FUNC] - |5: - | cmp RC, RB, lsl #3 // More results expected? - | blo >6 - | and LFUNC:CARG1, CARG1, #LJ_GCVMASK - | mov BASE, CARG4 - | ldr CARG2, LFUNC:CARG1->pc - | ldr KBASE, [CARG2, #PC2PROTO(k)] - | ins_next - | - |6: // Fill up results with nil. - | add BASE, BASE, #8 - | add RC, RC, #8 - | str TISNIL, [BASE, #-24] - | b <5 - | - |->BC_RETV1_Z: // Non-standard return case. - | add RA, BASE, RA, lsl #3 - |->BC_RETV2_Z: - | tst CARG2, #FRAME_TYPEP - | bne ->vm_return - | // Return from vararg function: relocate BASE down. - | sub BASE, BASE, CARG2 - | ldr PC, [BASE, FRAME_PC] - | b <1 - break; - - case BC_RET0: case BC_RET1: - | // RA = results, RC = nresults+1 - | ldr PC, [BASE, FRAME_PC] - | lsl RC, RC, #3 - | str RCw, SAVE_MULTRES - | ands CARG1, PC, #FRAME_TYPE - | eor CARG2, PC, #FRAME_VARG - | bne ->BC_RETV1_Z - | ldr INSw, [PC, #-4] - if (op == BC_RET1) { - | ldr TMP0, [BASE, RA, lsl #3] - } - | sub CARG4, BASE, #16 - | decode_RA RA, INS - | sub BASE, CARG4, RA, lsl #3 - if (op == BC_RET1) { - | str TMP0, [CARG4], #8 - } - | decode_RB RB, INS - | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] - |5: - | cmp RC, RB, lsl #3 - | blo >6 - | and LFUNC:CARG1, CARG1, #LJ_GCVMASK - | ldr CARG2, LFUNC:CARG1->pc - | ldr KBASE, [CARG2, #PC2PROTO(k)] - | ins_next - | - |6: // Fill up results with nil. - | add RC, RC, #8 - | str TISNIL, [CARG4], #8 - | b <5 - break; - - /* -- Loops and branches ------------------------------------------------ */ - - |.define FOR_IDX, [RA]; .define FOR_TIDX, [RA, #4] - |.define FOR_STOP, [RA, #8]; .define FOR_TSTOP, [RA, #12] - |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20] - |.define FOR_EXT, [RA, #24]; .define FOR_TEXT, [RA, #28] - - case BC_FORL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IFORL follows. - break; - - case BC_JFORI: - case BC_JFORL: -#if !LJ_HASJIT - break; -#endif - case BC_FORI: - case BC_IFORL: - | // RA = base, RC = target (after end of loop or start of loop) - vk = (op == BC_IFORL || op == BC_JFORL); - | add RA, BASE, RA, lsl #3 - | ldp CARG1, CARG2, FOR_IDX // CARG1 = IDX, CARG2 = STOP - | ldr CARG3, FOR_STEP // CARG3 = STEP - if (op != BC_JFORL) { - | add RC, PC, RC, lsl #2 - | sub RC, RC, #0x20000 - } - | checkint CARG1, >5 - if (!vk) { - | checkint CARG2, ->vmeta_for - | checkint CARG3, ->vmeta_for - | tbnz CARG3w, #31, >4 - | cmp CARG1w, CARG2w - } else { - | adds CARG1w, CARG1w, CARG3w - | bvs >2 - | add TMP0, CARG1, TISNUM - | tbnz CARG3w, #31, >4 - | cmp CARG1w, CARG2w - } - |1: - if (op == BC_FORI) { - | csel PC, RC, PC, gt - } else if (op == BC_JFORI) { - | mov PC, RC - | ldrh RCw, [RC, #-2] - } else if (op == BC_IFORL) { - | csel PC, RC, PC, le - } - if (vk) { - | str TMP0, FOR_IDX - | str TMP0, FOR_EXT - } else { - | str CARG1, FOR_EXT - } - if (op == BC_JFORI || op == BC_JFORL) { - | ble =>BC_JLOOP - } - |2: - | ins_next - | - |4: // Invert check for negative step. - | cmp CARG2w, CARG1w - | b <1 - | - |5: // FP loop. - | ldp d0, d1, FOR_IDX - | blo ->vmeta_for - if (!vk) { - | checknum CARG2, ->vmeta_for - | checknum CARG3, ->vmeta_for - | str d0, FOR_EXT - } else { - | ldr d2, FOR_STEP - | fadd d0, d0, d2 - } - | tbnz CARG3, #63, >7 - | fcmp d0, d1 - |6: - if (vk) { - | str d0, FOR_IDX - | str d0, FOR_EXT - } - if (op == BC_FORI) { - | csel PC, RC, PC, hi - } else if (op == BC_JFORI) { - | ldrh RCw, [RC, #-2] - | bls =>BC_JLOOP - } else if (op == BC_IFORL) { - | csel PC, RC, PC, ls - } else { - | bls =>BC_JLOOP - } - | b <2 - | - |7: // Invert check for negative step. - | fcmp d1, d0 - | b <6 - break; - - case BC_ITERL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IITERL follows. - break; - - case BC_JITERL: -#if !LJ_HASJIT - break; -#endif - case BC_IITERL: - | // RA = base, RC = target - | ldr CARG1, [BASE, RA, lsl #3] - | add TMP1, BASE, RA, lsl #3 - | cmp CARG1, TISNIL - | beq >1 // Stop if iterator returned nil. - if (op == BC_JITERL) { - | str CARG1, [TMP1, #-8] - | b =>BC_JLOOP - } else { - | add TMP0, PC, RC, lsl #2 // Otherwise save control var + branch. - | sub PC, TMP0, #0x20000 - | str CARG1, [TMP1, #-8] - } - |1: - | ins_next - break; - - case BC_LOOP: - | // RA = base, RC = target (loop extent) - | // Note: RA/RC is only used by trace recorder to determine scope/extent - | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_ILOOP follows. - break; - - case BC_ILOOP: - | // RA = base, RC = target (loop extent) - | ins_next - break; - - case BC_JLOOP: - |.if JIT - | // RA = base (ignored), RC = traceno - | ldr CARG1, [GL, #GL_J(trace)] - | mov CARG2, #0 // Traces on ARM64 don't store the trace #, so use 0. - | ldr TRACE:RC, [CARG1, RC, lsl #3] - | st_vmstate CARG2 - | ldr RA, TRACE:RC->mcode - | str BASE, GL->jit_base - | str L, GL->tmpbuf.L - | sub sp, sp, #16 // See SPS_FIXED. Avoids sp adjust in every root trace. - | br RA - |.endif - break; - - case BC_JMP: - | // RA = base (only used by trace recorder), RC = target - | add RC, PC, RC, lsl #2 - | sub PC, RC, #0x20000 - | ins_next - break; - - /* -- Function headers -------------------------------------------------- */ - - case BC_FUNCF: - |.if JIT - | hotcall - |.endif - case BC_FUNCV: /* NYI: compiled vararg functions. */ - | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. - break; - - case BC_JFUNCF: -#if !LJ_HASJIT - break; -#endif - case BC_IFUNCF: - | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 - | ldr CARG1, L->maxstack - | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)] - | ldr KBASE, [PC, #-4+PC2PROTO(k)] - | cmp RA, CARG1 - | bhi ->vm_growstack_l - |2: - | cmp NARGS8:RC, TMP1, lsl #3 // Check for missing parameters. - | blo >3 - if (op == BC_JFUNCF) { - | decode_RD RC, INS - | b =>BC_JLOOP - } else { - | ins_next - } - | - |3: // Clear missing parameters. - | str TISNIL, [BASE, NARGS8:RC] - | add NARGS8:RC, NARGS8:RC, #8 - | b <2 - break; - - case BC_JFUNCV: -#if !LJ_HASJIT - break; -#endif - | NYI // NYI: compiled vararg functions - break; /* NYI: compiled vararg functions. */ - - case BC_IFUNCV: - | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 - | ldr CARG1, L->maxstack - | movn TMP0, #~LJ_TFUNC - | add TMP2, BASE, RC - | add LFUNC:CARG3, CARG3, TMP0, lsl #47 - | add RA, RA, RC - | add TMP0, RC, #16+FRAME_VARG - | str LFUNC:CARG3, [TMP2], #8 // Store (tagged) copy of LFUNC. - | ldr KBASE, [PC, #-4+PC2PROTO(k)] - | cmp RA, CARG1 - | str TMP0, [TMP2], #8 // Store delta + FRAME_VARG. - | bhs ->vm_growstack_l - | sub RC, TMP2, #16 - | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)] - | mov RA, BASE - | mov BASE, TMP2 - | cbz TMP1, >2 - |1: - | cmp RA, RC // Less args than parameters? - | bhs >3 - | ldr TMP0, [RA] - | sub TMP1, TMP1, #1 - | str TISNIL, [RA], #8 // Clear old fixarg slot (help the GC). - | str TMP0, [TMP2], #8 - | cbnz TMP1, <1 - |2: - | ins_next - | - |3: - | sub TMP1, TMP1, #1 - | str TISNIL, [TMP2], #8 - | cbz TMP1, <2 - | b <3 - break; - - case BC_FUNCC: - case BC_FUNCCW: - | // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8 - if (op == BC_FUNCC) { - | ldr CARG4, CFUNC:CARG3->f - } else { - | ldr CARG4, GL->wrapf - } - | add CARG2, RA, NARGS8:RC - | ldr CARG1, L->maxstack - | add RC, BASE, NARGS8:RC - | cmp CARG2, CARG1 - | stp BASE, RC, L->base - if (op == BC_FUNCCW) { - | ldr CARG2, CFUNC:CARG3->f - } - | mv_vmstate TMP0w, C - | mov CARG1, L - | bhi ->vm_growstack_c // Need to grow stack. - | st_vmstate TMP0w - | blr CARG4 // (lua_State *L [, lua_CFunction f]) - | // Returns nresults. - | ldp BASE, TMP1, L->base - | str L, GL->cur_L - | sbfiz RC, CRET1, #3, #32 - | st_vmstate ST_INTERP - | ldr PC, [BASE, FRAME_PC] - | sub RA, TMP1, RC // RA = L->top - nresults*8 - | b ->vm_returnc - break; - - /* ---------------------------------------------------------------------- */ - - default: - fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); - exit(2); - break; - } -} - -static int build_backend(BuildCtx *ctx) -{ - int op; - - dasm_growpc(Dst, BC__MAX); - - build_subroutines(ctx); - - |.code_op - for (op = 0; op < BC__MAX; op++) - build_ins(ctx, (BCOp)op, op); - - return BC__MAX; -} - -/* Emit pseudo frame-info for all assembler functions. */ -static void emit_asm_debug(BuildCtx *ctx) -{ - int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); - int i, cf = CFRAME_SIZE >> 3; - switch (ctx->mode) { - case BUILD_elfasm: - fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n"); - fprintf(ctx->fp, - ".Lframe0:\n" - "\t.long .LECIE0-.LSCIE0\n" - ".LSCIE0:\n" - "\t.long 0xffffffff\n" - "\t.byte 0x1\n" - "\t.string \"\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -8\n" - "\t.byte 30\n" /* Return address is in lr. */ - "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ - "\t.align 3\n" - ".LECIE0:\n\n"); - fprintf(ctx->fp, - ".LSFDE0:\n" - "\t.long .LEFDE0-.LASFDE0\n" - ".LASFDE0:\n" - "\t.long .Lframe0\n" - "\t.quad .Lbegin\n" - "\t.quad %d\n" - "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ - "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */ - "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */ - fcofs, CFRAME_SIZE, cf, cf-1); - for (i = 19; i <= 28; i++) /* offset x19-x28 */ - fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17); - for (i = 8; i <= 15; i++) /* offset d8-d15 */ - fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", - 64+i, cf-i-4); - fprintf(ctx->fp, - "\t.align 3\n" - ".LEFDE0:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".LSFDE1:\n" - "\t.long .LEFDE1-.LASFDE1\n" - ".LASFDE1:\n" - "\t.long .Lframe0\n" - "\t.quad lj_vm_ffi_call\n" - "\t.quad %d\n" - "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ - "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ - "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ - "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */ - "\t.align 3\n" - ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); -#endif - fprintf(ctx->fp, "\t.section .eh_frame,\"a\",%%progbits\n"); - fprintf(ctx->fp, - ".Lframe1:\n" - "\t.long .LECIE1-.LSCIE1\n" - ".LSCIE1:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.string \"zPR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -8\n" - "\t.byte 30\n" /* Return address is in lr. */ - "\t.uleb128 6\n" /* augmentation length */ - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.long lj_err_unwind_dwarf-.\n" - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ - "\t.align 3\n" - ".LECIE1:\n\n"); - fprintf(ctx->fp, - ".LSFDE2:\n" - "\t.long .LEFDE2-.LASFDE2\n" - ".LASFDE2:\n" - "\t.long .LASFDE2-.Lframe1\n" - "\t.long .Lbegin-.\n" - "\t.long %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ - "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */ - "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */ - fcofs, CFRAME_SIZE, cf, cf-1); - for (i = 19; i <= 28; i++) /* offset x19-x28 */ - fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17); - for (i = 8; i <= 15; i++) /* offset d8-d15 */ - fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", - 64+i, cf-i-4); - fprintf(ctx->fp, - "\t.align 3\n" - ".LEFDE2:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".Lframe2:\n" - "\t.long .LECIE2-.LSCIE2\n" - ".LSCIE2:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.string \"zR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -8\n" - "\t.byte 30\n" /* Return address is in lr. */ - "\t.uleb128 1\n" /* augmentation length */ - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ - "\t.align 3\n" - ".LECIE2:\n\n"); - fprintf(ctx->fp, - ".LSFDE3:\n" - "\t.long .LEFDE3-.LASFDE3\n" - ".LASFDE3:\n" - "\t.long .LASFDE3-.Lframe2\n" - "\t.long lj_vm_ffi_call-.\n" - "\t.long %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ - "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ - "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ - "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */ - "\t.align 3\n" - ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); -#endif - break; - default: - break; - } -} - diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc deleted file mode 100644 index 1afd61187a..0000000000 --- a/src/vm_mips.dasc +++ /dev/null @@ -1,5264 +0,0 @@ -|// Low-level VM code for MIPS CPUs. -|// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -|// -|// MIPS soft-float support contributed by Djordje Kovacevic and -|// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc. -| -|.arch mips -|.section code_op, code_sub -| -|.actionlist build_actionlist -|.globals GLOB_ -|.globalnames globnames -|.externnames extnames -| -|// Note: The ragged indentation of the instructions is intentional. -|// The starting columns indicate data dependencies. -| -|//----------------------------------------------------------------------- -| -|// Fixed register assignments for the interpreter. -|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra -| -|.macro .FPU, a, b -|.if FPU -| a, b -|.endif -|.endmacro -| -|// The following must be C callee-save (but BASE is often refetched). -|.define BASE, r16 // Base of current Lua stack frame. -|.define KBASE, r17 // Constants of current Lua function. -|.define PC, r18 // Next PC. -|.define DISPATCH, r19 // Opcode dispatch table. -|.define LREG, r20 // Register holding lua_State (also in SAVE_L). -|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. -| -|.define JGL, r30 // On-trace: global_State + 32768. -| -|// Constants for type-comparisons, stores and conversions. C callee-save. -|.define TISNUM, r22 -|.define TISNIL, r30 -|.if FPU -|.define TOBIT, f30 // 2^52 + 2^51. -|.endif -| -|// The following temporaries are not saved across C calls, except for RA. -|.define RA, r23 // Callee-save. -|.define RB, r8 -|.define RC, r9 -|.define RD, r10 -|.define INS, r11 -| -|.define AT, r1 // Assembler temporary. -|.define TMP0, r12 -|.define TMP1, r13 -|.define TMP2, r14 -|.define TMP3, r15 -| -|// MIPS o32 calling convention. -|.define CFUNCADDR, r25 -|.define CARG1, r4 -|.define CARG2, r5 -|.define CARG3, r6 -|.define CARG4, r7 -| -|.define CRET1, r2 -|.define CRET2, r3 -| -|.if ENDIAN_LE -|.define SFRETLO, CRET1 -|.define SFRETHI, CRET2 -|.define SFARG1LO, CARG1 -|.define SFARG1HI, CARG2 -|.define SFARG2LO, CARG3 -|.define SFARG2HI, CARG4 -|.else -|.define SFRETLO, CRET2 -|.define SFRETHI, CRET1 -|.define SFARG1LO, CARG2 -|.define SFARG1HI, CARG1 -|.define SFARG2LO, CARG4 -|.define SFARG2HI, CARG3 -|.endif -| -|.if FPU -|.define FARG1, f12 -|.define FARG2, f14 -| -|.define FRET1, f0 -|.define FRET2, f2 -|.endif -| -|// Stack layout while in interpreter. Must match with lj_frame.h. -|.if FPU // MIPS32 hard-float. -| -|.define CFRAME_SPACE, 112 // Delta for sp. -| -|.define SAVE_ERRF, 124(sp) // 32 bit C frame info. -|.define SAVE_NRES, 120(sp) -|.define SAVE_CFRAME, 116(sp) -|.define SAVE_L, 112(sp) -|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. -|.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves. -|.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves. -| -|.else // MIPS32 soft-float -| -|.define CFRAME_SPACE, 64 // Delta for sp. -| -|.define SAVE_ERRF, 76(sp) // 32 bit C frame info. -|.define SAVE_NRES, 72(sp) -|.define SAVE_CFRAME, 68(sp) -|.define SAVE_L, 64(sp) -|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. -|.define SAVE_GPR_, 24 // .. 24+10*4: 32 bit GPR saves. -| -|.endif -| -|.define SAVE_PC, 20(sp) -|.define ARG5, 16(sp) -|.define CSAVE_4, 12(sp) -|.define CSAVE_3, 8(sp) -|.define CSAVE_2, 4(sp) -|.define CSAVE_1, 0(sp) -|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by callee. -| -|.define ARG5_OFS, 16 -|.define SAVE_MULTRES, ARG5 -| -|//----------------------------------------------------------------------- -| -|.macro saveregs -| addiu sp, sp, -CFRAME_SPACE -| sw ra, SAVE_GPR_+9*4(sp) -| sw r30, SAVE_GPR_+8*4(sp) -| .FPU sdc1 f30, SAVE_FPR_+5*8(sp) -| sw r23, SAVE_GPR_+7*4(sp) -| sw r22, SAVE_GPR_+6*4(sp) -| .FPU sdc1 f28, SAVE_FPR_+4*8(sp) -| sw r21, SAVE_GPR_+5*4(sp) -| sw r20, SAVE_GPR_+4*4(sp) -| .FPU sdc1 f26, SAVE_FPR_+3*8(sp) -| sw r19, SAVE_GPR_+3*4(sp) -| sw r18, SAVE_GPR_+2*4(sp) -| .FPU sdc1 f24, SAVE_FPR_+2*8(sp) -| sw r17, SAVE_GPR_+1*4(sp) -| sw r16, SAVE_GPR_+0*4(sp) -| .FPU sdc1 f22, SAVE_FPR_+1*8(sp) -| .FPU sdc1 f20, SAVE_FPR_+0*8(sp) -|.endmacro -| -|.macro restoreregs_ret -| lw ra, SAVE_GPR_+9*4(sp) -| lw r30, SAVE_GPR_+8*4(sp) -| .FPU ldc1 f30, SAVE_FPR_+5*8(sp) -| lw r23, SAVE_GPR_+7*4(sp) -| lw r22, SAVE_GPR_+6*4(sp) -| .FPU ldc1 f28, SAVE_FPR_+4*8(sp) -| lw r21, SAVE_GPR_+5*4(sp) -| lw r20, SAVE_GPR_+4*4(sp) -| .FPU ldc1 f26, SAVE_FPR_+3*8(sp) -| lw r19, SAVE_GPR_+3*4(sp) -| lw r18, SAVE_GPR_+2*4(sp) -| .FPU ldc1 f24, SAVE_FPR_+2*8(sp) -| lw r17, SAVE_GPR_+1*4(sp) -| lw r16, SAVE_GPR_+0*4(sp) -| .FPU ldc1 f22, SAVE_FPR_+1*8(sp) -| .FPU ldc1 f20, SAVE_FPR_+0*8(sp) -| jr ra -| addiu sp, sp, CFRAME_SPACE -|.endmacro -| -|// Type definitions. Some of these are only used for documentation. -|.type L, lua_State, LREG -|.type GL, global_State -|.type TVALUE, TValue -|.type GCOBJ, GCobj -|.type STR, GCstr -|.type TAB, GCtab -|.type LFUNC, GCfuncL -|.type CFUNC, GCfuncC -|.type PROTO, GCproto -|.type UPVAL, GCupval -|.type NODE, Node -|.type NARGS8, int -|.type TRACE, GCtrace -|.type SBUF, SBuf -| -|//----------------------------------------------------------------------- -| -|// Trap for not-yet-implemented parts. -|.macro NYI; .long 0xf0f0f0f0; .endmacro -| -|// Macros to mark delay slots. -|.macro ., a; a; .endmacro -|.macro ., a,b; a,b; .endmacro -|.macro ., a,b,c; a,b,c; .endmacro -| -|//----------------------------------------------------------------------- -| -|// Endian-specific defines. -|.if ENDIAN_LE -|.define FRAME_PC, -4 -|.define FRAME_FUNC, -8 -|.define HI, 4 -|.define LO, 0 -|.define OFS_RD, 2 -|.define OFS_RA, 1 -|.define OFS_OP, 0 -|.else -|.define FRAME_PC, -8 -|.define FRAME_FUNC, -4 -|.define HI, 0 -|.define LO, 4 -|.define OFS_RD, 0 -|.define OFS_RA, 2 -|.define OFS_OP, 3 -|.endif -| -|// Instruction decode. -|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro -|.macro decode_OP4a, dst, ins; andi dst, ins, 0xff; .endmacro -|.macro decode_OP4b, dst; sll dst, dst, 2; .endmacro -|.macro decode_RC4a, dst, ins; srl dst, ins, 14; .endmacro -|.macro decode_RC4b, dst; andi dst, dst, 0x3fc; .endmacro -|.macro decode_RD4b, dst; sll dst, dst, 2; .endmacro -|.macro decode_RA8a, dst, ins; srl dst, ins, 5; .endmacro -|.macro decode_RA8b, dst; andi dst, dst, 0x7f8; .endmacro -|.macro decode_RB8a, dst, ins; srl dst, ins, 21; .endmacro -|.macro decode_RB8b, dst; andi dst, dst, 0x7f8; .endmacro -|.macro decode_RD8a, dst, ins; srl dst, ins, 16; .endmacro -|.macro decode_RD8b, dst; sll dst, dst, 3; .endmacro -|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro -| -|// Instruction fetch. -|.macro ins_NEXT1 -| lw INS, 0(PC) -| addiu PC, PC, 4 -|.endmacro -|// Instruction decode+dispatch. -|.macro ins_NEXT2 -| decode_OP4a TMP1, INS -| decode_OP4b TMP1 -| addu TMP0, DISPATCH, TMP1 -| decode_RD8a RD, INS -| lw AT, 0(TMP0) -| decode_RA8a RA, INS -| decode_RD8b RD -| jr AT -| decode_RA8b RA -|.endmacro -|.macro ins_NEXT -| ins_NEXT1 -| ins_NEXT2 -|.endmacro -| -|// Instruction footer. -|.if 1 -| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. -| .define ins_next, ins_NEXT -| .define ins_next_, ins_NEXT -| .define ins_next1, ins_NEXT1 -| .define ins_next2, ins_NEXT2 -|.else -| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. -| // Affects only certain kinds of benchmarks (and only with -j off). -| .macro ins_next -| b ->ins_next -| .endmacro -| .macro ins_next1 -| .endmacro -| .macro ins_next2 -| b ->ins_next -| .endmacro -| .macro ins_next_ -| ->ins_next: -| ins_NEXT -| .endmacro -|.endif -| -|// Call decode and dispatch. -|.macro ins_callt -| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC -| lw PC, LFUNC:RB->pc -| lw INS, 0(PC) -| addiu PC, PC, 4 -| decode_OP4a TMP1, INS -| decode_RA8a RA, INS -| decode_OP4b TMP1 -| decode_RA8b RA -| addu TMP0, DISPATCH, TMP1 -| lw TMP0, 0(TMP0) -| jr TMP0 -| addu RA, RA, BASE -|.endmacro -| -|.macro ins_call -| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC -| sw PC, FRAME_PC(BASE) -| ins_callt -|.endmacro -| -|//----------------------------------------------------------------------- -| -|.macro branch_RD -| srl TMP0, RD, 1 -| lui AT, (-(BCBIAS_J*4 >> 16) & 65535) -| addu TMP0, TMP0, AT -| addu PC, PC, TMP0 -|.endmacro -| -|// Assumes DISPATCH is relative to GL. -#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) -#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) -#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch)) -#define DISPATCH_GOT(name) (GG_DISP2GOT + 4*LJ_GOT_##name) -| -#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) -| -|.macro load_got, func -| lw CFUNCADDR, DISPATCH_GOT(func)(DISPATCH) -|.endmacro -|// Much faster. Sadly, there's no easy way to force the required code layout. -|// .macro call_intern, func; bal extern func; .endmacro -|.macro call_intern, func; jalr CFUNCADDR; .endmacro -|.macro call_extern; jalr CFUNCADDR; .endmacro -|.macro jmp_extern; jr CFUNCADDR; .endmacro -| -|.macro hotcheck, delta, target -| srl TMP1, PC, 1 -| andi TMP1, TMP1, 126 -| addu TMP1, TMP1, DISPATCH -| lhu TMP2, GG_DISP2HOT(TMP1) -| addiu TMP2, TMP2, -delta -| bltz TMP2, target -|. sh TMP2, GG_DISP2HOT(TMP1) -|.endmacro -| -|.macro hotloop -| hotcheck HOTCOUNT_LOOP, ->vm_hotloop -|.endmacro -| -|.macro hotcall -| hotcheck HOTCOUNT_CALL, ->vm_hotcall -|.endmacro -| -|// Set current VM state. Uses TMP0. -|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro -|.macro st_vmstate; sw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro -| -|// Move table write barrier back. Overwrites mark and tmp. -|.macro barrierback, tab, mark, tmp, target -| lw tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) -| andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab) -| sw tab, DISPATCH_GL(gc.grayagain)(DISPATCH) -| sb mark, tab->marked -| b target -|. sw tmp, tab->gclist -|.endmacro -| -|//----------------------------------------------------------------------- - -/* Generate subroutines used by opcodes and other parts of the VM. */ -/* The .code_sub section should be last to help static branch prediction. */ -static void build_subroutines(BuildCtx *ctx) -{ - |.code_sub - | - |//----------------------------------------------------------------------- - |//-- Return handling ---------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_returnp: - | // See vm_return. Also: TMP2 = previous base. - | andi AT, PC, FRAME_P - | beqz AT, ->cont_dispatch - |. li TMP1, LJ_TTRUE - | - | // Return from pcall or xpcall fast func. - | lw PC, FRAME_PC(TMP2) // Fetch PC of previous frame. - | move BASE, TMP2 // Restore caller base. - | // Prepending may overwrite the pcall frame, so do it at the end. - | sw TMP1, FRAME_PC(RA) // Prepend true to results. - | addiu RA, RA, -8 - | - |->vm_returnc: - | addiu RD, RD, 8 // RD = (nresults+1)*8. - | andi TMP0, PC, FRAME_TYPE - | beqz RD, ->vm_unwind_c_eh - |. li CRET1, LUA_YIELD - | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua. - |. move MULTRES, RD - | - |->vm_return: - | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return - | // TMP0 = PC & FRAME_TYPE - | li TMP2, -8 - | xori AT, TMP0, FRAME_C - | and TMP2, PC, TMP2 - | bnez AT, ->vm_returnp - | subu TMP2, BASE, TMP2 // TMP2 = previous base. - | - | addiu TMP1, RD, -8 - | sw TMP2, L->base - | li_vmstate C - | lw TMP2, SAVE_NRES - | addiu BASE, BASE, -8 - | st_vmstate - | beqz TMP1, >2 - |. sll TMP2, TMP2, 3 - |1: - | addiu TMP1, TMP1, -8 - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | addiu RA, RA, 8 - | sw SFRETHI, HI(BASE) - | sw SFRETLO, LO(BASE) - | bnez TMP1, <1 - |. addiu BASE, BASE, 8 - | - |2: - | bne TMP2, RD, >6 - |3: - |. sw BASE, L->top // Store new top. - | - |->vm_leave_cp: - | lw TMP0, SAVE_CFRAME // Restore previous C frame. - | move CRET1, r0 // Ok return status for vm_pcall. - | sw TMP0, L->cframe - | - |->vm_leave_unw: - | restoreregs_ret - | - |6: - | lw TMP1, L->maxstack - | slt AT, TMP2, RD - | bnez AT, >7 // Less results wanted? - | // More results wanted. Check stack size and fill up results with nil. - |. slt AT, BASE, TMP1 - | beqz AT, >8 - |. nop - | sw TISNIL, HI(BASE) - | addiu RD, RD, 8 - | b <2 - |. addiu BASE, BASE, 8 - | - |7: // Less results wanted. - | subu TMP0, RD, TMP2 - | subu TMP0, BASE, TMP0 // Either keep top or shrink it. - | b <3 - |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case? - | - |8: // Corner case: need to grow stack for filling up results. - | // This can happen if: - | // - A C function grows the stack (a lot). - | // - The GC shrinks the stack in between. - | // - A return back from a lua_call() with (high) nresults adjustment. - | load_got lj_state_growstack - | move MULTRES, RD - | srl CARG2, TMP2, 3 - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | lw TMP2, SAVE_NRES - | lw BASE, L->top // Need the (realloced) L->top in BASE. - | move RD, MULTRES - | b <2 - |. sll TMP2, TMP2, 3 - | - |->vm_unwind_c: // Unwind C stack, return from vm_pcall. - | // (void *cframe, int errcode) - | move sp, CARG1 - | move CRET1, CARG2 - |->vm_unwind_c_eh: // Landing pad for external unwinder. - | lw L, SAVE_L - | li TMP0, ~LJ_VMST_C - | lw GL:TMP1, L->glref - | b ->vm_leave_unw - |. sw TMP0, GL:TMP1->vmstate - | - |->vm_unwind_ff: // Unwind C stack, return from ff pcall. - | // (void *cframe) - | li AT, -4 - | and sp, CARG1, AT - |->vm_unwind_ff_eh: // Landing pad for external unwinder. - | lw L, SAVE_L - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | li TISNIL, LJ_TNIL - | lw BASE, L->base - | lw DISPATCH, L->glref // Setup pointer to dispatch table. - | .FPU mtc1 TMP3, TOBIT - | li TMP1, LJ_TFALSE - | li_vmstate INTERP - | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame. - | .FPU cvt.d.s TOBIT, TOBIT - | addiu RA, BASE, -8 // Results start at BASE-8. - | addiu DISPATCH, DISPATCH, GG_G2DISP - | sw TMP1, HI(RA) // Prepend false to error message. - | st_vmstate - | b ->vm_returnc - |. li RD, 16 // 2 results: false + error message. - | - |//----------------------------------------------------------------------- - |//-- Grow stack for calls ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_growstack_c: // Grow stack for C function. - | b >2 - |. li CARG2, LUA_MINSTACK - | - |->vm_growstack_l: // Grow stack for Lua function. - | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC - | addu RC, BASE, RC - | subu RA, RA, BASE - | sw BASE, L->base - | addiu PC, PC, 4 // Must point after first instruction. - | sw RC, L->top - | srl CARG2, RA, 3 - |2: - | // L->base = new base, L->top = top - | load_got lj_state_growstack - | sw PC, SAVE_PC - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | lw BASE, L->base - | lw RC, L->top - | lw LFUNC:RB, FRAME_FUNC(BASE) - | subu RC, RC, BASE - | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC - | ins_callt // Just retry the call. - | - |//----------------------------------------------------------------------- - |//-- Entry points into the assembler VM --------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_resume: // Setup C frame and resume thread. - | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) - | saveregs - | move L, CARG1 - | lw DISPATCH, L->glref // Setup pointer to dispatch table. - | move BASE, CARG2 - | lbu TMP1, L->status - | sw L, SAVE_L - | li PC, FRAME_CP - | addiu TMP0, sp, CFRAME_RESUME - | addiu DISPATCH, DISPATCH, GG_G2DISP - | sw r0, SAVE_NRES - | sw r0, SAVE_ERRF - | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. - | sw r0, SAVE_CFRAME - | beqz TMP1, >3 - |. sw TMP0, L->cframe - | - | // Resume after yield (like a return). - | sw L, DISPATCH_GL(cur_L)(DISPATCH) - | move RA, BASE - | lw BASE, L->base - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lw TMP1, L->top - | lw PC, FRAME_PC(BASE) - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | subu RD, TMP1, BASE - | .FPU mtc1 TMP3, TOBIT - | sb r0, L->status - | .FPU cvt.d.s TOBIT, TOBIT - | li_vmstate INTERP - | addiu RD, RD, 8 - | st_vmstate - | move MULTRES, RD - | andi TMP0, PC, FRAME_TYPE - | beqz TMP0, ->BC_RET_Z - |. li TISNIL, LJ_TNIL - | b ->vm_return - |. nop - | - |->vm_pcall: // Setup protected C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) - | saveregs - | sw CARG4, SAVE_ERRF - | b >1 - |. li PC, FRAME_CP - | - |->vm_call: // Setup C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1) - | saveregs - | li PC, FRAME_C - | - |1: // Entry point for vm_pcall above (PC = ftype). - | lw TMP1, L:CARG1->cframe - | move L, CARG1 - | sw CARG3, SAVE_NRES - | lw DISPATCH, L->glref // Setup pointer to dispatch table. - | sw CARG1, SAVE_L - | move BASE, CARG2 - | addiu DISPATCH, DISPATCH, GG_G2DISP - | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. - | sw TMP1, SAVE_CFRAME - | sw sp, L->cframe // Add our C frame to cframe chain. - | - |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). - | sw L, DISPATCH_GL(cur_L)(DISPATCH) - | lw TMP2, L->base // TMP2 = old base (used in vmeta_call). - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | lw TMP1, L->top - | .FPU mtc1 TMP3, TOBIT - | addu PC, PC, BASE - | subu NARGS8:RC, TMP1, BASE - | subu PC, PC, TMP2 // PC = frame delta + frame type - | .FPU cvt.d.s TOBIT, TOBIT - | li_vmstate INTERP - | li TISNIL, LJ_TNIL - | st_vmstate - | - |->vm_call_dispatch: - | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC - | lw TMP0, FRAME_PC(BASE) - | li AT, LJ_TFUNC - | bne TMP0, AT, ->vmeta_call - |. lw LFUNC:RB, FRAME_FUNC(BASE) - | - |->vm_call_dispatch_f: - | ins_call - | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC - | - |->vm_cpcall: // Setup protected C frame, call C. - | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) - | saveregs - | move L, CARG1 - | lw TMP0, L:CARG1->stack - | sw CARG1, SAVE_L - | lw TMP1, L->top - | lw DISPATCH, L->glref // Setup pointer to dispatch table. - | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. - | subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). - | lw TMP1, L->cframe - | addiu DISPATCH, DISPATCH, GG_G2DISP - | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. - | sw r0, SAVE_ERRF // No error function. - | sw TMP1, SAVE_CFRAME - | sw sp, L->cframe // Add our C frame to cframe chain. - | sw L, DISPATCH_GL(cur_L)(DISPATCH) - | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) - |. move CFUNCADDR, CARG4 - | move BASE, CRET1 - | bnez CRET1, <3 // Else continue with the call. - |. li PC, FRAME_CP - | b ->vm_leave_cp // No base? Just remove C frame. - |. nop - | - |//----------------------------------------------------------------------- - |//-- Metamethod handling ------------------------------------------------ - |//----------------------------------------------------------------------- - | - |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the - |// stack, so BASE doesn't need to be reloaded across these calls. - | - |//-- Continuation dispatch ---------------------------------------------- - | - |->cont_dispatch: - | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 - | lw TMP0, -16+LO(BASE) // Continuation. - | move RB, BASE - | move BASE, TMP2 // Restore caller BASE. - | lw LFUNC:TMP1, FRAME_FUNC(TMP2) - |.if FFI - | sltiu AT, TMP0, 2 - |.endif - | lw PC, -16+HI(RB) // Restore PC from [cont|PC]. - | addu TMP2, RA, RD - | lw TMP1, LFUNC:TMP1->pc - |.if FFI - | bnez AT, >1 - |.endif - |. sw TISNIL, -8+HI(TMP2) // Ensure one valid arg. - | // BASE = base, RA = resultptr, RB = meta base - | jr TMP0 // Jump to continuation. - |. lw KBASE, PC2PROTO(k)(TMP1) - | - |.if FFI - |1: - | bnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback. - | // cont = 0: tailcall from C function. - |. addiu TMP1, RB, -16 - | b ->vm_call_tail - |. subu RC, TMP1, BASE - |.endif - | - |->cont_cat: // RA = resultptr, RB = meta base - | lw INS, -4(PC) - | addiu CARG2, RB, -16 - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | decode_RB8a MULTRES, INS - | decode_RA8a RA, INS - | decode_RB8b MULTRES - | decode_RA8b RA - | addu TMP1, BASE, MULTRES - | sw BASE, L->base - | subu CARG3, CARG2, TMP1 - | sw SFRETHI, HI(CARG2) - | bne TMP1, CARG2, ->BC_CAT_Z - |. sw SFRETLO, LO(CARG2) - | addu RA, BASE, RA - | sw SFRETHI, HI(RA) - | b ->cont_nop - |. sw SFRETLO, LO(RA) - | - |//-- Table indexing metamethods ----------------------------------------- - | - |->vmeta_tgets1: - | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TSTR - | sw STR:RC, LO(CARG3) - | b >1 - |. sw TMP0, HI(CARG3) - | - |->vmeta_tgets: - | addiu CARG2, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TTAB - | sw TAB:RB, LO(CARG2) - | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv2) - | sw TMP0, HI(CARG2) - | li TMP1, LJ_TSTR - | sw STR:RC, LO(CARG3) - | b >1 - |. sw TMP1, HI(CARG3) - | - |->vmeta_tgetb: // TMP0 = index - | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | sw TMP0, LO(CARG3) - | sw TISNUM, HI(CARG3) - | - |->vmeta_tgetv: - |1: - | load_got lj_meta_tget - | sw BASE, L->base - | sw PC, SAVE_PC - | call_intern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) - |. move CARG1, L - | // Returns TValue * (finished) or NULL (metamethod). - | beqz CRET1, >3 - |. addiu TMP1, BASE, -FRAME_CONT - | lw SFARG1HI, HI(CRET1) - | lw SFARG2HI, LO(CRET1) - | ins_next1 - | sw SFARG1HI, HI(RA) - | sw SFARG2HI, LO(RA) - | ins_next2 - | - |3: // Call __index metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k - | lw BASE, L->top - | sw PC, -16+HI(BASE) // [cont|PC] - | subu PC, BASE, TMP1 - | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | b ->vm_call_dispatch_f - |. li NARGS8:RC, 16 // 2 args for func(t, k). - | - |->vmeta_tgetr: - | load_got lj_tab_getinth - | call_intern lj_tab_getinth // (GCtab *t, int32_t key) - |. nop - | // Returns cTValue * or NULL. - | beqz CRET1, ->BC_TGETR_Z - |. move SFARG2HI, TISNIL - | lw SFARG2HI, HI(CRET1) - | b ->BC_TGETR_Z - |. lw SFARG2LO, LO(CRET1) - | - |//----------------------------------------------------------------------- - | - |->vmeta_tsets1: - | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TSTR - | sw STR:RC, LO(CARG3) - | b >1 - |. sw TMP0, HI(CARG3) - | - |->vmeta_tsets: - | addiu CARG2, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TTAB - | sw TAB:RB, LO(CARG2) - | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv2) - | sw TMP0, HI(CARG2) - | li TMP1, LJ_TSTR - | sw STR:RC, LO(CARG3) - | b >1 - |. sw TMP1, HI(CARG3) - | - |->vmeta_tsetb: // TMP0 = index - | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | sw TMP0, LO(CARG3) - | sw TISNUM, HI(CARG3) - | - |->vmeta_tsetv: - |1: - | load_got lj_meta_tset - | sw BASE, L->base - | sw PC, SAVE_PC - | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) - |. move CARG1, L - | // Returns TValue * (finished) or NULL (metamethod). - | lw SFARG1HI, HI(RA) - | beqz CRET1, >3 - |. lw SFARG1LO, LO(RA) - | // NOBARRIER: lj_meta_tset ensures the table is not black. - | ins_next1 - | sw SFARG1HI, HI(CRET1) - | sw SFARG1LO, LO(CRET1) - | ins_next2 - | - |3: // Call __newindex metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) - | addiu TMP1, BASE, -FRAME_CONT - | lw BASE, L->top - | sw PC, -16+HI(BASE) // [cont|PC] - | subu PC, BASE, TMP1 - | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | sw SFARG1HI, 16+HI(BASE) // Copy value to third argument. - | sw SFARG1LO, 16+LO(BASE) - | b ->vm_call_dispatch_f - |. li NARGS8:RC, 24 // 3 args for func(t, k, v) - | - |->vmeta_tsetr: - | load_got lj_tab_setinth - | sw BASE, L->base - | sw PC, SAVE_PC - | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) - |. move CARG1, L - | // Returns TValue *. - | b ->BC_TSETR_Z - |. nop - | - |//-- Comparison metamethods --------------------------------------------- - | - |->vmeta_comp: - | // RA/RD point to o1/o2. - | move CARG2, RA - | move CARG3, RD - | load_got lj_meta_comp - | addiu PC, PC, -4 - | sw BASE, L->base - | sw PC, SAVE_PC - | decode_OP1 CARG4, INS - | call_intern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) - |. move CARG1, L - | // Returns 0/1 or TValue * (metamethod). - |3: - | sltiu AT, CRET1, 2 - | beqz AT, ->vmeta_binop - | negu TMP2, CRET1 - |4: - | lhu RD, OFS_RD(PC) - | addiu PC, PC, 4 - | lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) - | sll RD, RD, 2 - | addu RD, RD, TMP1 - | and RD, RD, TMP2 - | addu PC, PC, RD - |->cont_nop: - | ins_next - | - |->cont_ra: // RA = resultptr - | lbu TMP1, -4+OFS_RA(PC) - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | sll TMP1, TMP1, 3 - | addu TMP1, BASE, TMP1 - | sw SFRETHI, HI(TMP1) - | b ->cont_nop - |. sw SFRETLO, LO(TMP1) - | - |->cont_condt: // RA = resultptr - | lw TMP0, HI(RA) - | sltiu AT, TMP0, LJ_TISTRUECOND - | b <4 - |. negu TMP2, AT // Branch if result is true. - | - |->cont_condf: // RA = resultptr - | lw TMP0, HI(RA) - | sltiu AT, TMP0, LJ_TISTRUECOND - | b <4 - |. addiu TMP2, AT, -1 // Branch if result is false. - | - |->vmeta_equal: - | // SFARG1LO/SFARG2LO point to o1/o2. TMP0 is set to 0/1. - | load_got lj_meta_equal - | move CARG2, SFARG1LO - | move CARG3, SFARG2LO - | move CARG4, TMP0 - | addiu PC, PC, -4 - | sw BASE, L->base - | sw PC, SAVE_PC - | call_intern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) - |. move CARG1, L - | // Returns 0/1 or TValue * (metamethod). - | b <3 - |. nop - | - |->vmeta_equal_cd: - |.if FFI - | load_got lj_meta_equal_cd - | move CARG2, INS - | addiu PC, PC, -4 - | sw BASE, L->base - | sw PC, SAVE_PC - | call_intern lj_meta_equal_cd // (lua_State *L, BCIns op) - |. move CARG1, L - | // Returns 0/1 or TValue * (metamethod). - | b <3 - |. nop - |.endif - | - |->vmeta_istype: - | load_got lj_meta_istype - | addiu PC, PC, -4 - | sw BASE, L->base - | srl CARG2, RA, 3 - | srl CARG3, RD, 3 - | sw PC, SAVE_PC - | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) - |. move CARG1, L - | b ->cont_nop - |. nop - | - |//-- Arithmetic metamethods --------------------------------------------- - | - |->vmeta_unm: - | move RC, RB - | - |->vmeta_arith: - | load_got lj_meta_arith - | decode_OP1 TMP0, INS - | sw BASE, L->base - | move CARG2, RA - | sw PC, SAVE_PC - | move CARG3, RB - | move CARG4, RC - | sw TMP0, ARG5 - | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) - |. move CARG1, L - | // Returns NULL (finished) or TValue * (metamethod). - | beqz CRET1, ->cont_nop - |. nop - | - | // Call metamethod for binary op. - |->vmeta_binop: - | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 - | subu TMP1, CRET1, BASE - | sw PC, -16+HI(CRET1) // [cont|PC] - | move TMP2, BASE - | addiu PC, TMP1, FRAME_CONT - | move BASE, CRET1 - | b ->vm_call_dispatch - |. li NARGS8:RC, 16 // 2 args for func(o1, o2). - | - |->vmeta_len: - | // CARG2 already set by BC_LEN. -#if LJ_52 - | move MULTRES, CARG1 -#endif - | load_got lj_meta_len - | sw BASE, L->base - | sw PC, SAVE_PC - | call_intern lj_meta_len // (lua_State *L, TValue *o) - |. move CARG1, L - | // Returns NULL (retry) or TValue * (metamethod base). -#if LJ_52 - | bnez CRET1, ->vmeta_binop // Binop call for compatibility. - |. nop - | b ->BC_LEN_Z - |. move CARG1, MULTRES -#else - | b ->vmeta_binop // Binop call for compatibility. - |. nop -#endif - | - |//-- Call metamethod ---------------------------------------------------- - | - |->vmeta_call: // Resolve and call __call metamethod. - | // TMP2 = old base, BASE = new base, RC = nargs*8 - | load_got lj_meta_call - | sw TMP2, L->base // This is the callers base! - | addiu CARG2, BASE, -8 - | sw PC, SAVE_PC - | addu CARG3, BASE, RC - | move MULTRES, NARGS8:RC - | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - |. move CARG1, L - | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | addiu NARGS8:RC, MULTRES, 8 // Got one more argument now. - | ins_call - | - |->vmeta_callt: // Resolve __call for BC_CALLT. - | // BASE = old base, RA = new base, RC = nargs*8 - | load_got lj_meta_call - | sw BASE, L->base - | addiu CARG2, RA, -8 - | sw PC, SAVE_PC - | addu CARG3, RA, RC - | move MULTRES, NARGS8:RC - | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - |. move CARG1, L - | lw TMP1, FRAME_PC(BASE) - | lw LFUNC:RB, FRAME_FUNC(RA) // Guaranteed to be a function here. - | b ->BC_CALLT_Z - |. addiu NARGS8:RC, MULTRES, 8 // Got one more argument now. - | - |//-- Argument coercion for 'for' statement ------------------------------ - | - |->vmeta_for: - | load_got lj_meta_for - | sw BASE, L->base - | move CARG2, RA - | sw PC, SAVE_PC - | move MULTRES, INS - | call_intern lj_meta_for // (lua_State *L, TValue *base) - |. move CARG1, L - |.if JIT - | decode_OP1 TMP0, MULTRES - | li AT, BC_JFORI - |.endif - | decode_RA8a RA, MULTRES - | decode_RD8a RD, MULTRES - | decode_RA8b RA - |.if JIT - | beq TMP0, AT, =>BC_JFORI - |. decode_RD8b RD - | b =>BC_FORI - |. nop - |.else - | b =>BC_FORI - |. decode_RD8b RD - |.endif - | - |//----------------------------------------------------------------------- - |//-- Fast functions ----------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro .ffunc, name - |->ff_ .. name: - |.endmacro - | - |.macro .ffunc_1, name - |->ff_ .. name: - | lw SFARG1HI, HI(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. lw SFARG1LO, LO(BASE) - |.endmacro - | - |.macro .ffunc_2, name - |->ff_ .. name: - | sltiu AT, NARGS8:RC, 16 - | lw SFARG1HI, HI(BASE) - | bnez AT, ->fff_fallback - |. lw SFARG2HI, 8+HI(BASE) - | lw SFARG1LO, LO(BASE) - | lw SFARG2LO, 8+LO(BASE) - |.endmacro - | - |.macro .ffunc_n, name // Caveat: has delay slot! - |->ff_ .. name: - | lw SFARG1HI, HI(BASE) - |.if FPU - | ldc1 FARG1, 0(BASE) - |.else - | lw SFARG1LO, LO(BASE) - |.endif - | beqz NARGS8:RC, ->fff_fallback - |. sltiu AT, SFARG1HI, LJ_TISNUM - | beqz AT, ->fff_fallback - |.endmacro - | - |.macro .ffunc_nn, name // Caveat: has delay slot! - |->ff_ .. name: - | sltiu AT, NARGS8:RC, 16 - | lw SFARG1HI, HI(BASE) - | bnez AT, ->fff_fallback - |. lw SFARG2HI, 8+HI(BASE) - | sltiu TMP0, SFARG1HI, LJ_TISNUM - |.if FPU - | ldc1 FARG1, 0(BASE) - |.else - | lw SFARG1LO, LO(BASE) - |.endif - | sltiu TMP1, SFARG2HI, LJ_TISNUM - |.if FPU - | ldc1 FARG2, 8(BASE) - |.else - | lw SFARG2LO, 8+LO(BASE) - |.endif - | and TMP0, TMP0, TMP1 - | beqz TMP0, ->fff_fallback - |.endmacro - | - |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot! - |.macro ffgccheck - | lw TMP0, DISPATCH_GL(gc.total)(DISPATCH) - | lw TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) - | subu AT, TMP0, TMP1 - | bgezal AT, ->fff_gcstep - |.endmacro - | - |//-- Base library: checks ----------------------------------------------- - | - |.ffunc_1 assert - | sltiu AT, SFARG1HI, LJ_TISTRUECOND - | beqz AT, ->fff_fallback - |. addiu RA, BASE, -8 - | lw PC, FRAME_PC(BASE) - | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. - | addu TMP2, RA, NARGS8:RC - | sw SFARG1HI, HI(RA) - | addiu TMP1, BASE, 8 - | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. - |. sw SFARG1LO, LO(RA) - |1: - | lw SFRETHI, HI(TMP1) - | lw SFRETLO, LO(TMP1) - | sw SFRETHI, -8+HI(TMP1) - | sw SFRETLO, -8+LO(TMP1) - | bne TMP1, TMP2, <1 - |. addiu TMP1, TMP1, 8 - | b ->fff_res - |. nop - | - |.ffunc type - | lw SFARG1HI, HI(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. sltiu TMP0, SFARG1HI, LJ_TISNUM - | movn SFARG1HI, TISNUM, TMP0 - | not TMP1, SFARG1HI - | sll TMP1, TMP1, 3 - | addu TMP1, CFUNC:RB, TMP1 - | lw SFARG1HI, CFUNC:TMP1->upvalue[0].u32.hi - | b ->fff_restv - |. lw SFARG1LO, CFUNC:TMP1->upvalue[0].u32.lo - | - |//-- Base library: getters and setters --------------------------------- - | - |.ffunc_1 getmetatable - | li AT, LJ_TTAB - | bne SFARG1HI, AT, >6 - |. li AT, LJ_TUDATA - |1: // Field metatable must be at same offset for GCtab and GCudata! - | lw TAB:SFARG1LO, TAB:SFARG1LO->metatable - |2: - | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) - | beqz TAB:SFARG1LO, ->fff_restv - |. li SFARG1HI, LJ_TNIL - | lw TMP0, TAB:SFARG1LO->hmask - | li SFARG1HI, LJ_TTAB // Use metatable as default result. - | lw TMP1, STR:RC->hash - | lw NODE:TMP2, TAB:SFARG1LO->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | sll TMP0, TMP1, 5 - | sll TMP1, TMP1, 3 - | subu TMP1, TMP0, TMP1 - | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - | li AT, LJ_TSTR - |3: // Rearranged logic, because we expect _not_ to find the key. - | lw CARG4, offsetof(Node, key)+HI(NODE:TMP2) - | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) - | lw NODE:TMP3, NODE:TMP2->next - | bne CARG4, AT, >4 - |. lw CARG3, offsetof(Node, val)+HI(NODE:TMP2) - | beq TMP0, STR:RC, >5 - |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2) - |4: - | beqz NODE:TMP3, ->fff_restv // Not found, keep default result. - |. move NODE:TMP2, NODE:TMP3 - | b <3 - |. nop - |5: - | beq CARG3, TISNIL, ->fff_restv // Ditto for nil value. - |. nop - | move SFARG1HI, CARG3 // Return value of mt.__metatable. - | b ->fff_restv - |. move SFARG1LO, TMP1 - | - |6: - | beq SFARG1HI, AT, <1 - |. sltu AT, TISNUM, SFARG1HI - | movz SFARG1HI, TISNUM, AT - | not TMP1, SFARG1HI - | sll TMP1, TMP1, 2 - | addu TMP1, DISPATCH, TMP1 - | b <2 - |. lw TAB:SFARG1LO, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1) - | - |.ffunc_2 setmetatable - | // Fast path: no mt for table yet and not clearing the mt. - | li AT, LJ_TTAB - | bne SFARG1HI, AT, ->fff_fallback - |. addiu SFARG2HI, SFARG2HI, -LJ_TTAB - | lw TAB:TMP1, TAB:SFARG1LO->metatable - | lbu TMP3, TAB:SFARG1LO->marked - | or AT, SFARG2HI, TAB:TMP1 - | bnez AT, ->fff_fallback - |. andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | beqz AT, ->fff_restv - |. sw TAB:SFARG2LO, TAB:SFARG1LO->metatable - | barrierback TAB:SFARG1LO, TMP3, TMP0, ->fff_restv - | - |.ffunc rawget - | lw CARG4, HI(BASE) - | sltiu AT, NARGS8:RC, 16 - | lw TAB:CARG2, LO(BASE) - | load_got lj_tab_get - | addiu CARG4, CARG4, -LJ_TTAB - | or AT, AT, CARG4 - | bnez AT, ->fff_fallback - | addiu CARG3, BASE, 8 - | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) - |. move CARG1, L - | // Returns cTValue *. - | lw SFARG1HI, HI(CRET1) - | b ->fff_restv - |. lw SFARG1LO, LO(CRET1) - | - |//-- Base library: conversions ------------------------------------------ - | - |.ffunc tonumber - | // Only handles the number case inline (without a base argument). - | lw CARG1, HI(BASE) - | xori AT, NARGS8:RC, 8 // Exactly one number argument. - | sltu TMP0, TISNUM, CARG1 - | or AT, AT, TMP0 - | bnez AT, ->fff_fallback - |. lw SFARG1HI, HI(BASE) - | b ->fff_restv - |. lw SFARG1LO, LO(BASE) - | - |.ffunc_1 tostring - | // Only handles the string or number case inline. - | li AT, LJ_TSTR - | // A __tostring method in the string base metatable is ignored. - | beq SFARG1HI, AT, ->fff_restv // String key? - | // Handle numbers inline, unless a number base metatable is present. - |. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) - | sltu TMP0, TISNUM, SFARG1HI - | or TMP0, TMP0, TMP1 - | bnez TMP0, ->fff_fallback - |. sw BASE, L->base // Add frame since C call can throw. - | ffgccheck - |. sw PC, SAVE_PC // Redundant (but a defined value). - | load_got lj_strfmt_number - | move CARG1, L - | call_intern lj_strfmt_number // (lua_State *L, cTValue *o) - |. move CARG2, BASE - | // Returns GCstr *. - | li SFARG1HI, LJ_TSTR - | b ->fff_restv - |. move SFARG1LO, CRET1 - | - |//-- Base library: iterators ------------------------------------------- - | - |.ffunc next - | lw CARG1, HI(BASE) - | lw TAB:CARG2, LO(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. addu TMP2, BASE, NARGS8:RC - | li AT, LJ_TTAB - | sw TISNIL, HI(TMP2) // Set missing 2nd arg to nil. - | bne CARG1, AT, ->fff_fallback - |. lw PC, FRAME_PC(BASE) - | load_got lj_tab_next - | sw BASE, L->base // Add frame since C call can throw. - | sw BASE, L->top // Dummy frame length is ok. - | addiu CARG3, BASE, 8 - | sw PC, SAVE_PC - | call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - |. move CARG1, L - | // Returns 0 at end of traversal. - | beqz CRET1, ->fff_restv // End of traversal: return nil. - |. li SFARG1HI, LJ_TNIL - | lw TMP0, 8+HI(BASE) - | lw TMP1, 8+LO(BASE) - | addiu RA, BASE, -8 - | lw TMP2, 16+HI(BASE) - | lw TMP3, 16+LO(BASE) - | sw TMP0, HI(RA) - | sw TMP1, LO(RA) - | sw TMP2, 8+HI(RA) - | sw TMP3, 8+LO(RA) - | b ->fff_res - |. li RD, (2+1)*8 - | - |.ffunc_1 pairs - | li AT, LJ_TTAB - | bne SFARG1HI, AT, ->fff_fallback - |. lw PC, FRAME_PC(BASE) -#if LJ_52 - | lw TAB:TMP2, TAB:SFARG1LO->metatable - | lw TMP0, CFUNC:RB->upvalue[0].u32.hi - | lw TMP1, CFUNC:RB->upvalue[0].u32.lo - | bnez TAB:TMP2, ->fff_fallback -#else - | lw TMP0, CFUNC:RB->upvalue[0].u32.hi - | lw TMP1, CFUNC:RB->upvalue[0].u32.lo -#endif - |. addiu RA, BASE, -8 - | sw TISNIL, 8+HI(BASE) - | sw TMP0, HI(RA) - | sw TMP1, LO(RA) - | b ->fff_res - |. li RD, (3+1)*8 - | - |.ffunc ipairs_aux - | sltiu AT, NARGS8:RC, 16 - | lw CARG3, HI(BASE) - | lw TAB:CARG1, LO(BASE) - | lw CARG4, 8+HI(BASE) - | bnez AT, ->fff_fallback - |. addiu CARG3, CARG3, -LJ_TTAB - | xor CARG4, CARG4, TISNUM - | and AT, CARG3, CARG4 - | bnez AT, ->fff_fallback - |. lw PC, FRAME_PC(BASE) - | lw TMP2, 8+LO(BASE) - | lw TMP0, TAB:CARG1->asize - | lw TMP1, TAB:CARG1->array - | addiu TMP2, TMP2, 1 - | sw TISNUM, -8+HI(BASE) - | sltu AT, TMP2, TMP0 - | sw TMP2, -8+LO(BASE) - | beqz AT, >2 // Not in array part? - |. addiu RA, BASE, -8 - | sll TMP3, TMP2, 3 - | addu TMP3, TMP1, TMP3 - | lw TMP1, HI(TMP3) - | lw TMP2, LO(TMP3) - |1: - | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. - |. li RD, (0+1)*8 - | sw TMP1, 8+HI(RA) - | sw TMP2, 8+LO(RA) - | b ->fff_res - |. li RD, (2+1)*8 - | - |2: // Check for empty hash part first. Otherwise call C function. - | lw TMP0, TAB:CARG1->hmask - | load_got lj_tab_getinth - | beqz TMP0, ->fff_res - |. li RD, (0+1)*8 - | call_intern lj_tab_getinth // (GCtab *t, int32_t key) - |. move CARG2, TMP2 - | // Returns cTValue * or NULL. - | beqz CRET1, ->fff_res - |. li RD, (0+1)*8 - | lw TMP1, HI(CRET1) - | b <1 - |. lw TMP2, LO(CRET1) - | - |.ffunc_1 ipairs - | li AT, LJ_TTAB - | bne SFARG1HI, AT, ->fff_fallback - |. lw PC, FRAME_PC(BASE) -#if LJ_52 - | lw TAB:TMP2, TAB:SFARG1LO->metatable - | lw TMP0, CFUNC:RB->upvalue[0].u32.hi - | lw TMP1, CFUNC:RB->upvalue[0].u32.lo - | bnez TAB:TMP2, ->fff_fallback -#else - | lw TMP0, CFUNC:RB->upvalue[0].u32.hi - | lw TMP1, CFUNC:RB->upvalue[0].u32.lo -#endif - |. addiu RA, BASE, -8 - | sw TISNUM, 8+HI(BASE) - | sw r0, 8+LO(BASE) - | sw TMP0, HI(RA) - | sw TMP1, LO(RA) - | b ->fff_res - |. li RD, (3+1)*8 - | - |//-- Base library: catch errors ---------------------------------------- - | - |.ffunc pcall - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | beqz NARGS8:RC, ->fff_fallback - | move TMP2, BASE - | addiu BASE, BASE, 8 - | // Remember active hook before pcall. - | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT - | andi TMP3, TMP3, 1 - | addiu PC, TMP3, 8+FRAME_PCALL - | b ->vm_call_dispatch - |. addiu NARGS8:RC, NARGS8:RC, -8 - | - |.ffunc xpcall - | sltiu AT, NARGS8:RC, 16 - | lw CARG4, 8+HI(BASE) - | bnez AT, ->fff_fallback - |. lw CARG3, 8+LO(BASE) - | lw CARG1, LO(BASE) - | lw CARG2, HI(BASE) - | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) - | li AT, LJ_TFUNC - | move TMP2, BASE - | bne CARG4, AT, ->fff_fallback // Traceback must be a function. - | addiu BASE, BASE, 16 - | // Remember active hook before pcall. - | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT - | sw CARG3, LO(TMP2) // Swap function and traceback. - | sw CARG4, HI(TMP2) - | andi TMP3, TMP3, 1 - | sw CARG1, 8+LO(TMP2) - | sw CARG2, 8+HI(TMP2) - | addiu PC, TMP3, 16+FRAME_PCALL - | b ->vm_call_dispatch - |. addiu NARGS8:RC, NARGS8:RC, -16 - | - |//-- Coroutine library -------------------------------------------------- - | - |.macro coroutine_resume_wrap, resume - |.if resume - |.ffunc coroutine_resume - | lw CARG3, HI(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. lw CARG1, LO(BASE) - | li AT, LJ_TTHREAD - | bne CARG3, AT, ->fff_fallback - |.else - |.ffunc coroutine_wrap_aux - | lw L:CARG1, CFUNC:RB->upvalue[0].gcr - |.endif - | lbu TMP0, L:CARG1->status - | lw TMP1, L:CARG1->cframe - | lw CARG2, L:CARG1->top - | lw TMP2, L:CARG1->base - | addiu TMP3, TMP0, -LUA_YIELD - | bgtz TMP3, ->fff_fallback // st > LUA_YIELD? - |. xor TMP2, TMP2, CARG2 - | bnez TMP1, ->fff_fallback // cframe != 0? - |. or AT, TMP2, TMP0 - | lw TMP0, L:CARG1->maxstack - | beqz AT, ->fff_fallback // base == top && st == 0? - |. lw PC, FRAME_PC(BASE) - | addu TMP2, CARG2, NARGS8:RC - | sltu AT, TMP0, TMP2 - | bnez AT, ->fff_fallback // Stack overflow? - |. sw PC, SAVE_PC - | sw BASE, L->base - |1: - |.if resume - | addiu BASE, BASE, 8 // Keep resumed thread in stack for GC. - | addiu NARGS8:RC, NARGS8:RC, -8 - | addiu TMP2, TMP2, -8 - |.endif - | sw TMP2, L:CARG1->top - | addu TMP1, BASE, NARGS8:RC - | move CARG3, CARG2 - | sw BASE, L->top - |2: // Move args to coroutine. - | lw SFRETHI, HI(BASE) - | lw SFRETLO, LO(BASE) - | sltu AT, BASE, TMP1 - | beqz AT, >3 - |. addiu BASE, BASE, 8 - | sw SFRETHI, HI(CARG3) - | sw SFRETLO, LO(CARG3) - | b <2 - |. addiu CARG3, CARG3, 8 - |3: - | bal ->vm_resume // (lua_State *L, TValue *base, 0, 0) - |. move L:RA, L:CARG1 - | // Returns thread status. - |4: - | lw TMP2, L:RA->base - | sltiu AT, CRET1, LUA_YIELD+1 - | lw TMP3, L:RA->top - | li_vmstate INTERP - | lw BASE, L->base - | sw L, DISPATCH_GL(cur_L)(DISPATCH) - | st_vmstate - | beqz AT, >8 - |. subu RD, TMP3, TMP2 - | lw TMP0, L->maxstack - | beqz RD, >6 // No results? - |. addu TMP1, BASE, RD - | sltu AT, TMP0, TMP1 - | bnez AT, >9 // Need to grow stack? - |. addu TMP3, TMP2, RD - | sw TMP2, L:RA->top // Clear coroutine stack. - | move TMP1, BASE - |5: // Move results from coroutine. - | lw SFRETHI, HI(TMP2) - | lw SFRETLO, LO(TMP2) - | addiu TMP2, TMP2, 8 - | sltu AT, TMP2, TMP3 - | sw SFRETHI, HI(TMP1) - | sw SFRETLO, LO(TMP1) - | bnez AT, <5 - |. addiu TMP1, TMP1, 8 - |6: - | andi TMP0, PC, FRAME_TYPE - |.if resume - | li TMP1, LJ_TTRUE - | addiu RA, BASE, -8 - | sw TMP1, -8+HI(BASE) // Prepend true to results. - | addiu RD, RD, 16 - |.else - | move RA, BASE - | addiu RD, RD, 8 - |.endif - |7: - | sw PC, SAVE_PC - | beqz TMP0, ->BC_RET_Z - |. move MULTRES, RD - | b ->vm_return - |. nop - | - |8: // Coroutine returned with error (at co->top-1). - |.if resume - | addiu TMP3, TMP3, -8 - | li TMP1, LJ_TFALSE - | lw SFRETHI, HI(TMP3) - | lw SFRETLO, LO(TMP3) - | sw TMP3, L:RA->top // Remove error from coroutine stack. - | li RD, (2+1)*8 - | sw TMP1, -8+HI(BASE) // Prepend false to results. - | addiu RA, BASE, -8 - | sw SFRETHI, HI(BASE) // Copy error message. - | sw SFRETLO, LO(BASE) - | b <7 - |. andi TMP0, PC, FRAME_TYPE - |.else - | load_got lj_ffh_coroutine_wrap_err - | move CARG2, L:RA - | call_intern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) - |. move CARG1, L - |.endif - | - |9: // Handle stack expansion on return from yield. - | load_got lj_state_growstack - | srl CARG2, RD, 3 - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | b <4 - |. li CRET1, 0 - |.endmacro - | - | coroutine_resume_wrap 1 // coroutine.resume - | coroutine_resume_wrap 0 // coroutine.wrap - | - |.ffunc coroutine_yield - | lw TMP0, L->cframe - | addu TMP1, BASE, NARGS8:RC - | sw BASE, L->base - | andi TMP0, TMP0, CFRAME_RESUME - | sw TMP1, L->top - | beqz TMP0, ->fff_fallback - |. li CRET1, LUA_YIELD - | sw r0, L->cframe - | b ->vm_leave_unw - |. sb CRET1, L->status - | - |//-- Math library ------------------------------------------------------- - | - |.ffunc_1 math_abs - | bne SFARG1HI, TISNUM, >1 - |. sra TMP0, SFARG1LO, 31 - | xor TMP1, SFARG1LO, TMP0 - | subu SFARG1LO, TMP1, TMP0 - | bgez SFARG1LO, ->fff_restv - |. nop - | lui SFARG1HI, 0x41e0 // 2^31 as a double. - | b ->fff_restv - |. li SFARG1LO, 0 - |1: - | sltiu AT, SFARG1HI, LJ_TISNUM - | beqz AT, ->fff_fallback - |. sll SFARG1HI, SFARG1HI, 1 - | srl SFARG1HI, SFARG1HI, 1 - |// fallthrough - | - |->fff_restv: - | // SFARG1LO/SFARG1HI = TValue result. - | lw PC, FRAME_PC(BASE) - | sw SFARG1HI, -8+HI(BASE) - | addiu RA, BASE, -8 - | sw SFARG1LO, -8+LO(BASE) - |->fff_res1: - | // RA = results, PC = return. - | li RD, (1+1)*8 - |->fff_res: - | // RA = results, RD = (nresults+1)*8, PC = return. - | andi TMP0, PC, FRAME_TYPE - | bnez TMP0, ->vm_return - |. move MULTRES, RD - | lw INS, -4(PC) - | decode_RB8a RB, INS - | decode_RB8b RB - |5: - | sltu AT, RD, RB - | bnez AT, >6 // More results expected? - |. decode_RA8a TMP0, INS - | decode_RA8b TMP0 - | ins_next1 - | // Adjust BASE. KBASE is assumed to be set for the calling frame. - | subu BASE, RA, TMP0 - | ins_next2 - | - |6: // Fill up results with nil. - | addu TMP1, RA, RD - | addiu RD, RD, 8 - | b <5 - |. sw TISNIL, -8+HI(TMP1) - | - |.macro math_extern, func - | .ffunc math_ .. func - | lw SFARG1HI, HI(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. load_got func - | sltiu AT, SFARG1HI, LJ_TISNUM - | beqz AT, ->fff_fallback - |.if FPU - |. ldc1 FARG1, 0(BASE) - |.else - |. lw SFARG1LO, LO(BASE) - |.endif - | call_extern - |. nop - | b ->fff_resn - |. nop - |.endmacro - | - |.macro math_extern2, func - | .ffunc_nn math_ .. func - |. load_got func - | call_extern - |. nop - | b ->fff_resn - |. nop - |.endmacro - | - |// TODO: Return integer type if result is integer (own sf implementation). - |.macro math_round, func - |->ff_math_ .. func: - | lw SFARG1HI, HI(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. lw SFARG1LO, LO(BASE) - | beq SFARG1HI, TISNUM, ->fff_restv - |. sltu AT, SFARG1HI, TISNUM - | beqz AT, ->fff_fallback - |.if FPU - |. ldc1 FARG1, 0(BASE) - | bal ->vm_ .. func - |.else - |. load_got func - | call_extern - |.endif - |. nop - | b ->fff_resn - |. nop - |.endmacro - | - | math_round floor - | math_round ceil - | - |.ffunc math_log - | li AT, 8 - | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. - |. lw SFARG1HI, HI(BASE) - | sltiu AT, SFARG1HI, LJ_TISNUM - | beqz AT, ->fff_fallback - |. load_got log - |.if FPU - | call_extern - |. ldc1 FARG1, 0(BASE) - |.else - | call_extern - |. lw SFARG1LO, LO(BASE) - |.endif - | b ->fff_resn - |. nop - | - | math_extern log10 - | math_extern exp - | math_extern sin - | math_extern cos - | math_extern tan - | math_extern asin - | math_extern acos - | math_extern atan - | math_extern sinh - | math_extern cosh - | math_extern tanh - | math_extern2 pow - | math_extern2 atan2 - | math_extern2 fmod - | - |.if FPU - |.ffunc_n math_sqrt - |. sqrt.d FRET1, FARG1 - |// fallthrough to ->fff_resn - |.else - | math_extern sqrt - |.endif - | - |->fff_resn: - | lw PC, FRAME_PC(BASE) - | addiu RA, BASE, -8 - |.if FPU - | b ->fff_res1 - |. sdc1 FRET1, -8(BASE) - |.else - | sw SFRETHI, -8+HI(BASE) - | b ->fff_res1 - |. sw SFRETLO, -8+LO(BASE) - |.endif - | - | - |.ffunc math_ldexp - | sltiu AT, NARGS8:RC, 16 - | lw SFARG1HI, HI(BASE) - | bnez AT, ->fff_fallback - |. lw CARG4, 8+HI(BASE) - | bne CARG4, TISNUM, ->fff_fallback - | load_got ldexp - |. sltu AT, SFARG1HI, TISNUM - | beqz AT, ->fff_fallback - |.if FPU - |. ldc1 FARG1, 0(BASE) - |.else - |. lw SFARG1LO, LO(BASE) - |.endif - | call_extern - |. lw CARG3, 8+LO(BASE) - | b ->fff_resn - |. nop - | - |.ffunc_n math_frexp - | load_got frexp - | lw PC, FRAME_PC(BASE) - | call_extern - |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) - | addiu RA, BASE, -8 - |.if FPU - | mtc1 TMP1, FARG2 - | sdc1 FRET1, 0(RA) - | cvt.d.w FARG2, FARG2 - | sdc1 FARG2, 8(RA) - |.else - | sw SFRETLO, LO(RA) - | sw SFRETHI, HI(RA) - | sw TMP1, 8+LO(RA) - | sw TISNUM, 8+HI(RA) - |.endif - | b ->fff_res - |. li RD, (2+1)*8 - | - |.ffunc_n math_modf - | load_got modf - | lw PC, FRAME_PC(BASE) - | call_extern - |. addiu CARG3, BASE, -8 - | addiu RA, BASE, -8 - |.if FPU - | sdc1 FRET1, 0(BASE) - |.else - | sw SFRETLO, LO(BASE) - | sw SFRETHI, HI(BASE) - |.endif - | b ->fff_res - |. li RD, (2+1)*8 - | - |.macro math_minmax, name, intins, fpins - | .ffunc_1 name - | addu TMP3, BASE, NARGS8:RC - | bne SFARG1HI, TISNUM, >5 - |. addiu TMP2, BASE, 8 - |1: // Handle integers. - |. lw SFARG2HI, HI(TMP2) - | beq TMP2, TMP3, ->fff_restv - |. lw SFARG2LO, LO(TMP2) - | bne SFARG2HI, TISNUM, >3 - |. slt AT, SFARG1LO, SFARG2LO - | intins SFARG1LO, SFARG2LO, AT - | b <1 - |. addiu TMP2, TMP2, 8 - | - |3: // Convert intermediate result to number and continue with number loop. - | sltiu AT, SFARG2HI, LJ_TISNUM - | beqz AT, ->fff_fallback - |.if FPU - |. mtc1 SFARG1LO, FRET1 - | cvt.d.w FRET1, FRET1 - | b >7 - |. ldc1 FARG1, 0(TMP2) - |.else - |. nop - | bal ->vm_sfi2d_1 - |. nop - | b >7 - |. nop - |.endif - | - |5: - |. sltiu AT, SFARG1HI, LJ_TISNUM - | beqz AT, ->fff_fallback - |.if FPU - |. ldc1 FRET1, 0(BASE) - |.endif - | - |6: // Handle numbers. - |. lw SFARG2HI, HI(TMP2) - |.if FPU - | beq TMP2, TMP3, ->fff_resn - |.else - | beq TMP2, TMP3, ->fff_restv - |.endif - |. sltiu AT, SFARG2HI, LJ_TISNUM - | beqz AT, >8 - |.if FPU - |. ldc1 FARG1, 0(TMP2) - |.else - |. lw SFARG2LO, LO(TMP2) - |.endif - |7: - |.if FPU - | c.olt.d FRET1, FARG1 - | fpins FRET1, FARG1 - |.else - | bal ->vm_sfcmpolt - |. nop - | intins SFARG1LO, SFARG2LO, CRET1 - | intins SFARG1HI, SFARG2HI, CRET1 - |.endif - | b <6 - |. addiu TMP2, TMP2, 8 - | - |8: // Convert integer to number and continue with number loop. - | bne SFARG2HI, TISNUM, ->fff_fallback - |.if FPU - |. lwc1 FARG1, LO(TMP2) - | b <7 - |. cvt.d.w FARG1, FARG1 - |.else - |. nop - | bal ->vm_sfi2d_2 - |. nop - | b <7 - |. nop - |.endif - | - |.endmacro - | - | math_minmax math_min, movz, movf.d - | math_minmax math_max, movn, movt.d - | - |//-- String library ----------------------------------------------------- - | - |.ffunc string_byte // Only handle the 1-arg case here. - | lw CARG3, HI(BASE) - | lw STR:CARG1, LO(BASE) - | xori AT, NARGS8:RC, 8 - | addiu CARG3, CARG3, -LJ_TSTR - | or AT, AT, CARG3 - | bnez AT, ->fff_fallback // Need exactly 1 string argument. - |. nop - | lw TMP0, STR:CARG1->len - | addiu RA, BASE, -8 - | lw PC, FRAME_PC(BASE) - | sltu RD, r0, TMP0 - | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). - | addiu RD, RD, 1 - | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 - | sw TISNUM, HI(RA) - | b ->fff_res - |. sw TMP1, LO(RA) - | - |.ffunc string_char // Only handle the 1-arg case here. - | ffgccheck - |. nop - | lw CARG3, HI(BASE) - | lw CARG1, LO(BASE) - | li TMP1, 255 - | xori AT, NARGS8:RC, 8 // Exactly 1 argument. - | xor TMP0, CARG3, TISNUM // Integer. - | sltu TMP1, TMP1, CARG1 // !(255 < n). - | or AT, AT, TMP0 - | or AT, AT, TMP1 - | bnez AT, ->fff_fallback - |. li CARG3, 1 - | addiu CARG2, sp, ARG5_OFS - | sb CARG1, ARG5 - |->fff_newstr: - | load_got lj_str_new - | sw BASE, L->base - | sw PC, SAVE_PC - | call_intern lj_str_new // (lua_State *L, char *str, size_t l) - |. move CARG1, L - | // Returns GCstr *. - | lw BASE, L->base - |->fff_resstr: - | move SFARG1LO, CRET1 - | b ->fff_restv - |. li SFARG1HI, LJ_TSTR - | - |.ffunc string_sub - | ffgccheck - |. nop - | addiu AT, NARGS8:RC, -16 - | lw CARG3, 16+HI(BASE) - | lw TMP0, HI(BASE) - | lw STR:CARG1, LO(BASE) - | bltz AT, ->fff_fallback - |. lw CARG2, 8+HI(BASE) - | beqz AT, >1 - |. li CARG4, -1 - | bne CARG3, TISNUM, ->fff_fallback - |. lw CARG4, 16+LO(BASE) - |1: - | bne CARG2, TISNUM, ->fff_fallback - |. li AT, LJ_TSTR - | bne TMP0, AT, ->fff_fallback - |. lw CARG3, 8+LO(BASE) - | lw CARG2, STR:CARG1->len - | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end - | slt AT, CARG4, r0 - | addiu TMP0, CARG2, 1 - | addu TMP1, CARG4, TMP0 - | slt TMP3, CARG3, r0 - | movn CARG4, TMP1, AT // if (end < 0) end += len+1 - | addu TMP1, CARG3, TMP0 - | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1 - | li TMP2, 1 - | slt AT, CARG4, r0 - | slt TMP3, r0, CARG3 - | movn CARG4, r0, AT // if (end < 0) end = 0 - | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1 - | slt AT, CARG2, CARG4 - | movn CARG4, CARG2, AT // if (end > len) end = len - | addu CARG2, STR:CARG1, CARG3 - | subu CARG3, CARG4, CARG3 // len = end - start - | addiu CARG2, CARG2, sizeof(GCstr)-1 - | bgez CARG3, ->fff_newstr - |. addiu CARG3, CARG3, 1 // len++ - |->fff_emptystr: // Return empty string. - | addiu STR:SFARG1LO, DISPATCH, DISPATCH_GL(strempty) - | b ->fff_restv - |. li SFARG1HI, LJ_TSTR - | - |.macro ffstring_op, name - | .ffunc string_ .. name - | ffgccheck - |. nop - | lw CARG3, HI(BASE) - | lw STR:CARG2, LO(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. li AT, LJ_TSTR - | bne CARG3, AT, ->fff_fallback - |. addiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf) - | load_got lj_buf_putstr_ .. name - | lw TMP0, SBUF:CARG1->b - | sw L, SBUF:CARG1->L - | sw BASE, L->base - | sw TMP0, SBUF:CARG1->p - | call_intern extern lj_buf_putstr_ .. name - |. sw PC, SAVE_PC - | load_got lj_buf_tostr - | call_intern lj_buf_tostr - |. move SBUF:CARG1, SBUF:CRET1 - | b ->fff_resstr - |. lw BASE, L->base - |.endmacro - | - |ffstring_op reverse - |ffstring_op lower - |ffstring_op upper - | - |//-- Bit library -------------------------------------------------------- - | - |->vm_tobit_fb: - | beqz TMP1, ->fff_fallback - |.if FPU - |. ldc1 FARG1, 0(BASE) - | add.d FARG1, FARG1, TOBIT - | jr ra - |. mfc1 CRET1, FARG1 - |.else - |// FP number to bit conversion for soft-float. - |->vm_tobit: - | sll TMP0, SFARG1HI, 1 - | lui AT, 0x0020 - | addu TMP0, TMP0, AT - | slt AT, TMP0, r0 - | movz SFARG1LO, r0, AT - | beqz AT, >2 - |. li TMP1, 0x3e0 - | not TMP1, TMP1 - | sra TMP0, TMP0, 21 - | subu TMP0, TMP1, TMP0 - | slt AT, TMP0, r0 - | bnez AT, >1 - |. sll TMP1, SFARG1HI, 11 - | lui AT, 0x8000 - | or TMP1, TMP1, AT - | srl AT, SFARG1LO, 21 - | or TMP1, TMP1, AT - | slt AT, SFARG1HI, r0 - | beqz AT, >2 - |. srlv SFARG1LO, TMP1, TMP0 - | subu SFARG1LO, r0, SFARG1LO - |2: - | jr ra - |. move CRET1, SFARG1LO - |1: - | addiu TMP0, TMP0, 21 - | srlv TMP1, SFARG1LO, TMP0 - | li AT, 20 - | subu TMP0, AT, TMP0 - | sll SFARG1LO, SFARG1HI, 12 - | sllv AT, SFARG1LO, TMP0 - | or SFARG1LO, TMP1, AT - | slt AT, SFARG1HI, r0 - | beqz AT, <2 - |. nop - | jr ra - |. subu CRET1, r0, SFARG1LO - |.endif - | - |.macro .ffunc_bit, name - | .ffunc_1 bit_..name - | beq SFARG1HI, TISNUM, >6 - |. move CRET1, SFARG1LO - | bal ->vm_tobit_fb - |. sltu TMP1, SFARG1HI, TISNUM - |6: - |.endmacro - | - |.macro .ffunc_bit_op, name, ins - | .ffunc_bit name - | addiu TMP2, BASE, 8 - | addu TMP3, BASE, NARGS8:RC - |1: - | lw SFARG1HI, HI(TMP2) - | beq TMP2, TMP3, ->fff_resi - |. lw SFARG1LO, LO(TMP2) - |.if FPU - | bne SFARG1HI, TISNUM, >2 - |. addiu TMP2, TMP2, 8 - | b <1 - |. ins CRET1, CRET1, SFARG1LO - |2: - | ldc1 FARG1, -8(TMP2) - | sltu TMP1, SFARG1HI, TISNUM - | beqz TMP1, ->fff_fallback - |. add.d FARG1, FARG1, TOBIT - | mfc1 SFARG1LO, FARG1 - | b <1 - |. ins CRET1, CRET1, SFARG1LO - |.else - | beq SFARG1HI, TISNUM, >2 - |. move CRET2, CRET1 - | bal ->vm_tobit_fb - |. sltu TMP1, SFARG1HI, TISNUM - | move SFARG1LO, CRET2 - |2: - | ins CRET1, CRET1, SFARG1LO - | b <1 - |. addiu TMP2, TMP2, 8 - |.endif - |.endmacro - | - |.ffunc_bit_op band, and - |.ffunc_bit_op bor, or - |.ffunc_bit_op bxor, xor - | - |.ffunc_bit bswap - | srl TMP0, CRET1, 24 - | srl TMP2, CRET1, 8 - | sll TMP1, CRET1, 24 - | andi TMP2, TMP2, 0xff00 - | or TMP0, TMP0, TMP1 - | andi CRET1, CRET1, 0xff00 - | or TMP0, TMP0, TMP2 - | sll CRET1, CRET1, 8 - | b ->fff_resi - |. or CRET1, TMP0, CRET1 - | - |.ffunc_bit bnot - | b ->fff_resi - |. not CRET1, CRET1 - | - |.macro .ffunc_bit_sh, name, ins, shmod - | .ffunc_2 bit_..name - | beq SFARG1HI, TISNUM, >1 - |. nop - | bal ->vm_tobit_fb - |. sltu TMP1, SFARG1HI, TISNUM - | move SFARG1LO, CRET1 - |1: - | bne SFARG2HI, TISNUM, ->fff_fallback - |. nop - |.if shmod == 1 - | li AT, 32 - | subu TMP0, AT, SFARG2LO - | sllv SFARG2LO, SFARG1LO, SFARG2LO - | srlv SFARG1LO, SFARG1LO, TMP0 - |.elif shmod == 2 - | li AT, 32 - | subu TMP0, AT, SFARG2LO - | srlv SFARG2LO, SFARG1LO, SFARG2LO - | sllv SFARG1LO, SFARG1LO, TMP0 - |.endif - | b ->fff_resi - |. ins CRET1, SFARG1LO, SFARG2LO - |.endmacro - | - |.ffunc_bit_sh lshift, sllv, 0 - |.ffunc_bit_sh rshift, srlv, 0 - |.ffunc_bit_sh arshift, srav, 0 - |// Can't use rotrv, since it's only in MIPS32R2. - |.ffunc_bit_sh rol, or, 1 - |.ffunc_bit_sh ror, or, 2 - | - |.ffunc_bit tobit - |->fff_resi: - | lw PC, FRAME_PC(BASE) - | addiu RA, BASE, -8 - | sw TISNUM, -8+HI(BASE) - | b ->fff_res1 - |. sw CRET1, -8+LO(BASE) - | - |//----------------------------------------------------------------------- - | - |->fff_fallback: // Call fast function fallback handler. - | // BASE = new base, RB = CFUNC, RC = nargs*8 - | lw TMP3, CFUNC:RB->f - | addu TMP1, BASE, NARGS8:RC - | lw PC, FRAME_PC(BASE) // Fallback may overwrite PC. - | addiu TMP0, TMP1, 8*LUA_MINSTACK - | lw TMP2, L->maxstack - | sw PC, SAVE_PC // Redundant (but a defined value). - | sltu AT, TMP2, TMP0 - | sw BASE, L->base - | sw TMP1, L->top - | bnez AT, >5 // Need to grow stack. - |. move CFUNCADDR, TMP3 - | jalr TMP3 // (lua_State *L) - |. move CARG1, L - | // Either throws an error, or recovers and returns -1, 0 or nresults+1. - | lw BASE, L->base - | sll RD, CRET1, 3 - | bgtz CRET1, ->fff_res // Returned nresults+1? - |. addiu RA, BASE, -8 - |1: // Returned 0 or -1: retry fast path. - | lw TMP0, L->top - | lw LFUNC:RB, FRAME_FUNC(BASE) - | bnez CRET1, ->vm_call_tail // Returned -1? - |. subu NARGS8:RC, TMP0, BASE - | ins_callt // Returned 0: retry fast path. - | - |// Reconstruct previous base for vmeta_call during tailcall. - |->vm_call_tail: - | andi TMP0, PC, FRAME_TYPE - | li AT, -4 - | bnez TMP0, >3 - |. and TMP1, PC, AT - | lbu TMP1, OFS_RA(PC) - | sll TMP1, TMP1, 3 - | addiu TMP1, TMP1, 8 - |3: - | b ->vm_call_dispatch // Resolve again for tailcall. - |. subu TMP2, BASE, TMP1 - | - |5: // Grow stack for fallback handler. - | load_got lj_state_growstack - | li CARG2, LUA_MINSTACK - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | lw BASE, L->base - | b <1 - |. li CRET1, 0 // Force retry. - | - |->fff_gcstep: // Call GC step function. - | // BASE = new base, RC = nargs*8 - | move MULTRES, ra - | load_got lj_gc_step - | sw BASE, L->base - | addu TMP0, BASE, NARGS8:RC - | sw PC, SAVE_PC // Redundant (but a defined value). - | sw TMP0, L->top - | call_intern lj_gc_step // (lua_State *L) - |. move CARG1, L - | lw BASE, L->base - | move ra, MULTRES - | lw TMP0, L->top - | lw CFUNC:RB, FRAME_FUNC(BASE) - | jr ra - |. subu NARGS8:RC, TMP0, BASE - | - |//----------------------------------------------------------------------- - |//-- Special dispatch targets ------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_record: // Dispatch target for recording phase. - |.if JIT - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | andi AT, TMP3, HOOK_VMEVENT // No recording while in vmevent. - | bnez AT, >5 - | // Decrement the hookcount for consistency, but always do the call. - |. lw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andi AT, TMP3, HOOK_ACTIVE - | bnez AT, >1 - |. addiu TMP2, TMP2, -1 - | andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT - | beqz AT, >1 - |. nop - | b >1 - |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - |.endif - | - |->vm_rethook: // Dispatch target for return hooks. - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | andi AT, TMP3, HOOK_ACTIVE // Hook already active? - | beqz AT, >1 - |5: // Re-dispatch to static ins. - |. lw AT, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4. - | jr AT - |. nop - | - |->vm_inshook: // Dispatch target for instr/line hooks. - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | lw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andi AT, TMP3, HOOK_ACTIVE // Hook already active? - | bnez AT, <5 - |. andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT - | beqz AT, <5 - |. addiu TMP2, TMP2, -1 - | beqz TMP2, >1 - |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andi AT, TMP3, LUA_MASKLINE - | beqz AT, <5 - |1: - |. load_got lj_dispatch_ins - | sw MULTRES, SAVE_MULTRES - | move CARG2, PC - | sw BASE, L->base - | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. - | call_intern lj_dispatch_ins // (lua_State *L, const BCIns *pc) - |. move CARG1, L - |3: - | lw BASE, L->base - |4: // Re-dispatch to static ins. - | lw INS, -4(PC) - | decode_OP4a TMP1, INS - | decode_OP4b TMP1 - | addu TMP0, DISPATCH, TMP1 - | decode_RD8a RD, INS - | lw AT, GG_DISP2STATIC(TMP0) - | decode_RA8a RA, INS - | decode_RD8b RD - | jr AT - | decode_RA8b RA - | - |->cont_hook: // Continue from hook yield. - | addiu PC, PC, 4 - | b <4 - |. lw MULTRES, -24+LO(RB) // Restore MULTRES for *M ins. - | - |->vm_hotloop: // Hot loop counter underflow. - |.if JIT - | lw LFUNC:TMP1, FRAME_FUNC(BASE) - | addiu CARG1, DISPATCH, GG_DISP2J - | sw PC, SAVE_PC - | lw TMP1, LFUNC:TMP1->pc - | move CARG2, PC - | sw L, DISPATCH_J(L)(DISPATCH) - | lbu TMP1, PC2PROTO(framesize)(TMP1) - | load_got lj_trace_hot - | sw BASE, L->base - | sll TMP1, TMP1, 3 - | addu TMP1, BASE, TMP1 - | call_intern lj_trace_hot // (jit_State *J, const BCIns *pc) - |. sw TMP1, L->top - | b <3 - |. nop - |.endif - | - |->vm_callhook: // Dispatch target for call hooks. - |.if JIT - | b >1 - |.endif - |. move CARG2, PC - | - |->vm_hotcall: // Hot call counter underflow. - |.if JIT - | ori CARG2, PC, 1 - |1: - |.endif - | load_got lj_dispatch_call - | addu TMP0, BASE, RC - | sw PC, SAVE_PC - | sw BASE, L->base - | subu RA, RA, BASE - | sw TMP0, L->top - | call_intern lj_dispatch_call // (lua_State *L, const BCIns *pc) - |. move CARG1, L - | // Returns ASMFunction. - | lw BASE, L->base - | lw TMP0, L->top - | sw r0, SAVE_PC // Invalidate for subsequent line hook. - | subu NARGS8:RC, TMP0, BASE - | addu RA, BASE, RA - | lw LFUNC:RB, FRAME_FUNC(BASE) - | jr CRET1 - |. lw INS, -4(PC) - | - |->cont_stitch: // Trace stitching. - |.if JIT - | // RA = resultptr, RB = meta base - | lw INS, -4(PC) - | lw TMP2, -24+LO(RB) // Save previous trace. - | decode_RA8a RC, INS - | addiu AT, MULTRES, -8 - | decode_RA8b RC - | beqz AT, >2 - |. addu RC, BASE, RC // Call base. - |1: // Move results down. - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | addiu AT, AT, -8 - | addiu RA, RA, 8 - | sw SFRETHI, HI(RC) - | sw SFRETLO, LO(RC) - | bnez AT, <1 - |. addiu RC, RC, 8 - |2: - | decode_RA8a RA, INS - | decode_RB8a RB, INS - | decode_RA8b RA - | decode_RB8b RB - | addu RA, RA, RB - | addu RA, BASE, RA - |3: - | sltu AT, RC, RA - | bnez AT, >9 // More results wanted? - |. nop - | - | lhu TMP3, TRACE:TMP2->traceno - | lhu RD, TRACE:TMP2->link - | beq RD, TMP3, ->cont_nop // Blacklisted. - |. load_got lj_dispatch_stitch - | bnez RD, =>BC_JLOOP // Jump to stitched trace. - |. sll RD, RD, 3 - | - | // Stitch a new trace to the previous trace. - | sw TMP3, DISPATCH_J(exitno)(DISPATCH) - | sw L, DISPATCH_J(L)(DISPATCH) - | sw BASE, L->base - | addiu CARG1, DISPATCH, GG_DISP2J - | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) - |. move CARG2, PC - | b ->cont_nop - |. lw BASE, L->base - | - |9: - | sw TISNIL, HI(RC) - | b <3 - |. addiu RC, RC, 8 - |.endif - | - |->vm_profhook: // Dispatch target for profiler hook. -#if LJ_HASPROFILE - | load_got lj_dispatch_profile - | sw MULTRES, SAVE_MULTRES - | move CARG2, PC - | sw BASE, L->base - | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc) - |. move CARG1, L - | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. - | addiu PC, PC, -4 - | b ->cont_nop - |. lw BASE, L->base -#endif - | - |//----------------------------------------------------------------------- - |//-- Trace exit handler ------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro savex_, a, b - |.if FPU - | sdc1 f..a, 16+a*8(sp) - | sw r..a, 16+32*8+a*4(sp) - | sw r..b, 16+32*8+b*4(sp) - |.else - | sw r..a, 16+a*4(sp) - | sw r..b, 16+b*4(sp) - |.endif - |.endmacro - | - |->vm_exit_handler: - |.if JIT - |.if FPU - | addiu sp, sp, -(16+32*8+32*4) - |.else - | addiu sp, sp, -(16+32*4) - |.endif - | savex_ 0, 1 - | savex_ 2, 3 - | savex_ 4, 5 - | savex_ 6, 7 - | savex_ 8, 9 - | savex_ 10, 11 - | savex_ 12, 13 - | savex_ 14, 15 - | savex_ 16, 17 - | savex_ 18, 19 - | savex_ 20, 21 - | savex_ 22, 23 - | savex_ 24, 25 - | savex_ 26, 27 - |.if FPU - | sdc1 f28, 16+28*8(sp) - | sdc1 f30, 16+30*8(sp) - | sw r28, 16+32*8+28*4(sp) - | sw r30, 16+32*8+30*4(sp) - | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP. - | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp. - | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP - |.else - | sw r28, 16+28*4(sp) - | sw r30, 16+30*4(sp) - | sw r0, 16+31*4(sp) // Clear RID_TMP. - | addiu TMP2, sp, 16+32*4 // Recompute original value of sp. - | sw TMP2, 16+29*4(sp) // Store sp in RID_SP - |.endif - | li_vmstate EXIT - | addiu DISPATCH, JGL, -GG_DISP2G-32768 - | lw TMP1, 0(TMP2) // Load exit number. - | st_vmstate - | lw L, DISPATCH_GL(cur_L)(DISPATCH) - | lw BASE, DISPATCH_GL(jit_base)(DISPATCH) - | load_got lj_trace_exit - | sw L, DISPATCH_J(L)(DISPATCH) - | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. - | sw BASE, L->base - | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. - | addiu CARG1, DISPATCH, GG_DISP2J - | sw r0, DISPATCH_GL(jit_base)(DISPATCH) - | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) - |. addiu CARG2, sp, 16 - | // Returns MULTRES (unscaled) or negated error code. - | lw TMP1, L->cframe - | li AT, -4 - | lw BASE, L->base - | and sp, TMP1, AT - | lw PC, SAVE_PC // Get SAVE_PC. - | b >1 - |. sw L, SAVE_L // Set SAVE_L (on-trace resume/yield). - |.endif - |->vm_exit_interp: - |.if JIT - | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. - | lw L, SAVE_L - | addiu DISPATCH, JGL, -GG_DISP2G-32768 - | sw BASE, L->base - |1: - | bltz CRET1, >9 // Check for error from exit. - |. lw LFUNC:RB, FRAME_FUNC(BASE) - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | sll MULTRES, CRET1, 3 - | li TISNIL, LJ_TNIL - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | sw MULTRES, SAVE_MULTRES - | .FPU mtc1 TMP3, TOBIT - | lw TMP1, LFUNC:RB->pc - | sw r0, DISPATCH_GL(jit_base)(DISPATCH) - | lw KBASE, PC2PROTO(k)(TMP1) - | .FPU cvt.d.s TOBIT, TOBIT - | // Modified copy of ins_next which handles function header dispatch, too. - | lw INS, 0(PC) - | addiu PC, PC, 4 - | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 - | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) - | decode_OP4a TMP1, INS - | decode_OP4b TMP1 - | sltiu TMP2, TMP1, BC_FUNCF*4 - | addu TMP0, DISPATCH, TMP1 - | decode_RD8a RD, INS - | lw AT, 0(TMP0) - | decode_RA8a RA, INS - | beqz TMP2, >2 - |. decode_RA8b RA - | jr AT - |. decode_RD8b RD - |2: - | sltiu TMP2, TMP1, (BC_FUNCC+2)*4 // Fast function? - | bnez TMP2, >3 - |. lw TMP1, FRAME_PC(BASE) - | // Check frame below fast function. - | andi TMP0, TMP1, FRAME_TYPE - | bnez TMP0, >3 // Trace stitching continuation? - |. nop - | // Otherwise set KBASE for Lua function below fast function. - | lw TMP2, -4(TMP1) - | decode_RA8a TMP0, TMP2 - | decode_RA8b TMP0 - | subu TMP1, BASE, TMP0 - | lw LFUNC:TMP2, -8+FRAME_FUNC(TMP1) - | lw TMP1, LFUNC:TMP2->pc - | lw KBASE, PC2PROTO(k)(TMP1) - |3: - | addiu RC, MULTRES, -8 - | jr AT - |. addu RA, RA, BASE - | - |9: // Rethrow error from the right C frame. - | load_got lj_err_throw - | negu CARG2, CRET1 - | call_intern lj_err_throw // (lua_State *L, int errcode) - |. move CARG1, L - |.endif - | - |//----------------------------------------------------------------------- - |//-- Math helper functions ---------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Hard-float round to integer. - |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. - |.macro vm_round_hf, func - | lui TMP0, 0x4330 // Hiword of 2^52 (double). - | mtc1 r0, f4 - | mtc1 TMP0, f5 - | abs.d FRET2, FARG1 // |x| - | mfc1 AT, f13 - | c.olt.d 0, FRET2, f4 - | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 - | bc1f 0, >1 // Truncate only if |x| < 2^52. - |. sub.d FRET1, FRET1, f4 - | slt AT, AT, r0 - |.if "func" == "ceil" - | lui TMP0, 0xbff0 // Hiword of -1 (double). Preserves -0. - |.else - | lui TMP0, 0x3ff0 // Hiword of +1 (double). - |.endif - |.if "func" == "trunc" - | mtc1 TMP0, f5 - | c.olt.d 0, FRET2, FRET1 // |x| < result? - | sub.d FRET2, FRET1, f4 - | movt.d FRET1, FRET2, 0 // If yes, subtract +1. - | neg.d FRET2, FRET1 - | jr ra - |. movn.d FRET1, FRET2, AT // Merge sign bit back in. - |.else - | neg.d FRET2, FRET1 - | mtc1 TMP0, f5 - | movn.d FRET1, FRET2, AT // Merge sign bit back in. - |.if "func" == "ceil" - | c.olt.d 0, FRET1, FARG1 // x > result? - |.else - | c.olt.d 0, FARG1, FRET1 // x < result? - |.endif - | sub.d FRET2, FRET1, f4 // If yes, subtract +-1. - | jr ra - |. movt.d FRET1, FRET2, 0 - |.endif - |1: - | jr ra - |. mov.d FRET1, FARG1 - |.endmacro - | - |.macro vm_round, func - |.if FPU - | vm_round_hf, func - |.endif - |.endmacro - | - |->vm_floor: - | vm_round floor - |->vm_ceil: - | vm_round ceil - |->vm_trunc: - |.if JIT - | vm_round trunc - |.endif - | - |// Soft-float integer to number conversion. - |.macro sfi2d, AHI, ALO - |.if not FPU - | beqz ALO, >9 // Handle zero first. - |. sra TMP0, ALO, 31 - | xor TMP1, ALO, TMP0 - | subu TMP1, TMP1, TMP0 // Absolute value in TMP1. - | clz AHI, TMP1 - | andi TMP0, TMP0, 0x800 // Mask sign bit. - | li AT, 0x3ff+31-1 - | sllv TMP1, TMP1, AHI // Align mantissa left with leading 1. - | subu AHI, AT, AHI // Exponent - 1 in AHI. - | sll ALO, TMP1, 21 - | or AHI, AHI, TMP0 // Sign | Exponent. - | srl TMP1, TMP1, 11 - | sll AHI, AHI, 20 // Align left. - | jr ra - |. addu AHI, AHI, TMP1 // Add mantissa, increment exponent. - |9: - | jr ra - |. li AHI, 0 - |.endif - |.endmacro - | - |// Input SFARG1LO. Output: SFARG1*. Temporaries: AT, TMP0, TMP1. - |->vm_sfi2d_1: - | sfi2d SFARG1HI, SFARG1LO - | - |// Input SFARG2LO. Output: SFARG2*. Temporaries: AT, TMP0, TMP1. - |->vm_sfi2d_2: - | sfi2d SFARG2HI, SFARG2LO - | - |// Soft-float comparison. Equivalent to c.eq.d. - |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1. - |->vm_sfcmpeq: - |.if not FPU - | sll AT, SFARG1HI, 1 - | sll TMP0, SFARG2HI, 1 - | or CRET1, SFARG1LO, SFARG2LO - | or TMP1, AT, TMP0 - | or TMP1, TMP1, CRET1 - | beqz TMP1, >8 // Both args +-0: return 1. - |. sltu CRET1, r0, SFARG1LO - | lui TMP1, 0xffe0 - | addu AT, AT, CRET1 - | sltu CRET1, r0, SFARG2LO - | sltu AT, TMP1, AT - | addu TMP0, TMP0, CRET1 - | sltu TMP0, TMP1, TMP0 - | or TMP1, AT, TMP0 - | bnez TMP1, >9 // Either arg is NaN: return 0; - |. xor TMP0, SFARG1HI, SFARG2HI - | xor TMP1, SFARG1LO, SFARG2LO - | or AT, TMP0, TMP1 - | jr ra - |. sltiu CRET1, AT, 1 // Same values: return 1. - |8: - | jr ra - |. li CRET1, 1 - |9: - | jr ra - |. li CRET1, 0 - |.endif - | - |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d. - |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2. - |->vm_sfcmpult: - |.if not FPU - | b >1 - |. li CRET2, 1 - |.endif - | - |->vm_sfcmpolt: - |.if not FPU - | li CRET2, 0 - |1: - | sll AT, SFARG1HI, 1 - | sll TMP0, SFARG2HI, 1 - | or CRET1, SFARG1LO, SFARG2LO - | or TMP1, AT, TMP0 - | or TMP1, TMP1, CRET1 - | beqz TMP1, >8 // Both args +-0: return 0. - |. sltu CRET1, r0, SFARG1LO - | lui TMP1, 0xffe0 - | addu AT, AT, CRET1 - | sltu CRET1, r0, SFARG2LO - | sltu AT, TMP1, AT - | addu TMP0, TMP0, CRET1 - | sltu TMP0, TMP1, TMP0 - | or TMP1, AT, TMP0 - | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; - |. and AT, SFARG1HI, SFARG2HI - | bltz AT, >5 // Both args negative? - |. nop - | beq SFARG1HI, SFARG2HI, >8 - |. sltu CRET1, SFARG1LO, SFARG2LO - | jr ra - |. slt CRET1, SFARG1HI, SFARG2HI - |5: // Swap conditions if both operands are negative. - | beq SFARG1HI, SFARG2HI, >8 - |. sltu CRET1, SFARG2LO, SFARG1LO - | jr ra - |. slt CRET1, SFARG2HI, SFARG1HI - |8: - | jr ra - |. nop - |9: - | jr ra - |. move CRET1, CRET2 - |.endif - | - |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. - |// Input: SFARG*, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. - |->vm_sfcmpolex: - |.if not FPU - | sll AT, SFARG1HI, 1 - | sll TMP0, SFARG2HI, 1 - | or CRET1, SFARG1LO, SFARG2LO - | or TMP1, AT, TMP0 - | or TMP1, TMP1, CRET1 - | beqz TMP1, >8 // Both args +-0: return 1. - |. sltu CRET1, r0, SFARG1LO - | lui TMP1, 0xffe0 - | addu AT, AT, CRET1 - | sltu CRET1, r0, SFARG2LO - | sltu AT, TMP1, AT - | addu TMP0, TMP0, CRET1 - | sltu TMP0, TMP1, TMP0 - | or TMP1, AT, TMP0 - | bnez TMP1, >9 // Either arg is NaN: return 0; - |. and AT, SFARG1HI, SFARG2HI - | xor AT, AT, TMP3 - | bltz AT, >5 // Both args negative? - |. nop - | beq SFARG1HI, SFARG2HI, >6 - |. sltu CRET1, SFARG2LO, SFARG1LO - | jr ra - |. slt CRET1, SFARG2HI, SFARG1HI - |5: // Swap conditions if both operands are negative. - | beq SFARG1HI, SFARG2HI, >6 - |. sltu CRET1, SFARG1LO, SFARG2LO - | slt CRET1, SFARG1HI, SFARG2HI - |6: - | jr ra - |. nop - |8: - | jr ra - |. li CRET1, 1 - |9: - | jr ra - |. li CRET1, 0 - |.endif - | - |.macro sfmin_max, name, intins - |->vm_sf .. name: - |.if JIT and not FPU - | move TMP2, ra - | bal ->vm_sfcmpolt - |. nop - | move TMP0, CRET1 - | move SFRETHI, SFARG1HI - | move SFRETLO, SFARG1LO - | move ra, TMP2 - | intins SFRETHI, SFARG2HI, TMP0 - | jr ra - |. intins SFRETLO, SFARG2LO, TMP0 - |.endif - |.endmacro - | - | sfmin_max min, movz - | sfmin_max max, movn - | - |//----------------------------------------------------------------------- - |//-- Miscellaneous functions -------------------------------------------- - |//----------------------------------------------------------------------- - | - |//----------------------------------------------------------------------- - |//-- FFI helper functions ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Handler for callback functions. Callback slot number in r1, g in r2. - |->vm_ffi_callback: - |.if FFI - |.type CTSTATE, CTState, PC - | saveregs - | lw CTSTATE, GL:r2->ctype_state - | addiu DISPATCH, r2, GG_G2DISP - | load_got lj_ccallback_enter - | sw r1, CTSTATE->cb.slot - | sw CARG1, CTSTATE->cb.gpr[0] - | sw CARG2, CTSTATE->cb.gpr[1] - | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0] - | sw CARG3, CTSTATE->cb.gpr[2] - | sw CARG4, CTSTATE->cb.gpr[3] - | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1] - | addiu TMP0, sp, CFRAME_SPACE+16 - | sw TMP0, CTSTATE->cb.stack - | sw r0, SAVE_PC // Any value outside of bytecode is ok. - | move CARG2, sp - | call_intern lj_ccallback_enter // (CTState *cts, void *cf) - |. move CARG1, CTSTATE - | // Returns lua_State *. - | lw BASE, L:CRET1->base - | lw RC, L:CRET1->top - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | move L, CRET1 - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | lw LFUNC:RB, FRAME_FUNC(BASE) - | .FPU mtc1 TMP3, TOBIT - | li_vmstate INTERP - | li TISNIL, LJ_TNIL - | subu RC, RC, BASE - | st_vmstate - | .FPU cvt.d.s TOBIT, TOBIT - | ins_callt - |.endif - | - |->cont_ffi_callback: // Return from FFI callback. - |.if FFI - | load_got lj_ccallback_leave - | lw CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH) - | sw BASE, L->base - | sw RB, L->top - | sw L, CTSTATE->L - | move CARG2, RA - | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) - |. move CARG1, CTSTATE - | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0] - | lw CRET1, CTSTATE->cb.gpr[0] - | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1] - | b ->vm_leave_unw - |. lw CRET2, CTSTATE->cb.gpr[1] - |.endif - | - |->vm_ffi_call: // Call C function via FFI. - | // Caveat: needs special frame unwinding, see below. - |.if FFI - | .type CCSTATE, CCallState, CARG1 - | lw TMP1, CCSTATE->spadj - | lbu CARG2, CCSTATE->nsp - | move TMP2, sp - | subu sp, sp, TMP1 - | sw ra, -4(TMP2) - | sll CARG2, CARG2, 2 - | sw r16, -8(TMP2) - | sw CCSTATE, -12(TMP2) - | move r16, TMP2 - | addiu TMP1, CCSTATE, offsetof(CCallState, stack) - | addiu TMP2, sp, 16 - | beqz CARG2, >2 - |. addu TMP3, TMP1, CARG2 - |1: - | lw TMP0, 0(TMP1) - | addiu TMP1, TMP1, 4 - | sltu AT, TMP1, TMP3 - | sw TMP0, 0(TMP2) - | bnez AT, <1 - |. addiu TMP2, TMP2, 4 - |2: - | lw CFUNCADDR, CCSTATE->func - | lw CARG2, CCSTATE->gpr[1] - | lw CARG3, CCSTATE->gpr[2] - | lw CARG4, CCSTATE->gpr[3] - | .FPU ldc1 FARG1, CCSTATE->fpr[0] - | .FPU ldc1 FARG2, CCSTATE->fpr[1] - | jalr CFUNCADDR - |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. - | lw CCSTATE:TMP1, -12(r16) - | lw TMP2, -8(r16) - | lw ra, -4(r16) - | sw CRET1, CCSTATE:TMP1->gpr[0] - | sw CRET2, CCSTATE:TMP1->gpr[1] - |.if FPU - | sdc1 FRET1, CCSTATE:TMP1->fpr[0] - | sdc1 FRET2, CCSTATE:TMP1->fpr[1] - |.else - | sw CARG1, CCSTATE:TMP1->gpr[2] // Soft-float: complex double .im part. - | sw CARG2, CCSTATE:TMP1->gpr[3] - |.endif - | move sp, r16 - | jr ra - |. move r16, TMP2 - |.endif - |// Note: vm_ffi_call must be the last function in this object file! - | - |//----------------------------------------------------------------------- -} - -/* Generate the code for a single instruction. */ -static void build_ins(BuildCtx *ctx, BCOp op, int defop) -{ - int vk = 0; - |=>defop: - - switch (op) { - - /* -- Comparison ops ---------------------------------------------------- */ - - /* Remember: all ops branch for a true comparison, fall through otherwise. */ - - case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: - | // RA = src1*8, RD = src2*8, JMP with RD = target - |.macro bc_comp, FRA, FRD, RAHI, RALO, RDHI, RDLO, movop, fmovop, fcomp, sfcomp - | addu RA, BASE, RA - | addu RD, BASE, RD - | lw RAHI, HI(RA) - | lw RDHI, HI(RD) - | lhu TMP2, OFS_RD(PC) - | addiu PC, PC, 4 - | bne RAHI, TISNUM, >2 - |. lw RALO, LO(RA) - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | lw RDLO, LO(RD) - | bne RDHI, TISNUM, >5 - |. decode_RD4b TMP2 - | slt AT, SFARG1LO, SFARG2LO - | addu TMP2, TMP2, TMP3 - | movop TMP2, r0, AT - |1: - | addu PC, PC, TMP2 - | ins_next - | - |2: // RA is not an integer. - | sltiu AT, RAHI, LJ_TISNUM - | beqz AT, ->vmeta_comp - |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sltiu AT, RDHI, LJ_TISNUM - |.if FPU - | ldc1 FRA, 0(RA) - | ldc1 FRD, 0(RD) - |.else - | lw RDLO, LO(RD) - |.endif - | beqz AT, >4 - |. decode_RD4b TMP2 - |3: // RA and RD are both numbers. - |.if FPU - | fcomp f20, f22 - | addu TMP2, TMP2, TMP3 - | b <1 - |. fmovop TMP2, r0 - |.else - | bal sfcomp - |. addu TMP2, TMP2, TMP3 - | b <1 - |. movop TMP2, r0, CRET1 - |.endif - | - |4: // RA is a number, RD is not a number. - | bne RDHI, TISNUM, ->vmeta_comp - | // RA is a number, RD is an integer. Convert RD to a number. - |.if FPU - |. lwc1 FRD, LO(RD) - | b <3 - |. cvt.d.w FRD, FRD - |.else - |. nop - |.if "RDHI" == "SFARG1HI" - | bal ->vm_sfi2d_1 - |.else - | bal ->vm_sfi2d_2 - |.endif - |. nop - | b <3 - |. nop - |.endif - | - |5: // RA is an integer, RD is not an integer - | sltiu AT, RDHI, LJ_TISNUM - | beqz AT, ->vmeta_comp - | // RA is an integer, RD is a number. Convert RA to a number. - |.if FPU - |. mtc1 RALO, FRA - | ldc1 FRD, 0(RD) - | b <3 - | cvt.d.w FRA, FRA - |.else - |. nop - |.if "RAHI" == "SFARG1HI" - | bal ->vm_sfi2d_1 - |.else - | bal ->vm_sfi2d_2 - |.endif - |. nop - | b <3 - |. nop - |.endif - |.endmacro - | - if (op == BC_ISLT) { - | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movz, movf, c.olt.d, ->vm_sfcmpolt - } else if (op == BC_ISGE) { - | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movn, movt, c.olt.d, ->vm_sfcmpolt - } else if (op == BC_ISLE) { - | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movn, movt, c.ult.d, ->vm_sfcmpult - } else { - | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movz, movf, c.ult.d, ->vm_sfcmpult - } - break; - - case BC_ISEQV: case BC_ISNEV: - vk = op == BC_ISEQV; - | // RA = src1*8, RD = src2*8, JMP with RD = target - | addu RA, BASE, RA - | addiu PC, PC, 4 - | addu RD, BASE, RD - | lw SFARG1HI, HI(RA) - | lhu TMP2, -4+OFS_RD(PC) - | lw SFARG2HI, HI(RD) - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sltu AT, TISNUM, SFARG1HI - | sltu TMP0, TISNUM, SFARG2HI - | or AT, AT, TMP0 - if (vk) { - | beqz AT, ->BC_ISEQN_Z - } else { - | beqz AT, ->BC_ISNEN_Z - } - |. decode_RD4b TMP2 - | // Either or both types are not numbers. - | lw SFARG1LO, LO(RA) - | lw SFARG2LO, LO(RD) - | addu TMP2, TMP2, TMP3 - |.if FFI - | li TMP3, LJ_TCDATA - | beq SFARG1HI, TMP3, ->vmeta_equal_cd - |.endif - |. sltiu AT, SFARG1HI, LJ_TISPRI // Not a primitive? - |.if FFI - | beq SFARG2HI, TMP3, ->vmeta_equal_cd - |.endif - |. xor TMP3, SFARG1LO, SFARG2LO // Same tv? - | xor SFARG2HI, SFARG2HI, SFARG1HI // Same type? - | sltiu TMP0, SFARG1HI, LJ_TISTABUD+1 // Table or userdata? - | movz TMP3, r0, AT // Ignore tv if primitive. - | movn TMP0, r0, SFARG2HI // Tab/ud and same type? - | or AT, SFARG2HI, TMP3 // Same type && (pri||same tv). - | movz TMP0, r0, AT - | beqz TMP0, >1 // Done if not tab/ud or not same type or same tv. - if (vk) { - |. movn TMP2, r0, AT - } else { - |. movz TMP2, r0, AT - } - | // Different tables or userdatas. Need to check __eq metamethod. - | // Field metatable must be at same offset for GCtab and GCudata! - | lw TAB:TMP1, TAB:SFARG1LO->metatable - | beqz TAB:TMP1, >1 // No metatable? - |. nop - | lbu TMP1, TAB:TMP1->nomm - | andi TMP1, TMP1, 1<1 // Or 'no __eq' flag set? - |. nop - | b ->vmeta_equal // Handle __eq metamethod. - |. li TMP0, 1-vk // ne = 0 or 1. - |1: - | addu PC, PC, TMP2 - | ins_next - break; - - case BC_ISEQS: case BC_ISNES: - vk = op == BC_ISEQS; - | // RA = src*8, RD = str_const*8 (~), JMP with RD = target - | addu RA, BASE, RA - | addiu PC, PC, 4 - | lw TMP0, HI(RA) - | srl RD, RD, 1 - | lw STR:TMP3, LO(RA) - | subu RD, KBASE, RD - | lhu TMP2, -4+OFS_RD(PC) - |.if FFI - | li AT, LJ_TCDATA - | beq TMP0, AT, ->vmeta_equal_cd - |.endif - |. lw STR:TMP1, -4(RD) // KBASE-4-str_const*4 - | addiu TMP0, TMP0, -LJ_TSTR - | decode_RD4b TMP2 - | xor TMP1, STR:TMP1, STR:TMP3 - | or TMP0, TMP0, TMP1 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, TMP2, TMP3 - if (vk) { - | movn TMP2, r0, TMP0 - } else { - | movz TMP2, r0, TMP0 - } - | addu PC, PC, TMP2 - | ins_next - break; - - case BC_ISEQN: case BC_ISNEN: - vk = op == BC_ISEQN; - | // RA = src*8, RD = num_const*8, JMP with RD = target - | addu RA, BASE, RA - | addu RD, KBASE, RD - | lw SFARG1HI, HI(RA) - | lw SFARG2HI, HI(RD) - | lhu TMP2, OFS_RD(PC) - | addiu PC, PC, 4 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | decode_RD4b TMP2 - if (vk) { - |->BC_ISEQN_Z: - } else { - |->BC_ISNEN_Z: - } - | bne SFARG1HI, TISNUM, >3 - |. lw SFARG1LO, LO(RA) - | lw SFARG2LO, LO(RD) - | addu TMP2, TMP2, TMP3 - | bne SFARG2HI, TISNUM, >6 - |. xor AT, SFARG1LO, SFARG2LO - if (vk) { - | movn TMP2, r0, AT - |1: - | addu PC, PC, TMP2 - |2: - } else { - | movz TMP2, r0, AT - |1: - |2: - | addu PC, PC, TMP2 - } - | ins_next - | - |3: // RA is not an integer. - | sltiu AT, SFARG1HI, LJ_TISNUM - |.if FFI - | beqz AT, >8 - |.else - | beqz AT, <2 - |.endif - |. addu TMP2, TMP2, TMP3 - | sltiu AT, SFARG2HI, LJ_TISNUM - |.if FPU - | ldc1 f20, 0(RA) - | ldc1 f22, 0(RD) - |.endif - | beqz AT, >5 - |. lw SFARG2LO, LO(RD) - |4: // RA and RD are both numbers. - |.if FPU - | c.eq.d f20, f22 - | b <1 - if (vk) { - |. movf TMP2, r0 - } else { - |. movt TMP2, r0 - } - |.else - | bal ->vm_sfcmpeq - |. nop - | b <1 - if (vk) { - |. movz TMP2, r0, CRET1 - } else { - |. movn TMP2, r0, CRET1 - } - |.endif - | - |5: // RA is a number, RD is not a number. - |.if FFI - | bne SFARG2HI, TISNUM, >9 - |.else - | bne SFARG2HI, TISNUM, <2 - |.endif - | // RA is a number, RD is an integer. Convert RD to a number. - |.if FPU - |. lwc1 f22, LO(RD) - | b <4 - |. cvt.d.w f22, f22 - |.else - |. nop - | bal ->vm_sfi2d_2 - |. nop - | b <4 - |. nop - |.endif - | - |6: // RA is an integer, RD is not an integer - | sltiu AT, SFARG2HI, LJ_TISNUM - |.if FFI - | beqz AT, >9 - |.else - | beqz AT, <2 - |.endif - | // RA is an integer, RD is a number. Convert RA to a number. - |.if FPU - |. mtc1 SFARG1LO, f20 - | ldc1 f22, 0(RD) - | b <4 - | cvt.d.w f20, f20 - |.else - |. nop - | bal ->vm_sfi2d_1 - |. nop - | b <4 - |. nop - |.endif - | - |.if FFI - |8: - | li AT, LJ_TCDATA - | bne SFARG1HI, AT, <2 - |. nop - | b ->vmeta_equal_cd - |. nop - |9: - | li AT, LJ_TCDATA - | bne SFARG2HI, AT, <2 - |. nop - | b ->vmeta_equal_cd - |. nop - |.endif - break; - - case BC_ISEQP: case BC_ISNEP: - vk = op == BC_ISEQP; - | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target - | addu RA, BASE, RA - | srl TMP1, RD, 3 - | lw TMP0, HI(RA) - | lhu TMP2, OFS_RD(PC) - | not TMP1, TMP1 - | addiu PC, PC, 4 - |.if FFI - | li AT, LJ_TCDATA - | beq TMP0, AT, ->vmeta_equal_cd - |.endif - |. xor TMP0, TMP0, TMP1 - | decode_RD4b TMP2 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, TMP2, TMP3 - if (vk) { - | movn TMP2, r0, TMP0 - } else { - | movz TMP2, r0, TMP0 - } - | addu PC, PC, TMP2 - | ins_next - break; - - /* -- Unary test and copy ops ------------------------------------------- */ - - case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: - | // RA = dst*8 or unused, RD = src*8, JMP with RD = target - | addu RD, BASE, RD - | lhu TMP2, OFS_RD(PC) - | lw TMP0, HI(RD) - | addiu PC, PC, 4 - if (op == BC_IST || op == BC_ISF) { - | sltiu TMP0, TMP0, LJ_TISTRUECOND - | decode_RD4b TMP2 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, TMP2, TMP3 - if (op == BC_IST) { - | movz TMP2, r0, TMP0 - } else { - | movn TMP2, r0, TMP0 - } - | addu PC, PC, TMP2 - } else { - | sltiu TMP0, TMP0, LJ_TISTRUECOND - | lw SFRETHI, HI(RD) - | lw SFRETLO, LO(RD) - if (op == BC_ISTC) { - | beqz TMP0, >1 - } else { - | bnez TMP0, >1 - } - |. addu RA, BASE, RA - | decode_RD4b TMP2 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, TMP2, TMP3 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | addu PC, PC, TMP2 - |1: - } - | ins_next - break; - - case BC_ISTYPE: - | // RA = src*8, RD = -type*8 - | addu TMP2, BASE, RA - | srl TMP1, RD, 3 - | lw TMP0, HI(TMP2) - | ins_next1 - | addu AT, TMP0, TMP1 - | bnez AT, ->vmeta_istype - |. ins_next2 - break; - case BC_ISNUM: - | // RA = src*8, RD = -(TISNUM-1)*8 - | addu TMP2, BASE, RA - | lw TMP0, HI(TMP2) - | ins_next1 - | sltiu AT, TMP0, LJ_TISNUM - | beqz AT, ->vmeta_istype - |. ins_next2 - break; - - /* -- Unary ops --------------------------------------------------------- */ - - case BC_MOV: - | // RA = dst*8, RD = src*8 - | addu RD, BASE, RD - | addu RA, BASE, RA - | lw SFRETHI, HI(RD) - | lw SFRETLO, LO(RD) - | ins_next1 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | ins_next2 - break; - case BC_NOT: - | // RA = dst*8, RD = src*8 - | addu RD, BASE, RD - | addu RA, BASE, RA - | lw TMP0, HI(RD) - | li TMP1, LJ_TFALSE - | sltiu TMP0, TMP0, LJ_TISTRUECOND - | addiu TMP1, TMP0, LJ_TTRUE - | ins_next1 - | sw TMP1, HI(RA) - | ins_next2 - break; - case BC_UNM: - | // RA = dst*8, RD = src*8 - | addu RB, BASE, RD - | lw SFARG1HI, HI(RB) - | addu RA, BASE, RA - | bne SFARG1HI, TISNUM, >2 - |. lw SFARG1LO, LO(RB) - | lui TMP1, 0x8000 - | beq SFARG1LO, TMP1, ->vmeta_unm // Meta handler deals with -2^31. - |. negu SFARG1LO, SFARG1LO - |1: - | ins_next1 - | sw SFARG1HI, HI(RA) - | sw SFARG1LO, LO(RA) - | ins_next2 - |2: - | sltiu AT, SFARG1HI, LJ_TISNUM - | beqz AT, ->vmeta_unm - |. lui TMP1, 0x8000 - | b <1 - |. xor SFARG1HI, SFARG1HI, TMP1 - break; - case BC_LEN: - | // RA = dst*8, RD = src*8 - | addu CARG2, BASE, RD - | addu RA, BASE, RA - | lw TMP0, HI(CARG2) - | lw CARG1, LO(CARG2) - | li AT, LJ_TSTR - | bne TMP0, AT, >2 - |. li AT, LJ_TTAB - | lw CRET1, STR:CARG1->len - |1: - | ins_next1 - | sw TISNUM, HI(RA) - | sw CRET1, LO(RA) - | ins_next2 - |2: - | bne TMP0, AT, ->vmeta_len - |. nop -#if LJ_52 - | lw TAB:TMP2, TAB:CARG1->metatable - | bnez TAB:TMP2, >9 - |. nop - |3: -#endif - |->BC_LEN_Z: - | load_got lj_tab_len - | call_intern lj_tab_len // (GCtab *t) - |. nop - | // Returns uint32_t (but less than 2^31). - | b <1 - |. nop -#if LJ_52 - |9: - | lbu TMP0, TAB:TMP2->nomm - | andi TMP0, TMP0, 1<vmeta_len - |. nop -#endif - break; - - /* -- Binary ops -------------------------------------------------------- */ - - |.macro fpmod, a, b, c - | bal ->vm_floor // floor(b/c) - |. div.d FARG1, b, c - | mul.d a, FRET1, c - | sub.d a, b, a // b - floor(b/c)*c - |.endmacro - - |.macro sfpmod - | addiu sp, sp, -16 - | - | load_got __divdf3 - | sw SFARG1HI, HI(sp) - | sw SFARG1LO, LO(sp) - | sw SFARG2HI, 8+HI(sp) - | call_extern - |. sw SFARG2LO, 8+LO(sp) - | - | load_got floor - | move SFARG1HI, SFRETHI - | call_extern - |. move SFARG1LO, SFRETLO - | - | load_got __muldf3 - | move SFARG1HI, SFRETHI - | move SFARG1LO, SFRETLO - | lw SFARG2HI, 8+HI(sp) - | call_extern - |. lw SFARG2LO, 8+LO(sp) - | - | load_got __subdf3 - | lw SFARG1HI, HI(sp) - | lw SFARG1LO, LO(sp) - | move SFARG2HI, SFRETHI - | call_extern - |. move SFARG2LO, SFRETLO - | - | addiu sp, sp, 16 - |.endmacro - - |.macro ins_arithpre, label - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 - ||switch (vk) { - ||case 0: - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | // RA = dst*8, RB = src1*8, RC = num_const*8 - | addu RB, BASE, RB - |.if "label" ~= "none" - | b label - |.endif - |. addu RC, KBASE, RC - || break; - ||case 1: - | decode_RB8a RC, INS - | decode_RB8b RC - | decode_RDtoRC8 RB, RD - | // RA = dst*8, RB = num_const*8, RC = src1*8 - | addu RC, BASE, RC - |.if "label" ~= "none" - | b label - |.endif - |. addu RB, KBASE, RB - || break; - ||default: - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | // RA = dst*8, RB = src1*8, RC = src2*8 - | addu RB, BASE, RB - |.if "label" ~= "none" - | b label - |.endif - |. addu RC, BASE, RC - || break; - ||} - |.endmacro - | - |.macro ins_arith, intins, fpins, fpcall, label - | ins_arithpre none - | - |.if "label" ~= "none" - |label: - |.endif - | - | lw SFARG1HI, HI(RB) - | lw SFARG2HI, HI(RC) - | - |.if "intins" ~= "div" - | - | // Check for two integers. - | lw SFARG1LO, LO(RB) - | bne SFARG1HI, TISNUM, >5 - |. lw SFARG2LO, LO(RC) - | bne SFARG2HI, TISNUM, >5 - | - |.if "intins" == "addu" - |. intins CRET1, SFARG1LO, SFARG2LO - | xor TMP1, CRET1, SFARG1LO // ((y^a) & (y^b)) < 0: overflow. - | xor TMP2, CRET1, SFARG2LO - | and TMP1, TMP1, TMP2 - | bltz TMP1, ->vmeta_arith - |. addu RA, BASE, RA - |.elif "intins" == "subu" - |. intins CRET1, SFARG1LO, SFARG2LO - | xor TMP1, CRET1, SFARG1LO // ((y^a) & (a^b)) < 0: overflow. - | xor TMP2, SFARG1LO, SFARG2LO - | and TMP1, TMP1, TMP2 - | bltz TMP1, ->vmeta_arith - |. addu RA, BASE, RA - |.elif "intins" == "mult" - |. intins SFARG1LO, SFARG2LO - | mflo CRET1 - | mfhi TMP2 - | sra TMP1, CRET1, 31 - | bne TMP1, TMP2, ->vmeta_arith - |. addu RA, BASE, RA - |.else - |. load_got lj_vm_modi - | beqz SFARG2LO, ->vmeta_arith - |. addu RA, BASE, RA - |.if ENDIAN_BE - | move CARG1, SFARG1LO - |.endif - | call_extern - |. move CARG2, SFARG2LO - |.endif - | - | ins_next1 - | sw TISNUM, HI(RA) - | sw CRET1, LO(RA) - |3: - | ins_next2 - | - |.elif not FPU - | - | lw SFARG1LO, LO(RB) - | lw SFARG2LO, LO(RC) - | - |.endif - | - |5: // Check for two numbers. - | .FPU ldc1 f20, 0(RB) - | sltiu AT, SFARG1HI, LJ_TISNUM - | sltiu TMP0, SFARG2HI, LJ_TISNUM - | .FPU ldc1 f22, 0(RC) - | and AT, AT, TMP0 - | beqz AT, ->vmeta_arith - |. addu RA, BASE, RA - | - |.if FPU - | fpins FRET1, f20, f22 - |.elif "fpcall" == "sfpmod" - | sfpmod - |.else - | load_got fpcall - | call_extern - |. nop - |.endif - | - | ins_next1 - |.if not FPU - | sw SFRETHI, HI(RA) - |.endif - |.if "intins" ~= "div" - | b <3 - |.endif - |.if FPU - |. sdc1 FRET1, 0(RA) - |.else - |. sw SFRETLO, LO(RA) - |.endif - |.if "intins" == "div" - | ins_next2 - |.endif - | - |.endmacro - - case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - | ins_arith addu, add.d, __adddf3, none - break; - case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - | ins_arith subu, sub.d, __subdf3, none - break; - case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arith mult, mul.d, __muldf3, none - break; - case BC_DIVVN: - | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z - break; - case BC_DIVNV: case BC_DIVVV: - | ins_arithpre ->BC_DIVVN_Z - break; - case BC_MODVN: - | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z - break; - case BC_MODNV: case BC_MODVV: - | ins_arithpre ->BC_MODVN_Z - break; - case BC_POW: - | ins_arithpre none - | lw SFARG1HI, HI(RB) - | lw SFARG2HI, HI(RC) - | sltiu AT, SFARG1HI, LJ_TISNUM - | sltiu TMP0, SFARG2HI, LJ_TISNUM - | and AT, AT, TMP0 - | load_got pow - | beqz AT, ->vmeta_arith - |. addu RA, BASE, RA - |.if FPU - | ldc1 FARG1, 0(RB) - | ldc1 FARG2, 0(RC) - |.else - | lw SFARG1LO, LO(RB) - | lw SFARG2LO, LO(RC) - |.endif - | call_extern - |. nop - | ins_next1 - |.if FPU - | sdc1 FRET1, 0(RA) - |.else - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - |.endif - | ins_next2 - break; - - case BC_CAT: - | // RA = dst*8, RB = src_start*8, RC = src_end*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | subu CARG3, RC, RB - | sw BASE, L->base - | addu CARG2, BASE, RC - | move MULTRES, RB - |->BC_CAT_Z: - | load_got lj_meta_cat - | srl CARG3, CARG3, 3 - | sw PC, SAVE_PC - | call_intern lj_meta_cat // (lua_State *L, TValue *top, int left) - |. move CARG1, L - | // Returns NULL (finished) or TValue * (metamethod). - | bnez CRET1, ->vmeta_binop - |. lw BASE, L->base - | addu RB, BASE, MULTRES - | lw SFRETHI, HI(RB) - | lw SFRETLO, LO(RB) - | addu RA, BASE, RA - | ins_next1 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | ins_next2 - break; - - /* -- Constant ops ------------------------------------------------------ */ - - case BC_KSTR: - | // RA = dst*8, RD = str_const*8 (~) - | srl TMP1, RD, 1 - | subu TMP1, KBASE, TMP1 - | ins_next1 - | lw TMP0, -4(TMP1) // KBASE-4-str_const*4 - | addu RA, BASE, RA - | li TMP2, LJ_TSTR - | sw TMP0, LO(RA) - | sw TMP2, HI(RA) - | ins_next2 - break; - case BC_KCDATA: - |.if FFI - | // RA = dst*8, RD = cdata_const*8 (~) - | srl TMP1, RD, 1 - | subu TMP1, KBASE, TMP1 - | ins_next1 - | lw TMP0, -4(TMP1) // KBASE-4-cdata_const*4 - | addu RA, BASE, RA - | li TMP2, LJ_TCDATA - | sw TMP0, LO(RA) - | sw TMP2, HI(RA) - | ins_next2 - |.endif - break; - case BC_KSHORT: - | // RA = dst*8, RD = int16_literal*8 - | sra RD, INS, 16 - | addu RA, BASE, RA - | ins_next1 - | sw TISNUM, HI(RA) - | sw RD, LO(RA) - | ins_next2 - break; - case BC_KNUM: - | // RA = dst*8, RD = num_const*8 - | addu RD, KBASE, RD - | addu RA, BASE, RA - | lw SFRETHI, HI(RD) - | lw SFRETLO, LO(RD) - | ins_next1 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | ins_next2 - break; - case BC_KPRI: - | // RA = dst*8, RD = primitive_type*8 (~) - | srl TMP1, RD, 3 - | addu RA, BASE, RA - | not TMP0, TMP1 - | ins_next1 - | sw TMP0, HI(RA) - | ins_next2 - break; - case BC_KNIL: - | // RA = base*8, RD = end*8 - | addu RA, BASE, RA - | sw TISNIL, HI(RA) - | addiu RA, RA, 8 - | addu RD, BASE, RD - |1: - | sw TISNIL, HI(RA) - | slt AT, RA, RD - | bnez AT, <1 - |. addiu RA, RA, 8 - | ins_next_ - break; - - /* -- Upvalue and function ops ------------------------------------------ */ - - case BC_UGET: - | // RA = dst*8, RD = uvnum*8 - | lw LFUNC:RB, FRAME_FUNC(BASE) - | srl RD, RD, 1 - | addu RD, RD, LFUNC:RB - | lw UPVAL:RB, LFUNC:RD->uvptr - | ins_next1 - | lw TMP1, UPVAL:RB->v - | lw SFRETHI, HI(TMP1) - | lw SFRETLO, LO(TMP1) - | addu RA, BASE, RA - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | ins_next2 - break; - case BC_USETV: - | // RA = uvnum*8, RD = src*8 - | lw LFUNC:RB, FRAME_FUNC(BASE) - | srl RA, RA, 1 - | addu RD, BASE, RD - | addu RA, RA, LFUNC:RB - | lw UPVAL:RB, LFUNC:RA->uvptr - | lw SFRETHI, HI(RD) - | lw SFRETLO, LO(RD) - | lbu TMP3, UPVAL:RB->marked - | lw CARG2, UPVAL:RB->v - | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) - | lbu TMP0, UPVAL:RB->closed - | sw SFRETHI, HI(CARG2) - | sw SFRETLO, LO(CARG2) - | li AT, LJ_GC_BLACK|1 - | or TMP3, TMP3, TMP0 - | beq TMP3, AT, >2 // Upvalue is closed and black? - |. addiu TMP2, SFRETHI, -(LJ_TNUMX+1) - |1: - | ins_next - | - |2: // Check if new value is collectable. - | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) - | beqz AT, <1 // tvisgcv(v) - |. nop - | lbu TMP3, GCOBJ:SFRETLO->gch.marked - | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) - | beqz TMP3, <1 - |. load_got lj_gc_barrieruv - | // Crossed a write barrier. Move the barrier forward. - | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) - |. addiu CARG1, DISPATCH, GG_DISP2G - | b <1 - |. nop - break; - case BC_USETS: - | // RA = uvnum*8, RD = str_const*8 (~) - | lw LFUNC:RB, FRAME_FUNC(BASE) - | srl RA, RA, 1 - | srl TMP1, RD, 1 - | addu RA, RA, LFUNC:RB - | subu TMP1, KBASE, TMP1 - | lw UPVAL:RB, LFUNC:RA->uvptr - | lw STR:TMP1, -4(TMP1) // KBASE-4-str_const*4 - | lbu TMP2, UPVAL:RB->marked - | lw CARG2, UPVAL:RB->v - | lbu TMP3, STR:TMP1->marked - | andi AT, TMP2, LJ_GC_BLACK // isblack(uv) - | lbu TMP2, UPVAL:RB->closed - | li TMP0, LJ_TSTR - | sw STR:TMP1, LO(CARG2) - | bnez AT, >2 - |. sw TMP0, HI(CARG2) - |1: - | ins_next - | - |2: // Check if string is white and ensure upvalue is closed. - | beqz TMP2, <1 - |. andi AT, TMP3, LJ_GC_WHITES // iswhite(str) - | beqz AT, <1 - |. load_got lj_gc_barrieruv - | // Crossed a write barrier. Move the barrier forward. - | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) - |. addiu CARG1, DISPATCH, GG_DISP2G - | b <1 - |. nop - break; - case BC_USETN: - | // RA = uvnum*8, RD = num_const*8 - | lw LFUNC:RB, FRAME_FUNC(BASE) - | srl RA, RA, 1 - | addu RD, KBASE, RD - | addu RA, RA, LFUNC:RB - | lw UPVAL:RB, LFUNC:RA->uvptr - | lw SFRETHI, HI(RD) - | lw SFRETLO, LO(RD) - | lw TMP1, UPVAL:RB->v - | ins_next1 - | sw SFRETHI, HI(TMP1) - | sw SFRETLO, LO(TMP1) - | ins_next2 - break; - case BC_USETP: - | // RA = uvnum*8, RD = primitive_type*8 (~) - | lw LFUNC:RB, FRAME_FUNC(BASE) - | srl RA, RA, 1 - | srl TMP0, RD, 3 - | addu RA, RA, LFUNC:RB - | not TMP0, TMP0 - | lw UPVAL:RB, LFUNC:RA->uvptr - | ins_next1 - | lw TMP1, UPVAL:RB->v - | sw TMP0, HI(TMP1) - | ins_next2 - break; - - case BC_UCLO: - | // RA = level*8, RD = target - | lw TMP2, L->openupval - | branch_RD // Do this first since RD is not saved. - | load_got lj_func_closeuv - | sw BASE, L->base - | beqz TMP2, >1 - |. move CARG1, L - | call_intern lj_func_closeuv // (lua_State *L, TValue *level) - |. addu CARG2, BASE, RA - | lw BASE, L->base - |1: - | ins_next - break; - - case BC_FNEW: - | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) - | srl TMP1, RD, 1 - | load_got lj_func_newL_gc - | subu TMP1, KBASE, TMP1 - | lw CARG3, FRAME_FUNC(BASE) - | lw CARG2, -4(TMP1) // KBASE-4-tab_const*4 - | sw BASE, L->base - | sw PC, SAVE_PC - | // (lua_State *L, GCproto *pt, GCfuncL *parent) - | call_intern lj_func_newL_gc - |. move CARG1, L - | // Returns GCfuncL *. - | lw BASE, L->base - | li TMP0, LJ_TFUNC - | ins_next1 - | addu RA, BASE, RA - | sw LFUNC:CRET1, LO(RA) - | sw TMP0, HI(RA) - | ins_next2 - break; - - /* -- Table ops --------------------------------------------------------- */ - - case BC_TNEW: - case BC_TDUP: - | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) - | lw TMP0, DISPATCH_GL(gc.total)(DISPATCH) - | lw TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) - | sw BASE, L->base - | sw PC, SAVE_PC - | sltu AT, TMP0, TMP1 - | beqz AT, >5 - |1: - if (op == BC_TNEW) { - | load_got lj_tab_new - | srl CARG2, RD, 3 - | andi CARG2, CARG2, 0x7ff - | li TMP0, 0x801 - | addiu AT, CARG2, -0x7ff - | srl CARG3, RD, 14 - | movz CARG2, TMP0, AT - | // (lua_State *L, int32_t asize, uint32_t hbits) - | call_intern lj_tab_new - |. move CARG1, L - | // Returns Table *. - } else { - | load_got lj_tab_dup - | srl TMP1, RD, 1 - | subu TMP1, KBASE, TMP1 - | move CARG1, L - | call_intern lj_tab_dup // (lua_State *L, Table *kt) - |. lw CARG2, -4(TMP1) // KBASE-4-str_const*4 - | // Returns Table *. - } - | lw BASE, L->base - | ins_next1 - | addu RA, BASE, RA - | li TMP0, LJ_TTAB - | sw TAB:CRET1, LO(RA) - | sw TMP0, HI(RA) - | ins_next2 - |5: - | load_got lj_gc_step_fixtop - | move MULTRES, RD - | call_intern lj_gc_step_fixtop // (lua_State *L) - |. move CARG1, L - | b <1 - |. move RD, MULTRES - break; - - case BC_GGET: - | // RA = dst*8, RD = str_const*8 (~) - case BC_GSET: - | // RA = src*8, RD = str_const*8 (~) - | lw LFUNC:TMP2, FRAME_FUNC(BASE) - | srl TMP1, RD, 1 - | subu TMP1, KBASE, TMP1 - | lw TAB:RB, LFUNC:TMP2->env - | lw STR:RC, -4(TMP1) // KBASE-4-str_const*4 - if (op == BC_GGET) { - | b ->BC_TGETS_Z - } else { - | b ->BC_TSETS_Z - } - |. addu RA, BASE, RA - break; - - case BC_TGETV: - | // RA = dst*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | addu CARG2, BASE, RB - | addu CARG3, BASE, RC - | lw TMP1, HI(CARG2) - | lw TMP2, HI(CARG3) - | lw TAB:RB, LO(CARG2) - | li AT, LJ_TTAB - | bne TMP1, AT, ->vmeta_tgetv - |. addu RA, BASE, RA - | bne TMP2, TISNUM, >5 - |. lw RC, LO(CARG3) - | lw TMP0, TAB:RB->asize - | lw TMP1, TAB:RB->array - | sltu AT, RC, TMP0 - | sll TMP2, RC, 3 - | beqz AT, ->vmeta_tgetv // Integer key and in array part? - |. addu TMP2, TMP1, TMP2 - | lw SFRETHI, HI(TMP2) - | beq SFRETHI, TISNIL, >2 - |. lw SFRETLO, LO(TMP2) - |1: - | ins_next1 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | ins_next2 - | - |2: // Check for __index if table value is nil. - | lw TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. nop - | lbu TMP0, TAB:TMP2->nomm - | andi TMP0, TMP0, 1<vmeta_tgetv - |. nop - | - |5: - | li AT, LJ_TSTR - | bne TMP2, AT, ->vmeta_tgetv - |. nop - | b ->BC_TGETS_Z // String key? - |. nop - break; - case BC_TGETS: - | // RA = dst*8, RB = table*8, RC = str_const*4 (~) - | decode_RB8a RB, INS - | decode_RB8b RB - | addu CARG2, BASE, RB - | decode_RC4a RC, INS - | lw TMP0, HI(CARG2) - | decode_RC4b RC - | li AT, LJ_TTAB - | lw TAB:RB, LO(CARG2) - | subu CARG3, KBASE, RC - | lw STR:RC, -4(CARG3) // KBASE-4-str_const*4 - | bne TMP0, AT, ->vmeta_tgets1 - |. addu RA, BASE, RA - |->BC_TGETS_Z: - | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 - | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash - | lw NODE:TMP2, TAB:RB->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | sll TMP0, TMP1, 5 - | sll TMP1, TMP1, 3 - | subu TMP1, TMP0, TMP1 - | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - |1: - | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) - | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) - | lw NODE:TMP1, NODE:TMP2->next - | lw SFRETHI, offsetof(Node, val)+HI(NODE:TMP2) - | addiu CARG1, CARG1, -LJ_TSTR - | xor TMP0, TMP0, STR:RC - | or AT, CARG1, TMP0 - | bnez AT, >4 - |. lw TAB:TMP3, TAB:RB->metatable - | beq SFRETHI, TISNIL, >5 // Key found, but nil value? - |. lw SFRETLO, offsetof(Node, val)+LO(NODE:TMP2) - |3: - | ins_next1 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | ins_next2 - | - |4: // Follow hash chain. - | bnez NODE:TMP1, <1 - |. move NODE:TMP2, NODE:TMP1 - | // End of hash chain: key not found, nil result. - | - |5: // Check for __index if table value is nil. - | beqz TAB:TMP3, <3 // No metatable: done. - |. li SFRETHI, LJ_TNIL - | lbu TMP0, TAB:TMP3->nomm - | andi TMP0, TMP0, 1<vmeta_tgets - |. nop - break; - case BC_TGETB: - | // RA = dst*8, RB = table*8, RC = index*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | addu CARG2, BASE, RB - | decode_RDtoRC8 RC, RD - | lw CARG1, HI(CARG2) - | li AT, LJ_TTAB - | lw TAB:RB, LO(CARG2) - | addu RA, BASE, RA - | bne CARG1, AT, ->vmeta_tgetb - |. srl TMP0, RC, 3 - | lw TMP1, TAB:RB->asize - | lw TMP2, TAB:RB->array - | sltu AT, TMP0, TMP1 - | beqz AT, ->vmeta_tgetb - |. addu RC, TMP2, RC - | lw SFRETHI, HI(RC) - | beq SFRETHI, TISNIL, >5 - |. lw SFRETLO, LO(RC) - |1: - | ins_next1 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | ins_next2 - | - |5: // Check for __index if table value is nil. - | lw TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. nop - | lbu TMP1, TAB:TMP2->nomm - | andi TMP1, TMP1, 1<vmeta_tgetb // Caveat: preserve TMP0 and CARG2! - |. nop - break; - case BC_TGETR: - | // RA = dst*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | addu RB, BASE, RB - | addu RC, BASE, RC - | lw TAB:CARG1, LO(RB) - | lw CARG2, LO(RC) - | addu RA, BASE, RA - | lw TMP0, TAB:CARG1->asize - | lw TMP1, TAB:CARG1->array - | sltu AT, CARG2, TMP0 - | sll TMP2, CARG2, 3 - | beqz AT, ->vmeta_tgetr // In array part? - |. addu CRET1, TMP1, TMP2 - | lw SFARG2HI, HI(CRET1) - | lw SFARG2LO, LO(CRET1) - |->BC_TGETR_Z: - | ins_next1 - | sw SFARG2HI, HI(RA) - | sw SFARG2LO, LO(RA) - | ins_next2 - break; - - case BC_TSETV: - | // RA = src*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | addu CARG2, BASE, RB - | addu CARG3, BASE, RC - | lw TMP1, HI(CARG2) - | lw TMP2, HI(CARG3) - | lw TAB:RB, LO(CARG2) - | li AT, LJ_TTAB - | bne TMP1, AT, ->vmeta_tsetv - |. addu RA, BASE, RA - | bne TMP2, TISNUM, >5 - |. lw RC, LO(CARG3) - | lw TMP0, TAB:RB->asize - | lw TMP1, TAB:RB->array - | sltu AT, RC, TMP0 - | sll TMP2, RC, 3 - | beqz AT, ->vmeta_tsetv // Integer key and in array part? - |. addu TMP1, TMP1, TMP2 - | lw TMP0, HI(TMP1) - | lbu TMP3, TAB:RB->marked - | lw SFRETHI, HI(RA) - | beq TMP0, TISNIL, >3 - |. lw SFRETLO, LO(RA) - |1: - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | sw SFRETHI, HI(TMP1) - | bnez AT, >7 - |. sw SFRETLO, LO(TMP1) - |2: - | ins_next - | - |3: // Check for __newindex if previous value is nil. - | lw TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. nop - | lbu TMP2, TAB:TMP2->nomm - | andi TMP2, TMP2, 1<vmeta_tsetv - |. nop - | - |5: - | li AT, LJ_TSTR - | bne TMP2, AT, ->vmeta_tsetv - |. nop - | b ->BC_TSETS_Z // String key? - |. nop - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0, <2 - break; - case BC_TSETS: - | // RA = src*8, RB = table*8, RC = str_const*8 (~) - | decode_RB8a RB, INS - | decode_RB8b RB - | addu CARG2, BASE, RB - | decode_RC4a RC, INS - | lw TMP0, HI(CARG2) - | decode_RC4b RC - | li AT, LJ_TTAB - | subu CARG3, KBASE, RC - | lw TAB:RB, LO(CARG2) - | lw STR:RC, -4(CARG3) // KBASE-4-str_const*4 - | bne TMP0, AT, ->vmeta_tsets1 - |. addu RA, BASE, RA - |->BC_TSETS_Z: - | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 - | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash - | lw NODE:TMP2, TAB:RB->node - | sb r0, TAB:RB->nomm // Clear metamethod cache. - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | sll TMP0, TMP1, 5 - | sll TMP1, TMP1, 3 - | subu TMP1, TMP0, TMP1 - | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - |.if FPU - | ldc1 f20, 0(RA) - |.else - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - |.endif - |1: - | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) - | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) - | li AT, LJ_TSTR - | lw NODE:TMP1, NODE:TMP2->next - | bne CARG1, AT, >5 - |. lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) - | bne TMP0, STR:RC, >5 - |. lbu TMP3, TAB:RB->marked - | beq CARG2, TISNIL, >4 // Key found, but nil value? - |. lw TAB:TMP0, TAB:RB->metatable - |2: - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - |.if FPU - | bnez AT, >7 - |. sdc1 f20, NODE:TMP2->val - |.else - | sw SFRETHI, NODE:TMP2->val.u32.hi - | bnez AT, >7 - |. sw SFRETLO, NODE:TMP2->val.u32.lo - |.endif - |3: - | ins_next - | - |4: // Check for __newindex if previous value is nil. - | beqz TAB:TMP0, <2 // No metatable: done. - |. nop - | lbu TMP0, TAB:TMP0->nomm - | andi TMP0, TMP0, 1<vmeta_tsets - |. nop - | - |5: // Follow hash chain. - | bnez NODE:TMP1, <1 - |. move NODE:TMP2, NODE:TMP1 - | // End of hash chain: key not found, add a new one - | - | // But check for __newindex first. - | lw TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, >6 // No metatable: continue. - |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | lbu TMP0, TAB:TMP2->nomm - | andi TMP0, TMP0, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. - |. li AT, LJ_TSTR - |6: - | load_got lj_tab_newkey - | sw STR:RC, LO(CARG3) - | sw AT, HI(CARG3) - | sw BASE, L->base - | move CARG2, TAB:RB - | sw PC, SAVE_PC - | call_intern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k - |. move CARG1, L - | // Returns TValue *. - | lw BASE, L->base - |.if FPU - | b <3 // No 2nd write barrier needed. - |. sdc1 f20, 0(CRET1) - |.else - | lw SFARG1HI, HI(RA) - | lw SFARG1LO, LO(RA) - | sw SFARG1HI, HI(CRET1) - | b <3 // No 2nd write barrier needed. - |. sw SFARG1LO, LO(CRET1) - |.endif - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0, <3 - break; - case BC_TSETB: - | // RA = src*8, RB = table*8, RC = index*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | addu CARG2, BASE, RB - | decode_RDtoRC8 RC, RD - | lw CARG1, HI(CARG2) - | li AT, LJ_TTAB - | lw TAB:RB, LO(CARG2) - | addu RA, BASE, RA - | bne CARG1, AT, ->vmeta_tsetb - |. srl TMP0, RC, 3 - | lw TMP1, TAB:RB->asize - | lw TMP2, TAB:RB->array - | sltu AT, TMP0, TMP1 - | beqz AT, ->vmeta_tsetb - |. addu RC, TMP2, RC - | lw TMP1, HI(RC) - | lbu TMP3, TAB:RB->marked - | beq TMP1, TISNIL, >5 - |1: - |. lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | sw SFRETHI, HI(RC) - | bnez AT, >7 - |. sw SFRETLO, LO(RC) - |2: - | ins_next - | - |5: // Check for __newindex if previous value is nil. - | lw TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. nop - | lbu TMP1, TAB:TMP2->nomm - | andi TMP1, TMP1, 1<vmeta_tsetb // Caveat: preserve TMP0 and CARG2! - |. nop - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0, <2 - break; - case BC_TSETR: - | // RA = dst*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | addu CARG1, BASE, RB - | addu CARG3, BASE, RC - | lw TAB:CARG2, LO(CARG1) - | lw CARG3, LO(CARG3) - | lbu TMP3, TAB:CARG2->marked - | lw TMP0, TAB:CARG2->asize - | lw TMP1, TAB:CARG2->array - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | bnez AT, >7 - |. addu RA, BASE, RA - |2: - | sltu AT, CARG3, TMP0 - | sll TMP2, CARG3, 3 - | beqz AT, ->vmeta_tsetr // In array part? - |. addu CRET1, TMP1, TMP2 - |->BC_TSETR_Z: - | lw SFARG1HI, HI(RA) - | lw SFARG1LO, LO(RA) - | ins_next1 - | sw SFARG1HI, HI(CRET1) - | sw SFARG1LO, LO(CRET1) - | ins_next2 - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP3, TMP0, <2 - break; - - case BC_TSETM: - | // RA = base*8 (table at base-1), RD = num_const*8 (start index) - | addu RA, BASE, RA - |1: - | addu TMP3, KBASE, RD - | lw TAB:CARG2, -8+LO(RA) // Guaranteed to be a table. - | addiu TMP0, MULTRES, -8 - | lw TMP3, LO(TMP3) // Integer constant is in lo-word. - | beqz TMP0, >4 // Nothing to copy? - |. srl CARG3, TMP0, 3 - | addu CARG3, CARG3, TMP3 - | lw TMP2, TAB:CARG2->asize - | sll TMP1, TMP3, 3 - | lbu TMP3, TAB:CARG2->marked - | lw CARG1, TAB:CARG2->array - | sltu AT, TMP2, CARG3 - | bnez AT, >5 - |. addu TMP2, RA, TMP0 - | addu TMP1, TMP1, CARG1 - | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) - |3: // Copy result slots to table. - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | addiu RA, RA, 8 - | sltu AT, RA, TMP2 - | sw SFRETHI, HI(TMP1) - | sw SFRETLO, LO(TMP1) - | bnez AT, <3 - |. addiu TMP1, TMP1, 8 - | bnez TMP0, >7 - |. nop - |4: - | ins_next - | - |5: // Need to resize array part. - | load_got lj_tab_reasize - | sw BASE, L->base - | sw PC, SAVE_PC - | move BASE, RD - | call_intern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) - |. move CARG1, L - | // Must not reallocate the stack. - | move RD, BASE - | b <1 - |. lw BASE, L->base // Reload BASE for lack of a saved register. - | - |7: // Possible table write barrier for any value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP3, TMP0, <4 - break; - - /* -- Calls and vararg handling ----------------------------------------- */ - - case BC_CALLM: - | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 - | decode_RDtoRC8 NARGS8:RC, RD - | b ->BC_CALL_Z - |. addu NARGS8:RC, NARGS8:RC, MULTRES - break; - case BC_CALL: - | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 - | decode_RDtoRC8 NARGS8:RC, RD - |->BC_CALL_Z: - | move TMP2, BASE - | addu BASE, BASE, RA - | li AT, LJ_TFUNC - | lw TMP0, HI(BASE) - | lw LFUNC:RB, LO(BASE) - | addiu BASE, BASE, 8 - | bne TMP0, AT, ->vmeta_call - |. addiu NARGS8:RC, NARGS8:RC, -8 - | ins_call - break; - - case BC_CALLMT: - | // RA = base*8, (RB = 0,) RC = extra_nargs*8 - | addu NARGS8:RD, NARGS8:RD, MULTRES // BC_CALLT gets RC from RD. - | // Fall through. Assumes BC_CALLT follows. - break; - case BC_CALLT: - | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 - | addu RA, BASE, RA - | li AT, LJ_TFUNC - | lw TMP0, HI(RA) - | lw LFUNC:RB, LO(RA) - | move NARGS8:RC, RD - | lw TMP1, FRAME_PC(BASE) - | addiu RA, RA, 8 - | bne TMP0, AT, ->vmeta_callt - |. addiu NARGS8:RC, NARGS8:RC, -8 - |->BC_CALLT_Z: - | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'. - | lbu TMP3, LFUNC:RB->ffid - | bnez TMP0, >7 - |. xori TMP2, TMP1, FRAME_VARG - |1: - | sw LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. - | sltiu AT, TMP3, 2 // (> FF_C) Calling a fast function? - | move TMP2, BASE - | beqz NARGS8:RC, >3 - |. move TMP3, NARGS8:RC - |2: - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | addiu RA, RA, 8 - | addiu TMP3, TMP3, -8 - | sw SFRETHI, HI(TMP2) - | sw SFRETLO, LO(TMP2) - | bnez TMP3, <2 - |. addiu TMP2, TMP2, 8 - |3: - | or TMP0, TMP0, AT - | beqz TMP0, >5 - |. nop - |4: - | ins_callt - | - |5: // Tailcall to a fast function with a Lua frame below. - | lw INS, -4(TMP1) - | decode_RA8a RA, INS - | decode_RA8b RA - | subu TMP1, BASE, RA - | lw LFUNC:TMP1, -8+FRAME_FUNC(TMP1) - | lw TMP1, LFUNC:TMP1->pc - | b <4 - |. lw KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. - | - |7: // Tailcall from a vararg function. - | andi AT, TMP2, FRAME_TYPEP - | bnez AT, <1 // Vararg frame below? - |. subu TMP2, BASE, TMP2 // Relocate BASE down. - | move BASE, TMP2 - | lw TMP1, FRAME_PC(TMP2) - | b <1 - |. andi TMP0, TMP1, FRAME_TYPE - break; - - case BC_ITERC: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) - | move TMP2, BASE - | addu BASE, BASE, RA - | li AT, LJ_TFUNC - | lw TMP1, -24+HI(BASE) - | lw LFUNC:RB, -24+LO(BASE) - | lw SFARG1HI, -16+HI(BASE) - | lw SFARG1LO, -16+LO(BASE) - | lw SFARG2HI, -8+HI(BASE) - | lw SFARG2LO, -8+LO(BASE) - | sw TMP1, HI(BASE) // Copy callable. - | sw LFUNC:RB, LO(BASE) - | sw SFARG1HI, 8+HI(BASE) // Copy state. - | sw SFARG1LO, 8+LO(BASE) - | sw SFARG2HI, 16+HI(BASE) // Copy control var. - | sw SFARG2LO, 16+LO(BASE) - | addiu BASE, BASE, 8 - | bne TMP1, AT, ->vmeta_call - |. li NARGS8:RC, 16 // Iterators get 2 arguments. - | ins_call - break; - - case BC_ITERN: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. - |.endif - | addu RA, BASE, RA - | lw TAB:RB, -16+LO(RA) - | lw RC, -8+LO(RA) // Get index from control var. - | lw TMP0, TAB:RB->asize - | lw TMP1, TAB:RB->array - | addiu PC, PC, 4 - |1: // Traverse array part. - | sltu AT, RC, TMP0 - | beqz AT, >5 // Index points after array part? - |. sll TMP3, RC, 3 - | addu TMP3, TMP1, TMP3 - | lw SFARG1HI, HI(TMP3) - | lw SFARG1LO, LO(TMP3) - | lhu RD, -4+OFS_RD(PC) - | sw TISNUM, HI(RA) - | sw RC, LO(RA) - | beq SFARG1HI, TISNIL, <1 // Skip holes in array part. - |. addiu RC, RC, 1 - | sw SFARG1HI, 8+HI(RA) - | sw SFARG1LO, 8+LO(RA) - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | decode_RD4b RD - | addu RD, RD, TMP3 - | sw RC, -8+LO(RA) // Update control var. - | addu PC, PC, RD - |3: - | ins_next - | - |5: // Traverse hash part. - | lw TMP1, TAB:RB->hmask - | subu RC, RC, TMP0 - | lw TMP2, TAB:RB->node - |6: - | sltu AT, TMP1, RC // End of iteration? Branch to ITERL+1. - | bnez AT, <3 - |. sll TMP3, RC, 5 - | sll RB, RC, 3 - | subu TMP3, TMP3, RB - | addu NODE:TMP3, TMP3, TMP2 - | lw SFARG1HI, NODE:TMP3->val.u32.hi - | lw SFARG1LO, NODE:TMP3->val.u32.lo - | lhu RD, -4+OFS_RD(PC) - | beq SFARG1HI, TISNIL, <6 // Skip holes in hash part. - |. addiu RC, RC, 1 - | lw SFARG2HI, NODE:TMP3->key.u32.hi - | lw SFARG2LO, NODE:TMP3->key.u32.lo - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sw SFARG1HI, 8+HI(RA) - | sw SFARG1LO, 8+LO(RA) - | addu RC, RC, TMP0 - | decode_RD4b RD - | addu RD, RD, TMP3 - | sw SFARG2HI, HI(RA) - | sw SFARG2LO, LO(RA) - | addu PC, PC, RD - | b <3 - |. sw RC, -8+LO(RA) // Update control var. - break; - - case BC_ISNEXT: - | // RA = base*8, RD = target (points to ITERN) - | addu RA, BASE, RA - | srl TMP0, RD, 1 - | lw CARG1, -24+HI(RA) - | lw CFUNC:CARG2, -24+LO(RA) - | addu TMP0, PC, TMP0 - | lw CARG3, -16+HI(RA) - | lw CARG4, -8+HI(RA) - | li AT, LJ_TFUNC - | bne CARG1, AT, >5 - |. lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) - | lbu CARG2, CFUNC:CARG2->ffid - | addiu CARG3, CARG3, -LJ_TTAB - | addiu CARG4, CARG4, -LJ_TNIL - | or CARG3, CARG3, CARG4 - | addiu CARG2, CARG2, -FF_next_N - | or CARG2, CARG2, CARG3 - | bnez CARG2, >5 - |. lui TMP1, 0xfffe - | addu PC, TMP0, TMP2 - | ori TMP1, TMP1, 0x7fff - | sw r0, -8+LO(RA) // Initialize control var. - | sw TMP1, -8+HI(RA) - |1: - | ins_next - |5: // Despecialize bytecode if any of the checks fail. - | li TMP3, BC_JMP - | li TMP1, BC_ITERC - | sb TMP3, -4+OFS_OP(PC) - | addu PC, TMP0, TMP2 - | b <1 - |. sb TMP1, OFS_OP(PC) - break; - - case BC_VARG: - | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 - | lw TMP0, FRAME_PC(BASE) - | decode_RDtoRC8 RC, RD - | decode_RB8a RB, INS - | addu RC, BASE, RC - | decode_RB8b RB - | addu RA, BASE, RA - | addiu RC, RC, FRAME_VARG - | addu TMP2, RA, RB - | addiu TMP3, BASE, -8 // TMP3 = vtop - | subu RC, RC, TMP0 // RC = vbase - | // Note: RC may now be even _above_ BASE if nargs was < numparams. - | beqz RB, >5 // Copy all varargs? - |. subu TMP1, TMP3, RC - | addiu TMP2, TMP2, -16 - |1: // Copy vararg slots to destination slots. - | lw CARG1, HI(RC) - | sltu AT, RC, TMP3 - | lw CARG2, LO(RC) - | addiu RC, RC, 8 - | movz CARG1, TISNIL, AT - | sw CARG1, HI(RA) - | sw CARG2, LO(RA) - | sltu AT, RA, TMP2 - | bnez AT, <1 - |. addiu RA, RA, 8 - |3: - | ins_next - | - |5: // Copy all varargs. - | lw TMP0, L->maxstack - | blez TMP1, <3 // No vararg slots? - |. li MULTRES, 8 // MULTRES = (0+1)*8 - | addu TMP2, RA, TMP1 - | sltu AT, TMP0, TMP2 - | bnez AT, >7 - |. addiu MULTRES, TMP1, 8 - |6: - | lw SFRETHI, HI(RC) - | lw SFRETLO, LO(RC) - | addiu RC, RC, 8 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | sltu AT, RC, TMP3 - | bnez AT, <6 // More vararg slots? - |. addiu RA, RA, 8 - | b <3 - |. nop - | - |7: // Grow stack for varargs. - | load_got lj_state_growstack - | sw RA, L->top - | subu RA, RA, BASE - | sw BASE, L->base - | subu BASE, RC, BASE // Need delta, because BASE may change. - | sw PC, SAVE_PC - | srl CARG2, TMP1, 3 - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | move RC, BASE - | lw BASE, L->base - | addu RA, BASE, RA - | addu RC, BASE, RC - | b <6 - |. addiu TMP3, BASE, -8 - break; - - /* -- Returns ----------------------------------------------------------- */ - - case BC_RETM: - | // RA = results*8, RD = extra_nresults*8 - | addu RD, RD, MULTRES // MULTRES >= 8, so RD >= 8. - | // Fall through. Assumes BC_RET follows. - break; - - case BC_RET: - | // RA = results*8, RD = (nresults+1)*8 - | lw PC, FRAME_PC(BASE) - | addu RA, BASE, RA - | move MULTRES, RD - |1: - | andi TMP0, PC, FRAME_TYPE - | bnez TMP0, ->BC_RETV_Z - |. xori TMP1, PC, FRAME_VARG - | - |->BC_RET_Z: - | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return - | lw INS, -4(PC) - | addiu TMP2, BASE, -8 - | addiu RC, RD, -8 - | decode_RA8a TMP0, INS - | decode_RB8a RB, INS - | decode_RA8b TMP0 - | decode_RB8b RB - | addu TMP3, TMP2, RB - | beqz RC, >3 - |. subu BASE, TMP2, TMP0 - |2: - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | addiu RA, RA, 8 - | addiu RC, RC, -8 - | sw SFRETHI, HI(TMP2) - | sw SFRETLO, LO(TMP2) - | bnez RC, <2 - |. addiu TMP2, TMP2, 8 - |3: - | addiu TMP3, TMP3, -8 - |5: - | sltu AT, TMP2, TMP3 - | bnez AT, >6 - |. lw LFUNC:TMP1, FRAME_FUNC(BASE) - | ins_next1 - | lw TMP1, LFUNC:TMP1->pc - | lw KBASE, PC2PROTO(k)(TMP1) - | ins_next2 - | - |6: // Fill up results with nil. - | sw TISNIL, HI(TMP2) - | b <5 - |. addiu TMP2, TMP2, 8 - | - |->BC_RETV_Z: // Non-standard return case. - | andi TMP2, TMP1, FRAME_TYPEP - | bnez TMP2, ->vm_return - |. nop - | // Return from vararg function: relocate BASE down. - | subu BASE, BASE, TMP1 - | b <1 - |. lw PC, FRAME_PC(BASE) - break; - - case BC_RET0: case BC_RET1: - | // RA = results*8, RD = (nresults+1)*8 - | lw PC, FRAME_PC(BASE) - | addu RA, BASE, RA - | move MULTRES, RD - | andi TMP0, PC, FRAME_TYPE - | bnez TMP0, ->BC_RETV_Z - |. xori TMP1, PC, FRAME_VARG - | - | lw INS, -4(PC) - | addiu TMP2, BASE, -8 - if (op == BC_RET1) { - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - } - | decode_RB8a RB, INS - | decode_RA8a RA, INS - | decode_RB8b RB - | decode_RA8b RA - if (op == BC_RET1) { - | sw SFRETHI, HI(TMP2) - | sw SFRETLO, LO(TMP2) - } - | subu BASE, TMP2, RA - |5: - | sltu AT, RD, RB - | bnez AT, >6 - |. lw LFUNC:TMP1, FRAME_FUNC(BASE) - | ins_next1 - | lw TMP1, LFUNC:TMP1->pc - | lw KBASE, PC2PROTO(k)(TMP1) - | ins_next2 - | - |6: // Fill up results with nil. - | addiu TMP2, TMP2, 8 - | addiu RD, RD, 8 - | b <5 - if (op == BC_RET1) { - |. sw TISNIL, HI(TMP2) - } else { - |. sw TISNIL, -8+HI(TMP2) - } - break; - - /* -- Loops and branches ------------------------------------------------ */ - - case BC_FORL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IFORL follows. - break; - - case BC_JFORI: - case BC_JFORL: -#if !LJ_HASJIT - break; -#endif - case BC_FORI: - case BC_IFORL: - | // RA = base*8, RD = target (after end of loop or start of loop) - vk = (op == BC_IFORL || op == BC_JFORL); - | addu RA, BASE, RA - | lw SFARG1HI, FORL_IDX*8+HI(RA) - | lw SFARG1LO, FORL_IDX*8+LO(RA) - if (op != BC_JFORL) { - | srl RD, RD, 1 - | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, RD, TMP2 - } - if (!vk) { - | lw SFARG2HI, FORL_STOP*8+HI(RA) - | lw SFARG2LO, FORL_STOP*8+LO(RA) - | bne SFARG1HI, TISNUM, >5 - |. lw SFRETHI, FORL_STEP*8+HI(RA) - | xor AT, SFARG2HI, TISNUM - | lw SFRETLO, FORL_STEP*8+LO(RA) - | xor TMP0, SFRETHI, TISNUM - | or AT, AT, TMP0 - | bnez AT, ->vmeta_for - |. slt AT, SFRETLO, r0 - | slt CRET1, SFARG2LO, SFARG1LO - | slt TMP1, SFARG1LO, SFARG2LO - | movn CRET1, TMP1, AT - } else { - | bne SFARG1HI, TISNUM, >5 - |. lw SFARG2LO, FORL_STEP*8+LO(RA) - | lw SFRETLO, FORL_STOP*8+LO(RA) - | move TMP3, SFARG1LO - | addu SFARG1LO, SFARG1LO, SFARG2LO - | xor TMP0, SFARG1LO, TMP3 - | xor TMP1, SFARG1LO, SFARG2LO - | and TMP0, TMP0, TMP1 - | slt TMP1, SFARG1LO, SFRETLO - | slt CRET1, SFRETLO, SFARG1LO - | slt AT, SFARG2LO, r0 - | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow. - | movn CRET1, TMP1, AT - | or CRET1, CRET1, TMP0 - } - |1: - if (op == BC_FORI) { - | movz TMP2, r0, CRET1 - | addu PC, PC, TMP2 - } else if (op == BC_JFORI) { - | addu PC, PC, TMP2 - | lhu RD, -4+OFS_RD(PC) - } else if (op == BC_IFORL) { - | movn TMP2, r0, CRET1 - | addu PC, PC, TMP2 - } - if (vk) { - | sw SFARG1HI, FORL_IDX*8+HI(RA) - | sw SFARG1LO, FORL_IDX*8+LO(RA) - } - | ins_next1 - | sw SFARG1HI, FORL_EXT*8+HI(RA) - | sw SFARG1LO, FORL_EXT*8+LO(RA) - |2: - if (op == BC_JFORI) { - | beqz CRET1, =>BC_JLOOP - |. decode_RD8b RD - } else if (op == BC_JFORL) { - | beqz CRET1, =>BC_JLOOP - } - | ins_next2 - | - |5: // FP loop. - |.if FPU - if (!vk) { - | ldc1 f0, FORL_IDX*8(RA) - | ldc1 f2, FORL_STOP*8(RA) - | sltiu TMP0, SFARG1HI, LJ_TISNUM - | sltiu TMP1, SFARG2HI, LJ_TISNUM - | sltiu AT, SFRETHI, LJ_TISNUM - | and TMP0, TMP0, TMP1 - | and AT, AT, TMP0 - | beqz AT, ->vmeta_for - |. slt TMP3, SFRETHI, r0 - | c.ole.d 0, f0, f2 - | c.ole.d 1, f2, f0 - | li CRET1, 1 - | movt CRET1, r0, 0 - | movt AT, r0, 1 - | b <1 - |. movn CRET1, AT, TMP3 - } else { - | ldc1 f0, FORL_IDX*8(RA) - | ldc1 f4, FORL_STEP*8(RA) - | ldc1 f2, FORL_STOP*8(RA) - | lw SFARG2HI, FORL_STEP*8+HI(RA) - | add.d f0, f0, f4 - | c.ole.d 0, f0, f2 - | c.ole.d 1, f2, f0 - | slt TMP3, SFARG2HI, r0 - | li CRET1, 1 - | li AT, 1 - | movt CRET1, r0, 0 - | movt AT, r0, 1 - | movn CRET1, AT, TMP3 - if (op == BC_IFORL) { - | movn TMP2, r0, CRET1 - | addu PC, PC, TMP2 - } - | sdc1 f0, FORL_IDX*8(RA) - | ins_next1 - | b <2 - |. sdc1 f0, FORL_EXT*8(RA) - } - |.else - if (!vk) { - | sltiu TMP0, SFARG1HI, LJ_TISNUM - | sltiu TMP1, SFARG2HI, LJ_TISNUM - | sltiu AT, SFRETHI, LJ_TISNUM - | and TMP0, TMP0, TMP1 - | and AT, AT, TMP0 - | beqz AT, ->vmeta_for - |. nop - | bal ->vm_sfcmpolex - |. move TMP3, SFRETHI - | b <1 - |. nop - } else { - | lw SFARG2HI, FORL_STEP*8+HI(RA) - | load_got __adddf3 - | call_extern - |. sw TMP2, ARG5 - | lw SFARG2HI, FORL_STOP*8+HI(RA) - | lw SFARG2LO, FORL_STOP*8+LO(RA) - | move SFARG1HI, SFRETHI - | move SFARG1LO, SFRETLO - | bal ->vm_sfcmpolex - |. lw TMP3, FORL_STEP*8+HI(RA) - if ( op == BC_JFORL ) { - | lhu RD, -4+OFS_RD(PC) - | lw TMP2, ARG5 - | b <1 - |. decode_RD8b RD - } else { - | b <1 - |. lw TMP2, ARG5 - } - } - |.endif - break; - - case BC_ITERL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IITERL follows. - break; - - case BC_JITERL: -#if !LJ_HASJIT - break; -#endif - case BC_IITERL: - | // RA = base*8, RD = target - | addu RA, BASE, RA - | lw TMP1, HI(RA) - | beq TMP1, TISNIL, >1 // Stop if iterator returned nil. - |. lw TMP2, LO(RA) - if (op == BC_JITERL) { - | sw TMP1, -8+HI(RA) - | b =>BC_JLOOP - |. sw TMP2, -8+LO(RA) - } else { - | branch_RD // Otherwise save control var + branch. - | sw TMP1, -8+HI(RA) - | sw TMP2, -8+LO(RA) - } - |1: - | ins_next - break; - - case BC_LOOP: - | // RA = base*8, RD = target (loop extent) - | // Note: RA/RD is only used by trace recorder to determine scope/extent - | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_ILOOP follows. - break; - - case BC_ILOOP: - | // RA = base*8, RD = target (loop extent) - | ins_next - break; - - case BC_JLOOP: - |.if JIT - | // RA = base*8 (ignored), RD = traceno*8 - | lw TMP1, DISPATCH_J(trace)(DISPATCH) - | srl RD, RD, 1 - | li AT, 0 - | addu TMP1, TMP1, RD - | // Traces on MIPS don't store the trace number, so use 0. - | sw AT, DISPATCH_GL(vmstate)(DISPATCH) - | lw TRACE:TMP2, 0(TMP1) - | sw BASE, DISPATCH_GL(jit_base)(DISPATCH) - | lw TMP2, TRACE:TMP2->mcode - | sw L, DISPATCH_GL(tmpbuf.L)(DISPATCH) - | jr TMP2 - |. addiu JGL, DISPATCH, GG_DISP2G+32768 - |.endif - break; - - case BC_JMP: - | // RA = base*8 (only used by trace recorder), RD = target - | branch_RD - | ins_next - break; - - /* -- Function headers -------------------------------------------------- */ - - case BC_FUNCF: - |.if JIT - | hotcall - |.endif - case BC_FUNCV: /* NYI: compiled vararg functions. */ - | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. - break; - - case BC_JFUNCF: -#if !LJ_HASJIT - break; -#endif - case BC_IFUNCF: - | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 - | lw TMP2, L->maxstack - | lbu TMP1, -4+PC2PROTO(numparams)(PC) - | lw KBASE, -4+PC2PROTO(k)(PC) - | sltu AT, TMP2, RA - | bnez AT, ->vm_growstack_l - |. sll TMP1, TMP1, 3 - if (op != BC_JFUNCF) { - | ins_next1 - } - |2: - | sltu AT, NARGS8:RC, TMP1 // Check for missing parameters. - | bnez AT, >3 - |. addu AT, BASE, NARGS8:RC - if (op == BC_JFUNCF) { - | decode_RD8a RD, INS - | b =>BC_JLOOP - |. decode_RD8b RD - } else { - | ins_next2 - } - | - |3: // Clear missing parameters. - | sw TISNIL, HI(AT) - | b <2 - |. addiu NARGS8:RC, NARGS8:RC, 8 - break; - - case BC_JFUNCV: -#if !LJ_HASJIT - break; -#endif - | NYI // NYI: compiled vararg functions - break; /* NYI: compiled vararg functions. */ - - case BC_IFUNCV: - | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 - | addu TMP1, BASE, RC - | lw TMP2, L->maxstack - | addu TMP0, RA, RC - | sw LFUNC:RB, LO(TMP1) // Store copy of LFUNC. - | addiu TMP3, RC, 8+FRAME_VARG - | sltu AT, TMP0, TMP2 - | lw KBASE, -4+PC2PROTO(k)(PC) - | beqz AT, ->vm_growstack_l - |. sw TMP3, HI(TMP1) // Store delta + FRAME_VARG. - | lbu TMP2, -4+PC2PROTO(numparams)(PC) - | move RA, BASE - | move RC, TMP1 - | ins_next1 - | beqz TMP2, >3 - |. addiu BASE, TMP1, 8 - |1: - | lw TMP0, HI(RA) - | lw TMP3, LO(RA) - | sltu AT, RA, RC // Less args than parameters? - | move CARG1, TMP0 - | movz TMP0, TISNIL, AT // Clear missing parameters. - | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC). - | sw TMP3, 8+LO(TMP1) - | addiu TMP2, TMP2, -1 - | sw TMP0, 8+HI(TMP1) - | addiu TMP1, TMP1, 8 - | sw CARG1, HI(RA) - | bnez TMP2, <1 - |. addiu RA, RA, 8 - |3: - | ins_next2 - break; - - case BC_FUNCC: - case BC_FUNCCW: - | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 - if (op == BC_FUNCC) { - | lw CFUNCADDR, CFUNC:RB->f - } else { - | lw CFUNCADDR, DISPATCH_GL(wrapf)(DISPATCH) - } - | addu TMP1, RA, NARGS8:RC - | lw TMP2, L->maxstack - | addu RC, BASE, NARGS8:RC - | sw BASE, L->base - | sltu AT, TMP2, TMP1 - | sw RC, L->top - | li_vmstate C - if (op == BC_FUNCCW) { - | lw CARG2, CFUNC:RB->f - } - | bnez AT, ->vm_growstack_c // Need to grow stack. - |. move CARG1, L - | jalr CFUNCADDR // (lua_State *L [, lua_CFunction f]) - |. st_vmstate - | // Returns nresults. - | lw BASE, L->base - | sll RD, CRET1, 3 - | lw TMP1, L->top - | li_vmstate INTERP - | lw PC, FRAME_PC(BASE) // Fetch PC of caller. - | subu RA, TMP1, RD // RA = L->top - nresults*8 - | sw L, DISPATCH_GL(cur_L)(DISPATCH) - | b ->vm_returnc - |. st_vmstate - break; - - /* ---------------------------------------------------------------------- */ - - default: - fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); - exit(2); - break; - } -} - -static int build_backend(BuildCtx *ctx) -{ - int op; - - dasm_growpc(Dst, BC__MAX); - - build_subroutines(ctx); - - |.code_op - for (op = 0; op < BC__MAX; op++) - build_ins(ctx, (BCOp)op, op); - - return BC__MAX; -} - -/* Emit pseudo frame-info for all assembler functions. */ -static void emit_asm_debug(BuildCtx *ctx) -{ - int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); - int i; - switch (ctx->mode) { - case BUILD_elfasm: - fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); - fprintf(ctx->fp, - ".Lframe0:\n" - "\t.4byte .LECIE0-.LSCIE0\n" - ".LSCIE0:\n" - "\t.4byte 0xffffffff\n" - "\t.byte 0x1\n" - "\t.string \"\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 31\n" - "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE0:\n\n"); - fprintf(ctx->fp, - ".LSFDE0:\n" - "\t.4byte .LEFDE0-.LASFDE0\n" - ".LASFDE0:\n" - "\t.4byte .Lframe0\n" - "\t.4byte .Lbegin\n" - "\t.4byte %d\n" - "\t.byte 0xe\n\t.uleb128 %d\n" - "\t.byte 0x9f\n\t.sleb128 1\n" - "\t.byte 0x9e\n\t.sleb128 2\n", - fcofs, CFRAME_SIZE); - for (i = 23; i >= 16; i--) - fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); -#if !LJ_SOFTFP - for (i = 30; i >= 20; i -= 2) - fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); -#endif - fprintf(ctx->fp, - "\t.align 2\n" - ".LEFDE0:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".LSFDE1:\n" - "\t.4byte .LEFDE1-.LASFDE1\n" - ".LASFDE1:\n" - "\t.4byte .Lframe0\n" - "\t.4byte lj_vm_ffi_call\n" - "\t.4byte %d\n" - "\t.byte 0x9f\n\t.uleb128 1\n" - "\t.byte 0x90\n\t.uleb128 2\n" - "\t.byte 0xd\n\t.uleb128 0x10\n" - "\t.align 2\n" - ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); -#endif -#if !LJ_NO_UNWIND - fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n"); - fprintf(ctx->fp, - "\t.globl lj_err_unwind_dwarf\n" - ".Lframe1:\n" - "\t.4byte .LECIE1-.LSCIE1\n" - ".LSCIE1:\n" - "\t.4byte 0\n" - "\t.byte 0x1\n" - "\t.string \"zPR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 31\n" - "\t.uleb128 6\n" /* augmentation length */ - "\t.byte 0\n" - "\t.4byte lj_err_unwind_dwarf\n" - "\t.byte 0\n" - "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE1:\n\n"); - fprintf(ctx->fp, - ".LSFDE2:\n" - "\t.4byte .LEFDE2-.LASFDE2\n" - ".LASFDE2:\n" - "\t.4byte .LASFDE2-.Lframe1\n" - "\t.4byte .Lbegin\n" - "\t.4byte %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.uleb128 %d\n" - "\t.byte 0x9f\n\t.sleb128 1\n" - "\t.byte 0x9e\n\t.sleb128 2\n", - fcofs, CFRAME_SIZE); - for (i = 23; i >= 16; i--) - fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); -#if !LJ_SOFTFP - for (i = 30; i >= 20; i -= 2) - fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); -#endif - fprintf(ctx->fp, - "\t.align 2\n" - ".LEFDE2:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".Lframe2:\n" - "\t.4byte .LECIE2-.LSCIE2\n" - ".LSCIE2:\n" - "\t.4byte 0\n" - "\t.byte 0x1\n" - "\t.string \"zR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 31\n" - "\t.uleb128 1\n" /* augmentation length */ - "\t.byte 0\n" - "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE2:\n\n"); - fprintf(ctx->fp, - ".LSFDE3:\n" - "\t.4byte .LEFDE3-.LASFDE3\n" - ".LASFDE3:\n" - "\t.4byte .LASFDE3-.Lframe2\n" - "\t.4byte lj_vm_ffi_call\n" - "\t.4byte %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0x9f\n\t.uleb128 1\n" - "\t.byte 0x90\n\t.uleb128 2\n" - "\t.byte 0xd\n\t.uleb128 0x10\n" - "\t.align 2\n" - ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); -#endif -#endif - break; - default: - break; - } -} - diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc deleted file mode 100644 index f0c22a74df..0000000000 --- a/src/vm_mips64.dasc +++ /dev/null @@ -1,5062 +0,0 @@ -|// Low-level VM code for MIPS64 CPUs. -|// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -|// -|// Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. -|// Sponsored by Cisco Systems, Inc. -| -|.arch mips64 -|.section code_op, code_sub -| -|.actionlist build_actionlist -|.globals GLOB_ -|.globalnames globnames -|.externnames extnames -| -|// Note: The ragged indentation of the instructions is intentional. -|// The starting columns indicate data dependencies. -| -|//----------------------------------------------------------------------- -| -|// Fixed register assignments for the interpreter. -|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra -| -|.macro .FPU, a, b -|.if FPU -| a, b -|.endif -|.endmacro -| -|// The following must be C callee-save (but BASE is often refetched). -|.define BASE, r16 // Base of current Lua stack frame. -|.define KBASE, r17 // Constants of current Lua function. -|.define PC, r18 // Next PC. -|.define DISPATCH, r19 // Opcode dispatch table. -|.define LREG, r20 // Register holding lua_State (also in SAVE_L). -|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. -| -|.define JGL, r30 // On-trace: global_State + 32768. -| -|// Constants for type-comparisons, stores and conversions. C callee-save. -|.define TISNIL, r30 -|.define TISNUM, r22 -|.if FPU -|.define TOBIT, f30 // 2^52 + 2^51. -|.endif -| -|// The following temporaries are not saved across C calls, except for RA. -|.define RA, r23 // Callee-save. -|.define RB, r8 -|.define RC, r9 -|.define RD, r10 -|.define INS, r11 -| -|.define AT, r1 // Assembler temporary. -|.define TMP0, r12 -|.define TMP1, r13 -|.define TMP2, r14 -|.define TMP3, r15 -| -|// MIPS n64 calling convention. -|.define CFUNCADDR, r25 -|.define CARG1, r4 -|.define CARG2, r5 -|.define CARG3, r6 -|.define CARG4, r7 -|.define CARG5, r8 -|.define CARG6, r9 -|.define CARG7, r10 -|.define CARG8, r11 -| -|.define CRET1, r2 -|.define CRET2, r3 -| -|.if FPU -|.define FARG1, f12 -|.define FARG2, f13 -|.define FARG3, f14 -|.define FARG4, f15 -|.define FARG5, f16 -|.define FARG6, f17 -|.define FARG7, f18 -|.define FARG8, f19 -| -|.define FRET1, f0 -|.define FRET2, f2 -|.endif -| -|// Stack layout while in interpreter. Must match with lj_frame.h. -|.if FPU // MIPS64 hard-float. -| -|.define CFRAME_SPACE, 192 // Delta for sp. -| -|//----- 16 byte aligned, <-- sp entering interpreter -|.define SAVE_ERRF, 188(sp) // 32 bit values. -|.define SAVE_NRES, 184(sp) -|.define SAVE_CFRAME, 176(sp) // 64 bit values. -|.define SAVE_L, 168(sp) -|.define SAVE_PC, 160(sp) -|//----- 16 byte aligned -|.define SAVE_GPR_, 80 // .. 80+10*8: 64 bit GPR saves. -|.define SAVE_FPR_, 16 // .. 16+8*8: 64 bit FPR saves. -| -|.else // MIPS64 soft-float -| -|.define CFRAME_SPACE, 128 // Delta for sp. -| -|//----- 16 byte aligned, <-- sp entering interpreter -|.define SAVE_ERRF, 124(sp) // 32 bit values. -|.define SAVE_NRES, 120(sp) -|.define SAVE_CFRAME, 112(sp) // 64 bit values. -|.define SAVE_L, 104(sp) -|.define SAVE_PC, 96(sp) -|//----- 16 byte aligned -|.define SAVE_GPR_, 16 // .. 16+10*8: 64 bit GPR saves. -| -|.endif -| -|.define TMPX, 8(sp) // Unused by interpreter, temp for JIT code. -|.define TMPD, 0(sp) -|//----- 16 byte aligned -| -|.define TMPD_OFS, 0 -| -|.define SAVE_MULTRES, TMPD -| -|//----------------------------------------------------------------------- -| -|.macro saveregs -| daddiu sp, sp, -CFRAME_SPACE -| sd ra, SAVE_GPR_+9*8(sp) -| sd r30, SAVE_GPR_+8*8(sp) -| .FPU sdc1 f31, SAVE_FPR_+7*8(sp) -| sd r23, SAVE_GPR_+7*8(sp) -| .FPU sdc1 f30, SAVE_FPR_+6*8(sp) -| sd r22, SAVE_GPR_+6*8(sp) -| .FPU sdc1 f29, SAVE_FPR_+5*8(sp) -| sd r21, SAVE_GPR_+5*8(sp) -| .FPU sdc1 f28, SAVE_FPR_+4*8(sp) -| sd r20, SAVE_GPR_+4*8(sp) -| .FPU sdc1 f27, SAVE_FPR_+3*8(sp) -| sd r19, SAVE_GPR_+3*8(sp) -| .FPU sdc1 f26, SAVE_FPR_+2*8(sp) -| sd r18, SAVE_GPR_+2*8(sp) -| .FPU sdc1 f25, SAVE_FPR_+1*8(sp) -| sd r17, SAVE_GPR_+1*8(sp) -| .FPU sdc1 f24, SAVE_FPR_+0*8(sp) -| sd r16, SAVE_GPR_+0*8(sp) -|.endmacro -| -|.macro restoreregs_ret -| ld ra, SAVE_GPR_+9*8(sp) -| ld r30, SAVE_GPR_+8*8(sp) -| ld r23, SAVE_GPR_+7*8(sp) -| .FPU ldc1 f31, SAVE_FPR_+7*8(sp) -| ld r22, SAVE_GPR_+6*8(sp) -| .FPU ldc1 f30, SAVE_FPR_+6*8(sp) -| ld r21, SAVE_GPR_+5*8(sp) -| .FPU ldc1 f29, SAVE_FPR_+5*8(sp) -| ld r20, SAVE_GPR_+4*8(sp) -| .FPU ldc1 f28, SAVE_FPR_+4*8(sp) -| ld r19, SAVE_GPR_+3*8(sp) -| .FPU ldc1 f27, SAVE_FPR_+3*8(sp) -| ld r18, SAVE_GPR_+2*8(sp) -| .FPU ldc1 f26, SAVE_FPR_+2*8(sp) -| ld r17, SAVE_GPR_+1*8(sp) -| .FPU ldc1 f25, SAVE_FPR_+1*8(sp) -| ld r16, SAVE_GPR_+0*8(sp) -| .FPU ldc1 f24, SAVE_FPR_+0*8(sp) -| jr ra -| daddiu sp, sp, CFRAME_SPACE -|.endmacro -| -|// Type definitions. Some of these are only used for documentation. -|.type L, lua_State, LREG -|.type GL, global_State -|.type TVALUE, TValue -|.type GCOBJ, GCobj -|.type STR, GCstr -|.type TAB, GCtab -|.type LFUNC, GCfuncL -|.type CFUNC, GCfuncC -|.type PROTO, GCproto -|.type UPVAL, GCupval -|.type NODE, Node -|.type NARGS8, int -|.type TRACE, GCtrace -|.type SBUF, SBuf -| -|//----------------------------------------------------------------------- -| -|// Trap for not-yet-implemented parts. -|.macro NYI; .long 0xf0f0f0f0; .endmacro -| -|// Macros to mark delay slots. -|.macro ., a; a; .endmacro -|.macro ., a,b; a,b; .endmacro -|.macro ., a,b,c; a,b,c; .endmacro -|.macro ., a,b,c,d; a,b,c,d; .endmacro -| -|.define FRAME_PC, -8 -|.define FRAME_FUNC, -16 -| -|//----------------------------------------------------------------------- -| -|// Endian-specific defines. -|.if ENDIAN_LE -|.define HI, 4 -|.define LO, 0 -|.define OFS_RD, 2 -|.define OFS_RA, 1 -|.define OFS_OP, 0 -|.else -|.define HI, 0 -|.define LO, 4 -|.define OFS_RD, 0 -|.define OFS_RA, 2 -|.define OFS_OP, 3 -|.endif -| -|// Instruction decode. -|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro -|.macro decode_OP8a, dst, ins; andi dst, ins, 0xff; .endmacro -|.macro decode_OP8b, dst; sll dst, dst, 3; .endmacro -|.macro decode_RC8a, dst, ins; srl dst, ins, 13; .endmacro -|.macro decode_RC8b, dst; andi dst, dst, 0x7f8; .endmacro -|.macro decode_RD4b, dst; sll dst, dst, 2; .endmacro -|.macro decode_RA8a, dst, ins; srl dst, ins, 5; .endmacro -|.macro decode_RA8b, dst; andi dst, dst, 0x7f8; .endmacro -|.macro decode_RB8a, dst, ins; srl dst, ins, 21; .endmacro -|.macro decode_RB8b, dst; andi dst, dst, 0x7f8; .endmacro -|.macro decode_RD8a, dst, ins; srl dst, ins, 16; .endmacro -|.macro decode_RD8b, dst; sll dst, dst, 3; .endmacro -|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro -| -|// Instruction fetch. -|.macro ins_NEXT1 -| lw INS, 0(PC) -| daddiu PC, PC, 4 -|.endmacro -|// Instruction decode+dispatch. -|.macro ins_NEXT2 -| decode_OP8a TMP1, INS -| decode_OP8b TMP1 -| daddu TMP0, DISPATCH, TMP1 -| decode_RD8a RD, INS -| ld AT, 0(TMP0) -| decode_RA8a RA, INS -| decode_RD8b RD -| jr AT -| decode_RA8b RA -|.endmacro -|.macro ins_NEXT -| ins_NEXT1 -| ins_NEXT2 -|.endmacro -| -|// Instruction footer. -|.if 1 -| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. -| .define ins_next, ins_NEXT -| .define ins_next_, ins_NEXT -| .define ins_next1, ins_NEXT1 -| .define ins_next2, ins_NEXT2 -|.else -| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. -| // Affects only certain kinds of benchmarks (and only with -j off). -| .macro ins_next -| b ->ins_next -| .endmacro -| .macro ins_next1 -| .endmacro -| .macro ins_next2 -| b ->ins_next -| .endmacro -| .macro ins_next_ -| ->ins_next: -| ins_NEXT -| .endmacro -|.endif -| -|// Call decode and dispatch. -|.macro ins_callt -| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC -| ld PC, LFUNC:RB->pc -| lw INS, 0(PC) -| daddiu PC, PC, 4 -| decode_OP8a TMP1, INS -| decode_RA8a RA, INS -| decode_OP8b TMP1 -| decode_RA8b RA -| daddu TMP0, DISPATCH, TMP1 -| ld TMP0, 0(TMP0) -| jr TMP0 -| daddu RA, RA, BASE -|.endmacro -| -|.macro ins_call -| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC -| sd PC, FRAME_PC(BASE) -| ins_callt -|.endmacro -| -|//----------------------------------------------------------------------- -| -|.macro branch_RD -| srl TMP0, RD, 1 -| lui AT, (-(BCBIAS_J*4 >> 16) & 65535) -| addu TMP0, TMP0, AT -| daddu PC, PC, TMP0 -|.endmacro -| -|// Assumes DISPATCH is relative to GL. -#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) -#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) -#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch)) -#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name) -| -#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) -| -|.macro load_got, func -| ld CFUNCADDR, DISPATCH_GOT(func)(DISPATCH) -|.endmacro -|// Much faster. Sadly, there's no easy way to force the required code layout. -|// .macro call_intern, func; bal extern func; .endmacro -|.macro call_intern, func; jalr CFUNCADDR; .endmacro -|.macro call_extern; jalr CFUNCADDR; .endmacro -|.macro jmp_extern; jr CFUNCADDR; .endmacro -| -|.macro hotcheck, delta, target -| dsrl TMP1, PC, 1 -| andi TMP1, TMP1, 126 -| daddu TMP1, TMP1, DISPATCH -| lhu TMP2, GG_DISP2HOT(TMP1) -| addiu TMP2, TMP2, -delta -| bltz TMP2, target -|. sh TMP2, GG_DISP2HOT(TMP1) -|.endmacro -| -|.macro hotloop -| hotcheck HOTCOUNT_LOOP, ->vm_hotloop -|.endmacro -| -|.macro hotcall -| hotcheck HOTCOUNT_CALL, ->vm_hotcall -|.endmacro -| -|// Set current VM state. Uses TMP0. -|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro -|.macro st_vmstate; sw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro -| -|// Move table write barrier back. Overwrites mark and tmp. -|.macro barrierback, tab, mark, tmp, target -| ld tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) -| andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab) -| sd tab, DISPATCH_GL(gc.grayagain)(DISPATCH) -| sb mark, tab->marked -| b target -|. sd tmp, tab->gclist -|.endmacro -| -|// Clear type tag. Isolate lowest 14+32+1=47 bits of reg. -|.macro cleartp, reg; dextm reg, reg, 0, 14; .endmacro -|.macro cleartp, dst, reg; dextm dst, reg, 0, 14; .endmacro -| -|// Set type tag: Merge 17 type bits into bits [15+32=47, 31+32+1=64) of dst. -|.macro settp, dst, tp; dinsu dst, tp, 15, 31; .endmacro -| -|// Extract (negative) type tag. -|.macro gettp, dst, src; dsra dst, src, 47; .endmacro -| -|// Macros to check the TValue type and extract the GCobj. Branch on failure. -|.macro checktp, reg, tp, target -| gettp AT, reg -| daddiu AT, AT, tp -| bnez AT, target -|. cleartp reg -|.endmacro -|.macro checktp, dst, reg, tp, target -| gettp AT, reg -| daddiu AT, AT, tp -| bnez AT, target -|. cleartp dst, reg -|.endmacro -|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro -|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro -|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro -|.macro checkint, reg, target // Caveat: has delay slot! -| gettp AT, reg -| bne AT, TISNUM, target -|.endmacro -|.macro checknum, reg, target // Caveat: has delay slot! -| gettp AT, reg -| sltiu AT, AT, LJ_TISNUM -| beqz AT, target -|.endmacro -| -|.macro mov_false, reg -| lu reg, 0x8000 -| dsll reg, reg, 32 -| not reg, reg -|.endmacro -|.macro mov_true, reg -| li reg, 0x0001 -| dsll reg, reg, 48 -| not reg, reg -|.endmacro -| -|//----------------------------------------------------------------------- - -/* Generate subroutines used by opcodes and other parts of the VM. */ -/* The .code_sub section should be last to help static branch prediction. */ -static void build_subroutines(BuildCtx *ctx) -{ - |.code_sub - | - |//----------------------------------------------------------------------- - |//-- Return handling ---------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_returnp: - | // See vm_return. Also: TMP2 = previous base. - | andi AT, PC, FRAME_P - | beqz AT, ->cont_dispatch - | - | // Return from pcall or xpcall fast func. - |. mov_true TMP1 - | ld PC, FRAME_PC(TMP2) // Fetch PC of previous frame. - | move BASE, TMP2 // Restore caller base. - | // Prepending may overwrite the pcall frame, so do it at the end. - | sd TMP1, -8(RA) // Prepend true to results. - | daddiu RA, RA, -8 - | - |->vm_returnc: - | addiu RD, RD, 8 // RD = (nresults+1)*8. - | andi TMP0, PC, FRAME_TYPE - | beqz RD, ->vm_unwind_c_eh - |. li CRET1, LUA_YIELD - | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua. - |. move MULTRES, RD - | - |->vm_return: - | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return - | // TMP0 = PC & FRAME_TYPE - | li TMP2, -8 - | xori AT, TMP0, FRAME_C - | and TMP2, PC, TMP2 - | bnez AT, ->vm_returnp - | dsubu TMP2, BASE, TMP2 // TMP2 = previous base. - | - | addiu TMP1, RD, -8 - | sd TMP2, L->base - | li_vmstate C - | lw TMP2, SAVE_NRES - | daddiu BASE, BASE, -16 - | st_vmstate - | beqz TMP1, >2 - |. sll TMP2, TMP2, 3 - |1: - | addiu TMP1, TMP1, -8 - | ld CRET1, 0(RA) - | daddiu RA, RA, 8 - | sd CRET1, 0(BASE) - | bnez TMP1, <1 - |. daddiu BASE, BASE, 8 - | - |2: - | bne TMP2, RD, >6 - |3: - |. sd BASE, L->top // Store new top. - | - |->vm_leave_cp: - | ld TMP0, SAVE_CFRAME // Restore previous C frame. - | move CRET1, r0 // Ok return status for vm_pcall. - | sd TMP0, L->cframe - | - |->vm_leave_unw: - | restoreregs_ret - | - |6: - | ld TMP1, L->maxstack - | slt AT, TMP2, RD - | bnez AT, >7 // Less results wanted? - | // More results wanted. Check stack size and fill up results with nil. - |. slt AT, BASE, TMP1 - | beqz AT, >8 - |. nop - | sd TISNIL, 0(BASE) - | addiu RD, RD, 8 - | b <2 - |. daddiu BASE, BASE, 8 - | - |7: // Less results wanted. - | subu TMP0, RD, TMP2 - | dsubu TMP0, BASE, TMP0 // Either keep top or shrink it. - | b <3 - |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case? - | - |8: // Corner case: need to grow stack for filling up results. - | // This can happen if: - | // - A C function grows the stack (a lot). - | // - The GC shrinks the stack in between. - | // - A return back from a lua_call() with (high) nresults adjustment. - | load_got lj_state_growstack - | move MULTRES, RD - | srl CARG2, TMP2, 3 - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | lw TMP2, SAVE_NRES - | ld BASE, L->top // Need the (realloced) L->top in BASE. - | move RD, MULTRES - | b <2 - |. sll TMP2, TMP2, 3 - | - |->vm_unwind_c: // Unwind C stack, return from vm_pcall. - | // (void *cframe, int errcode) - | move sp, CARG1 - | move CRET1, CARG2 - |->vm_unwind_c_eh: // Landing pad for external unwinder. - | ld L, SAVE_L - | li TMP0, ~LJ_VMST_C - | ld GL:TMP1, L->glref - | b ->vm_leave_unw - |. sw TMP0, GL:TMP1->vmstate - | - |->vm_unwind_ff: // Unwind C stack, return from ff pcall. - | // (void *cframe) - | li AT, -4 - | and sp, CARG1, AT - |->vm_unwind_ff_eh: // Landing pad for external unwinder. - | ld L, SAVE_L - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | li TISNIL, LJ_TNIL - | li TISNUM, LJ_TISNUM - | ld BASE, L->base - | ld DISPATCH, L->glref // Setup pointer to dispatch table. - | .FPU mtc1 TMP3, TOBIT - | mov_false TMP1 - | li_vmstate INTERP - | ld PC, FRAME_PC(BASE) // Fetch PC of previous frame. - | .FPU cvt.d.s TOBIT, TOBIT - | daddiu RA, BASE, -8 // Results start at BASE-8. - | daddiu DISPATCH, DISPATCH, GG_G2DISP - | sd TMP1, 0(RA) // Prepend false to error message. - | st_vmstate - | b ->vm_returnc - |. li RD, 16 // 2 results: false + error message. - | - |//----------------------------------------------------------------------- - |//-- Grow stack for calls ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_growstack_c: // Grow stack for C function. - | b >2 - |. li CARG2, LUA_MINSTACK - | - |->vm_growstack_l: // Grow stack for Lua function. - | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC - | daddu RC, BASE, RC - | dsubu RA, RA, BASE - | sd BASE, L->base - | daddiu PC, PC, 4 // Must point after first instruction. - | sd RC, L->top - | srl CARG2, RA, 3 - |2: - | // L->base = new base, L->top = top - | load_got lj_state_growstack - | sd PC, SAVE_PC - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | ld BASE, L->base - | ld RC, L->top - | ld LFUNC:RB, FRAME_FUNC(BASE) - | dsubu RC, RC, BASE - | cleartp LFUNC:RB - | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC - | ins_callt // Just retry the call. - | - |//----------------------------------------------------------------------- - |//-- Entry points into the assembler VM --------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_resume: // Setup C frame and resume thread. - | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) - | saveregs - | move L, CARG1 - | ld DISPATCH, L->glref // Setup pointer to dispatch table. - | move BASE, CARG2 - | lbu TMP1, L->status - | sd L, SAVE_L - | li PC, FRAME_CP - | daddiu TMP0, sp, CFRAME_RESUME - | daddiu DISPATCH, DISPATCH, GG_G2DISP - | sw r0, SAVE_NRES - | sw r0, SAVE_ERRF - | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. - | sd r0, SAVE_CFRAME - | beqz TMP1, >3 - |. sd TMP0, L->cframe - | - | // Resume after yield (like a return). - | sd L, DISPATCH_GL(cur_L)(DISPATCH) - | move RA, BASE - | ld BASE, L->base - | ld TMP1, L->top - | ld PC, FRAME_PC(BASE) - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | dsubu RD, TMP1, BASE - | .FPU mtc1 TMP3, TOBIT - | sb r0, L->status - | .FPU cvt.d.s TOBIT, TOBIT - | li_vmstate INTERP - | daddiu RD, RD, 8 - | st_vmstate - | move MULTRES, RD - | andi TMP0, PC, FRAME_TYPE - | li TISNIL, LJ_TNIL - | beqz TMP0, ->BC_RET_Z - |. li TISNUM, LJ_TISNUM - | b ->vm_return - |. nop - | - |->vm_pcall: // Setup protected C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) - | saveregs - | sw CARG4, SAVE_ERRF - | b >1 - |. li PC, FRAME_CP - | - |->vm_call: // Setup C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1) - | saveregs - | li PC, FRAME_C - | - |1: // Entry point for vm_pcall above (PC = ftype). - | ld TMP1, L:CARG1->cframe - | move L, CARG1 - | sw CARG3, SAVE_NRES - | ld DISPATCH, L->glref // Setup pointer to dispatch table. - | sd CARG1, SAVE_L - | move BASE, CARG2 - | daddiu DISPATCH, DISPATCH, GG_G2DISP - | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. - | sd TMP1, SAVE_CFRAME - | sd sp, L->cframe // Add our C frame to cframe chain. - | - |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). - | sd L, DISPATCH_GL(cur_L)(DISPATCH) - | ld TMP2, L->base // TMP2 = old base (used in vmeta_call). - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | ld TMP1, L->top - | .FPU mtc1 TMP3, TOBIT - | daddu PC, PC, BASE - | dsubu NARGS8:RC, TMP1, BASE - | li TISNUM, LJ_TISNUM - | dsubu PC, PC, TMP2 // PC = frame delta + frame type - | .FPU cvt.d.s TOBIT, TOBIT - | li_vmstate INTERP - | li TISNIL, LJ_TNIL - | st_vmstate - | - |->vm_call_dispatch: - | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC - | ld LFUNC:RB, FRAME_FUNC(BASE) - | checkfunc LFUNC:RB, ->vmeta_call - | - |->vm_call_dispatch_f: - | ins_call - | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC - | - |->vm_cpcall: // Setup protected C frame, call C. - | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) - | saveregs - | move L, CARG1 - | ld TMP0, L:CARG1->stack - | sd CARG1, SAVE_L - | ld TMP1, L->top - | ld DISPATCH, L->glref // Setup pointer to dispatch table. - | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. - | dsubu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). - | ld TMP1, L->cframe - | daddiu DISPATCH, DISPATCH, GG_G2DISP - | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. - | sw r0, SAVE_ERRF // No error function. - | sd TMP1, SAVE_CFRAME - | sd sp, L->cframe // Add our C frame to cframe chain. - | sd L, DISPATCH_GL(cur_L)(DISPATCH) - | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) - |. move CFUNCADDR, CARG4 - | move BASE, CRET1 - | bnez CRET1, <3 // Else continue with the call. - |. li PC, FRAME_CP - | b ->vm_leave_cp // No base? Just remove C frame. - |. nop - | - |//----------------------------------------------------------------------- - |//-- Metamethod handling ------------------------------------------------ - |//----------------------------------------------------------------------- - | - |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the - |// stack, so BASE doesn't need to be reloaded across these calls. - | - |//-- Continuation dispatch ---------------------------------------------- - | - |->cont_dispatch: - | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 - | ld TMP0, -32(BASE) // Continuation. - | move RB, BASE - | move BASE, TMP2 // Restore caller BASE. - | ld LFUNC:TMP1, FRAME_FUNC(TMP2) - |.if FFI - | sltiu AT, TMP0, 2 - |.endif - | ld PC, -24(RB) // Restore PC from [cont|PC]. - | cleartp LFUNC:TMP1 - | daddu TMP2, RA, RD - | ld TMP1, LFUNC:TMP1->pc - |.if FFI - | bnez AT, >1 - |.endif - |. sd TISNIL, -8(TMP2) // Ensure one valid arg. - | // BASE = base, RA = resultptr, RB = meta base - | jr TMP0 // Jump to continuation. - |. ld KBASE, PC2PROTO(k)(TMP1) - | - |.if FFI - |1: - | bnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback. - | // cont = 0: tailcall from C function. - |. daddiu TMP1, RB, -32 - | b ->vm_call_tail - |. dsubu RC, TMP1, BASE - |.endif - | - |->cont_cat: // RA = resultptr, RB = meta base - | lw INS, -4(PC) - | daddiu CARG2, RB, -32 - | ld CRET1, 0(RA) - | decode_RB8a MULTRES, INS - | decode_RA8a RA, INS - | decode_RB8b MULTRES - | decode_RA8b RA - | daddu TMP1, BASE, MULTRES - | sd BASE, L->base - | dsubu CARG3, CARG2, TMP1 - | bne TMP1, CARG2, ->BC_CAT_Z - |. sd CRET1, 0(CARG2) - | daddu RA, BASE, RA - | b ->cont_nop - |. sd CRET1, 0(RA) - | - |//-- Table indexing metamethods ----------------------------------------- - | - |->vmeta_tgets1: - | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TSTR - | settp STR:RC, TMP0 - | b >1 - |. sd STR:RC, 0(CARG3) - | - |->vmeta_tgets: - | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TTAB - | li TMP1, LJ_TSTR - | settp TAB:RB, TMP0 - | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2) - | sd TAB:RB, 0(CARG2) - | settp STR:RC, TMP1 - | b >1 - |. sd STR:RC, 0(CARG3) - | - |->vmeta_tgetb: // TMP0 = index - | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | settp TMP0, TISNUM - | sd TMP0, 0(CARG3) - | - |->vmeta_tgetv: - |1: - | load_got lj_meta_tget - | sd BASE, L->base - | sd PC, SAVE_PC - | call_intern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) - |. move CARG1, L - | // Returns TValue * (finished) or NULL (metamethod). - | beqz CRET1, >3 - |. daddiu TMP1, BASE, -FRAME_CONT - | ld CARG1, 0(CRET1) - | ins_next1 - | sd CARG1, 0(RA) - | ins_next2 - | - |3: // Call __index metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k - | ld BASE, L->top - | sd PC, -24(BASE) // [cont|PC] - | dsubu PC, BASE, TMP1 - | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | cleartp LFUNC:RB - | b ->vm_call_dispatch_f - |. li NARGS8:RC, 16 // 2 args for func(t, k). - | - |->vmeta_tgetr: - | load_got lj_tab_getinth - | call_intern lj_tab_getinth // (GCtab *t, int32_t key) - |. nop - | // Returns cTValue * or NULL. - | beqz CRET1, ->BC_TGETR_Z - |. move CARG2, TISNIL - | b ->BC_TGETR_Z - |. ld CARG2, 0(CRET1) - | - |//----------------------------------------------------------------------- - | - |->vmeta_tsets1: - | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TSTR - | settp STR:RC, TMP0 - | b >1 - |. sd STR:RC, 0(CARG3) - | - |->vmeta_tsets: - | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TTAB - | li TMP1, LJ_TSTR - | settp TAB:RB, TMP0 - | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2) - | sd TAB:RB, 0(CARG2) - | settp STR:RC, TMP1 - | b >1 - |. sd STR:RC, 0(CARG3) - | - |->vmeta_tsetb: // TMP0 = index - | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | settp TMP0, TISNUM - | sd TMP0, 0(CARG3) - | - |->vmeta_tsetv: - |1: - | load_got lj_meta_tset - | sd BASE, L->base - | sd PC, SAVE_PC - | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) - |. move CARG1, L - | // Returns TValue * (finished) or NULL (metamethod). - | beqz CRET1, >3 - |. ld CARG1, 0(RA) - | // NOBARRIER: lj_meta_tset ensures the table is not black. - | ins_next1 - | sd CARG1, 0(CRET1) - | ins_next2 - | - |3: // Call __newindex metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) - | daddiu TMP1, BASE, -FRAME_CONT - | ld BASE, L->top - | sd PC, -24(BASE) // [cont|PC] - | dsubu PC, BASE, TMP1 - | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | cleartp LFUNC:RB - | sd CARG1, 16(BASE) // Copy value to third argument. - | b ->vm_call_dispatch_f - |. li NARGS8:RC, 24 // 3 args for func(t, k, v) - | - |->vmeta_tsetr: - | load_got lj_tab_setinth - | sd BASE, L->base - | sd PC, SAVE_PC - | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) - |. move CARG1, L - | // Returns TValue *. - | b ->BC_TSETR_Z - |. nop - | - |//-- Comparison metamethods --------------------------------------------- - | - |->vmeta_comp: - | // RA/RD point to o1/o2. - | move CARG2, RA - | move CARG3, RD - | load_got lj_meta_comp - | daddiu PC, PC, -4 - | sd BASE, L->base - | sd PC, SAVE_PC - | decode_OP1 CARG4, INS - | call_intern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) - |. move CARG1, L - | // Returns 0/1 or TValue * (metamethod). - |3: - | sltiu AT, CRET1, 2 - | beqz AT, ->vmeta_binop - | negu TMP2, CRET1 - |4: - | lhu RD, OFS_RD(PC) - | daddiu PC, PC, 4 - | lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) - | sll RD, RD, 2 - | addu RD, RD, TMP1 - | and RD, RD, TMP2 - | daddu PC, PC, RD - |->cont_nop: - | ins_next - | - |->cont_ra: // RA = resultptr - | lbu TMP1, -4+OFS_RA(PC) - | ld CRET1, 0(RA) - | sll TMP1, TMP1, 3 - | daddu TMP1, BASE, TMP1 - | b ->cont_nop - |. sd CRET1, 0(TMP1) - | - |->cont_condt: // RA = resultptr - | ld TMP0, 0(RA) - | gettp TMP0, TMP0 - | sltiu AT, TMP0, LJ_TISTRUECOND - | b <4 - |. negu TMP2, AT // Branch if result is true. - | - |->cont_condf: // RA = resultptr - | ld TMP0, 0(RA) - | gettp TMP0, TMP0 - | sltiu AT, TMP0, LJ_TISTRUECOND - | b <4 - |. addiu TMP2, AT, -1 // Branch if result is false. - | - |->vmeta_equal: - | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1. - | load_got lj_meta_equal - | cleartp LFUNC:CARG3, CARG2 - | cleartp LFUNC:CARG2, CARG1 - | move CARG4, TMP0 - | daddiu PC, PC, -4 - | sd BASE, L->base - | sd PC, SAVE_PC - | call_intern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) - |. move CARG1, L - | // Returns 0/1 or TValue * (metamethod). - | b <3 - |. nop - | - |->vmeta_equal_cd: - |.if FFI - | load_got lj_meta_equal_cd - | move CARG2, INS - | daddiu PC, PC, -4 - | sd BASE, L->base - | sd PC, SAVE_PC - | call_intern lj_meta_equal_cd // (lua_State *L, BCIns op) - |. move CARG1, L - | // Returns 0/1 or TValue * (metamethod). - | b <3 - |. nop - |.endif - | - |->vmeta_istype: - | load_got lj_meta_istype - | daddiu PC, PC, -4 - | sd BASE, L->base - | srl CARG2, RA, 3 - | srl CARG3, RD, 3 - | sd PC, SAVE_PC - | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) - |. move CARG1, L - | b ->cont_nop - |. nop - | - |//-- Arithmetic metamethods --------------------------------------------- - | - |->vmeta_unm: - | move RC, RB - | - |->vmeta_arith: - | load_got lj_meta_arith - | sd BASE, L->base - | move CARG2, RA - | sd PC, SAVE_PC - | move CARG3, RB - | move CARG4, RC - | decode_OP1 CARG5, INS // CARG5 == RB. - | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) - |. move CARG1, L - | // Returns NULL (finished) or TValue * (metamethod). - | beqz CRET1, ->cont_nop - |. nop - | - | // Call metamethod for binary op. - |->vmeta_binop: - | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 - | dsubu TMP1, CRET1, BASE - | sd PC, -24(CRET1) // [cont|PC] - | move TMP2, BASE - | daddiu PC, TMP1, FRAME_CONT - | move BASE, CRET1 - | b ->vm_call_dispatch - |. li NARGS8:RC, 16 // 2 args for func(o1, o2). - | - |->vmeta_len: - | // CARG2 already set by BC_LEN. -#if LJ_52 - | move MULTRES, CARG1 -#endif - | load_got lj_meta_len - | sd BASE, L->base - | sd PC, SAVE_PC - | call_intern lj_meta_len // (lua_State *L, TValue *o) - |. move CARG1, L - | // Returns NULL (retry) or TValue * (metamethod base). -#if LJ_52 - | bnez CRET1, ->vmeta_binop // Binop call for compatibility. - |. nop - | b ->BC_LEN_Z - |. move CARG1, MULTRES -#else - | b ->vmeta_binop // Binop call for compatibility. - |. nop -#endif - | - |//-- Call metamethod ---------------------------------------------------- - | - |->vmeta_call: // Resolve and call __call metamethod. - | // TMP2 = old base, BASE = new base, RC = nargs*8 - | load_got lj_meta_call - | sd TMP2, L->base // This is the callers base! - | daddiu CARG2, BASE, -16 - | sd PC, SAVE_PC - | daddu CARG3, BASE, RC - | move MULTRES, NARGS8:RC - | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - |. move CARG1, L - | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now. - | cleartp LFUNC:RB - | ins_call - | - |->vmeta_callt: // Resolve __call for BC_CALLT. - | // BASE = old base, RA = new base, RC = nargs*8 - | load_got lj_meta_call - | sd BASE, L->base - | daddiu CARG2, RA, -16 - | sd PC, SAVE_PC - | daddu CARG3, RA, RC - | move MULTRES, NARGS8:RC - | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - |. move CARG1, L - | ld RB, FRAME_FUNC(RA) // Guaranteed to be a function here. - | ld TMP1, FRAME_PC(BASE) - | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now. - | b ->BC_CALLT_Z - |. cleartp LFUNC:CARG3, RB - | - |//-- Argument coercion for 'for' statement ------------------------------ - | - |->vmeta_for: - | load_got lj_meta_for - | sd BASE, L->base - | move CARG2, RA - | sd PC, SAVE_PC - | move MULTRES, INS - | call_intern lj_meta_for // (lua_State *L, TValue *base) - |. move CARG1, L - |.if JIT - | decode_OP1 TMP0, MULTRES - | li AT, BC_JFORI - |.endif - | decode_RA8a RA, MULTRES - | decode_RD8a RD, MULTRES - | decode_RA8b RA - |.if JIT - | beq TMP0, AT, =>BC_JFORI - |. decode_RD8b RD - | b =>BC_FORI - |. nop - |.else - | b =>BC_FORI - |. decode_RD8b RD - |.endif - | - |//----------------------------------------------------------------------- - |//-- Fast functions ----------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro .ffunc, name - |->ff_ .. name: - |.endmacro - | - |.macro .ffunc_1, name - |->ff_ .. name: - | beqz NARGS8:RC, ->fff_fallback - |. ld CARG1, 0(BASE) - |.endmacro - | - |.macro .ffunc_2, name - |->ff_ .. name: - | sltiu AT, NARGS8:RC, 16 - | ld CARG1, 0(BASE) - | bnez AT, ->fff_fallback - |. ld CARG2, 8(BASE) - |.endmacro - | - |.macro .ffunc_n, name // Caveat: has delay slot! - |->ff_ .. name: - | ld CARG1, 0(BASE) - | beqz NARGS8:RC, ->fff_fallback - | // Either ldc1 or the 1st instruction of checknum is in the delay slot. - | .FPU ldc1 FARG1, 0(BASE) - | checknum CARG1, ->fff_fallback - |.endmacro - | - |.macro .ffunc_nn, name // Caveat: has delay slot! - |->ff_ .. name: - | ld CARG1, 0(BASE) - | sltiu AT, NARGS8:RC, 16 - | ld CARG2, 8(BASE) - | bnez AT, ->fff_fallback - |. gettp TMP0, CARG1 - | gettp TMP1, CARG2 - | sltiu TMP0, TMP0, LJ_TISNUM - | sltiu TMP1, TMP1, LJ_TISNUM - | .FPU ldc1 FARG1, 0(BASE) - | and TMP0, TMP0, TMP1 - | .FPU ldc1 FARG2, 8(BASE) - | beqz TMP0, ->fff_fallback - |.endmacro - | - |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot! - |.macro ffgccheck - | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) - | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) - | dsubu AT, TMP0, TMP1 - | bgezal AT, ->fff_gcstep - |.endmacro - | - |//-- Base library: checks ----------------------------------------------- - |.ffunc_1 assert - | gettp AT, CARG1 - | sltiu AT, AT, LJ_TISTRUECOND - | beqz AT, ->fff_fallback - |. daddiu RA, BASE, -16 - | ld PC, FRAME_PC(BASE) - | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. - | daddu TMP2, RA, RD - | daddiu TMP1, BASE, 8 - | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. - |. sd CARG1, 0(RA) - |1: - | ld CRET1, 0(TMP1) - | sd CRET1, -16(TMP1) - | bne TMP1, TMP2, <1 - |. daddiu TMP1, TMP1, 8 - | b ->fff_res - |. nop - | - |.ffunc_1 type - | gettp TMP0, CARG1 - | sltu TMP1, TISNUM, TMP0 - | not TMP2, TMP0 - | li TMP3, ~LJ_TISNUM - | movz TMP2, TMP3, TMP1 - | dsll TMP2, TMP2, 3 - | daddu TMP2, CFUNC:RB, TMP2 - | b ->fff_restv - |. ld CARG1, CFUNC:TMP2->upvalue - | - |//-- Base library: getters and setters --------------------------------- - | - |.ffunc_1 getmetatable - | gettp TMP2, CARG1 - | daddiu TMP0, TMP2, -LJ_TTAB - | daddiu TMP1, TMP2, -LJ_TUDATA - | movn TMP0, TMP1, TMP0 - | bnez TMP0, >6 - |. cleartp TAB:CARG1 - |1: // Field metatable must be at same offset for GCtab and GCudata! - | ld TAB:RB, TAB:CARG1->metatable - |2: - | ld STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) - | beqz TAB:RB, ->fff_restv - |. li CARG1, LJ_TNIL - | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash - | ld NODE:TMP2, TAB:RB->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | dsll TMP0, TMP1, 5 - | dsll TMP1, TMP1, 3 - | dsubu TMP1, TMP0, TMP1 - | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - | li CARG4, LJ_TSTR - | settp STR:RC, CARG4 // Tagged key to look for. - |3: // Rearranged logic, because we expect _not_ to find the key. - | ld TMP0, NODE:TMP2->key - | ld CARG1, NODE:TMP2->val - | ld NODE:TMP2, NODE:TMP2->next - | beq RC, TMP0, >5 - |. li AT, LJ_TTAB - | bnez NODE:TMP2, <3 - |. nop - |4: - | move CARG1, RB - | b ->fff_restv // Not found, keep default result. - |. settp CARG1, AT - |5: - | bne CARG1, TISNIL, ->fff_restv - |. nop - | b <4 // Ditto for nil value. - |. nop - | - |6: - | sltiu AT, TMP2, LJ_TISNUM - | movn TMP2, TISNUM, AT - | dsll TMP2, TMP2, 3 - | dsubu TMP0, DISPATCH, TMP2 - | b <2 - |. ld TAB:RB, DISPATCH_GL(gcroot[GCROOT_BASEMT])-8(TMP0) - | - |.ffunc_2 setmetatable - | // Fast path: no mt for table yet and not clearing the mt. - | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback - | gettp TMP3, CARG2 - | ld TAB:TMP0, TAB:TMP1->metatable - | lbu TMP2, TAB:TMP1->marked - | daddiu AT, TMP3, -LJ_TTAB - | cleartp TAB:CARG2 - | or AT, AT, TAB:TMP0 - | bnez AT, ->fff_fallback - |. andi AT, TMP2, LJ_GC_BLACK // isblack(table) - | beqz AT, ->fff_restv - |. sd TAB:CARG2, TAB:TMP1->metatable - | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv - | - |.ffunc rawget - | ld CARG2, 0(BASE) - | sltiu AT, NARGS8:RC, 16 - | load_got lj_tab_get - | gettp TMP0, CARG2 - | cleartp CARG2 - | daddiu TMP0, TMP0, -LJ_TTAB - | or AT, AT, TMP0 - | bnez AT, ->fff_fallback - |. daddiu CARG3, BASE, 8 - | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) - |. move CARG1, L - | b ->fff_restv - |. ld CARG1, 0(CRET1) - | - |//-- Base library: conversions ------------------------------------------ - | - |.ffunc tonumber - | // Only handles the number case inline (without a base argument). - | ld CARG1, 0(BASE) - | xori AT, NARGS8:RC, 8 // Exactly one number argument. - | gettp TMP1, CARG1 - | sltu TMP0, TISNUM, TMP1 - | or AT, AT, TMP0 - | bnez AT, ->fff_fallback - |. nop - | b ->fff_restv - |. nop - | - |.ffunc_1 tostring - | // Only handles the string or number case inline. - | gettp TMP0, CARG1 - | daddiu AT, TMP0, -LJ_TSTR - | // A __tostring method in the string base metatable is ignored. - | beqz AT, ->fff_restv // String key? - | // Handle numbers inline, unless a number base metatable is present. - |. ld TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) - | sltu TMP0, TISNUM, TMP0 - | or TMP0, TMP0, TMP1 - | bnez TMP0, ->fff_fallback - |. sd BASE, L->base // Add frame since C call can throw. - | ffgccheck - |. sd PC, SAVE_PC // Redundant (but a defined value). - | load_got lj_strfmt_number - | move CARG1, L - | call_intern lj_strfmt_number // (lua_State *L, cTValue *o) - |. move CARG2, BASE - | // Returns GCstr *. - | li AT, LJ_TSTR - | settp CRET1, AT - | b ->fff_restv - |. move CARG1, CRET1 - | - |//-- Base library: iterators ------------------------------------------- - | - |.ffunc_1 next - | checktp CARG2, CARG1, -LJ_TTAB, ->fff_fallback - | daddu TMP2, BASE, NARGS8:RC - | sd TISNIL, 0(TMP2) // Set missing 2nd arg to nil. - | ld PC, FRAME_PC(BASE) - | load_got lj_tab_next - | sd BASE, L->base // Add frame since C call can throw. - | sd BASE, L->top // Dummy frame length is ok. - | daddiu CARG3, BASE, 8 - | sd PC, SAVE_PC - | call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - |. move CARG1, L - | // Returns 0 at end of traversal. - | beqz CRET1, ->fff_restv // End of traversal: return nil. - |. move CARG1, TISNIL - | ld TMP0, 8(BASE) - | daddiu RA, BASE, -16 - | ld TMP2, 16(BASE) - | sd TMP0, 0(RA) - | sd TMP2, 8(RA) - | b ->fff_res - |. li RD, (2+1)*8 - | - |.ffunc_1 pairs - | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback - | ld PC, FRAME_PC(BASE) -#if LJ_52 - | ld TAB:TMP2, TAB:TMP1->metatable - | ld TMP0, CFUNC:RB->upvalue[0] - | bnez TAB:TMP2, ->fff_fallback -#else - | ld TMP0, CFUNC:RB->upvalue[0] -#endif - |. daddiu RA, BASE, -16 - | sd TISNIL, 0(BASE) - | sd CARG1, -8(BASE) - | sd TMP0, 0(RA) - | b ->fff_res - |. li RD, (3+1)*8 - | - |.ffunc_2 ipairs_aux - | checktab CARG1, ->fff_fallback - | checkint CARG2, ->fff_fallback - |. lw TMP0, TAB:CARG1->asize - | ld TMP1, TAB:CARG1->array - | ld PC, FRAME_PC(BASE) - | sextw TMP2, CARG2 - | addiu TMP2, TMP2, 1 - | sltu AT, TMP2, TMP0 - | daddiu RA, BASE, -16 - | zextw TMP0, TMP2 - | settp TMP0, TISNUM - | beqz AT, >2 // Not in array part? - |. sd TMP0, 0(RA) - | dsll TMP3, TMP2, 3 - | daddu TMP3, TMP1, TMP3 - | ld TMP1, 0(TMP3) - |1: - | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. - |. li RD, (0+1)*8 - | sd TMP1, -8(BASE) - | b ->fff_res - |. li RD, (2+1)*8 - |2: // Check for empty hash part first. Otherwise call C function. - | lw TMP0, TAB:CARG1->hmask - | load_got lj_tab_getinth - | beqz TMP0, ->fff_res - |. li RD, (0+1)*8 - | call_intern lj_tab_getinth // (GCtab *t, int32_t key) - |. move CARG2, TMP2 - | // Returns cTValue * or NULL. - | beqz CRET1, ->fff_res - |. li RD, (0+1)*8 - | b <1 - |. ld TMP1, 0(CRET1) - | - |.ffunc_1 ipairs - | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback - | ld PC, FRAME_PC(BASE) -#if LJ_52 - | ld TAB:TMP2, TAB:TMP1->metatable - | ld CFUNC:TMP0, CFUNC:RB->upvalue[0] - | bnez TAB:TMP2, ->fff_fallback -#else - | ld TMP0, CFUNC:RB->upvalue[0] -#endif - | daddiu RA, BASE, -16 - | dsll AT, TISNUM, 47 - | sd CARG1, -8(BASE) - | sd AT, 0(BASE) - | sd CFUNC:TMP0, 0(RA) - | b ->fff_res - |. li RD, (3+1)*8 - | - |//-- Base library: catch errors ---------------------------------------- - | - |.ffunc pcall - | daddiu NARGS8:RC, NARGS8:RC, -8 - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | bltz NARGS8:RC, ->fff_fallback - |. move TMP2, BASE - | daddiu BASE, BASE, 16 - | // Remember active hook before pcall. - | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT - | andi TMP3, TMP3, 1 - | daddiu PC, TMP3, 16+FRAME_PCALL - | beqz NARGS8:RC, ->vm_call_dispatch - |1: - |. daddu TMP0, BASE, NARGS8:RC - |2: - | ld TMP1, -16(TMP0) - | sd TMP1, -8(TMP0) - | daddiu TMP0, TMP0, -8 - | bne TMP0, BASE, <2 - |. nop - | b ->vm_call_dispatch - |. nop - | - |.ffunc xpcall - | daddiu NARGS8:RC, NARGS8:RC, -16 - | ld CARG1, 0(BASE) - | ld CARG2, 8(BASE) - | bltz NARGS8:RC, ->fff_fallback - |. lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) - | gettp AT, CARG2 - | daddiu AT, AT, -LJ_TFUNC - | bnez AT, ->fff_fallback // Traceback must be a function. - |. move TMP2, BASE - | daddiu BASE, BASE, 24 - | // Remember active hook before pcall. - | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT - | sd CARG2, 0(TMP2) // Swap function and traceback. - | andi TMP3, TMP3, 1 - | sd CARG1, 8(TMP2) - | beqz NARGS8:RC, ->vm_call_dispatch - |. daddiu PC, TMP3, 24+FRAME_PCALL - | b <1 - |. nop - | - |//-- Coroutine library -------------------------------------------------- - | - |.macro coroutine_resume_wrap, resume - |.if resume - |.ffunc_1 coroutine_resume - | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback - |.else - |.ffunc coroutine_wrap_aux - | ld L:CARG1, CFUNC:RB->upvalue[0].gcr - | cleartp L:CARG1 - |.endif - | lbu TMP0, L:CARG1->status - | ld TMP1, L:CARG1->cframe - | ld CARG2, L:CARG1->top - | ld TMP2, L:CARG1->base - | addiu AT, TMP0, -LUA_YIELD - | daddu CARG3, CARG2, TMP0 - | daddiu TMP3, CARG2, 8 - | bgtz AT, ->fff_fallback // st > LUA_YIELD? - |. movn CARG2, TMP3, AT - | xor TMP2, TMP2, CARG3 - | bnez TMP1, ->fff_fallback // cframe != 0? - |. or AT, TMP2, TMP0 - | ld TMP0, L:CARG1->maxstack - | beqz AT, ->fff_fallback // base == top && st == 0? - |. ld PC, FRAME_PC(BASE) - | daddu TMP2, CARG2, NARGS8:RC - | sltu AT, TMP0, TMP2 - | bnez AT, ->fff_fallback // Stack overflow? - |. sd PC, SAVE_PC - | sd BASE, L->base - |1: - |.if resume - | daddiu BASE, BASE, 8 // Keep resumed thread in stack for GC. - | daddiu NARGS8:RC, NARGS8:RC, -8 - | daddiu TMP2, TMP2, -8 - |.endif - | sd TMP2, L:CARG1->top - | daddu TMP1, BASE, NARGS8:RC - | move CARG3, CARG2 - | sd BASE, L->top - |2: // Move args to coroutine. - | ld CRET1, 0(BASE) - | sltu AT, BASE, TMP1 - | beqz AT, >3 - |. daddiu BASE, BASE, 8 - | sd CRET1, 0(CARG3) - | b <2 - |. daddiu CARG3, CARG3, 8 - |3: - | bal ->vm_resume // (lua_State *L, TValue *base, 0, 0) - |. move L:RA, L:CARG1 - | // Returns thread status. - |4: - | ld TMP2, L:RA->base - | sltiu AT, CRET1, LUA_YIELD+1 - | ld TMP3, L:RA->top - | li_vmstate INTERP - | ld BASE, L->base - | sd L, DISPATCH_GL(cur_L)(DISPATCH) - | st_vmstate - | beqz AT, >8 - |. dsubu RD, TMP3, TMP2 - | ld TMP0, L->maxstack - | beqz RD, >6 // No results? - |. daddu TMP1, BASE, RD - | sltu AT, TMP0, TMP1 - | bnez AT, >9 // Need to grow stack? - |. daddu TMP3, TMP2, RD - | sd TMP2, L:RA->top // Clear coroutine stack. - | move TMP1, BASE - |5: // Move results from coroutine. - | ld CRET1, 0(TMP2) - | daddiu TMP2, TMP2, 8 - | sltu AT, TMP2, TMP3 - | sd CRET1, 0(TMP1) - | bnez AT, <5 - |. daddiu TMP1, TMP1, 8 - |6: - | andi TMP0, PC, FRAME_TYPE - |.if resume - | mov_true TMP1 - | daddiu RA, BASE, -8 - | sd TMP1, -8(BASE) // Prepend true to results. - | daddiu RD, RD, 16 - |.else - | move RA, BASE - | daddiu RD, RD, 8 - |.endif - |7: - | sd PC, SAVE_PC - | beqz TMP0, ->BC_RET_Z - |. move MULTRES, RD - | b ->vm_return - |. nop - | - |8: // Coroutine returned with error (at co->top-1). - |.if resume - | daddiu TMP3, TMP3, -8 - | mov_false TMP1 - | ld CRET1, 0(TMP3) - | sd TMP3, L:RA->top // Remove error from coroutine stack. - | li RD, (2+1)*8 - | sd TMP1, -8(BASE) // Prepend false to results. - | daddiu RA, BASE, -8 - | sd CRET1, 0(BASE) // Copy error message. - | b <7 - |. andi TMP0, PC, FRAME_TYPE - |.else - | load_got lj_ffh_coroutine_wrap_err - | move CARG2, L:RA - | call_intern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) - |. move CARG1, L - |.endif - | - |9: // Handle stack expansion on return from yield. - | load_got lj_state_growstack - | srl CARG2, RD, 3 - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | b <4 - |. li CRET1, 0 - |.endmacro - | - | coroutine_resume_wrap 1 // coroutine.resume - | coroutine_resume_wrap 0 // coroutine.wrap - | - |.ffunc coroutine_yield - | ld TMP0, L->cframe - | daddu TMP1, BASE, NARGS8:RC - | sd BASE, L->base - | andi TMP0, TMP0, CFRAME_RESUME - | sd TMP1, L->top - | beqz TMP0, ->fff_fallback - |. li CRET1, LUA_YIELD - | sd r0, L->cframe - | b ->vm_leave_unw - |. sb CRET1, L->status - | - |//-- Math library ------------------------------------------------------- - | - |.ffunc_1 math_abs - | gettp CARG2, CARG1 - | daddiu AT, CARG2, -LJ_TISNUM - | bnez AT, >1 - |. sextw TMP1, CARG1 - | sra TMP0, TMP1, 31 // Extract sign. - | xor TMP1, TMP1, TMP0 - | dsubu CARG1, TMP1, TMP0 - | dsll TMP3, CARG1, 32 - | bgez TMP3, ->fff_restv - |. settp CARG1, TISNUM - | li CARG1, 0x41e0 // 2^31 as a double. - | b ->fff_restv - |. dsll CARG1, CARG1, 48 - |1: - | sltiu AT, CARG2, LJ_TISNUM - | beqz AT, ->fff_fallback - |. dextm CARG1, CARG1, 0, 30 - |// fallthrough - | - |->fff_restv: - | // CARG1 = TValue result. - | ld PC, FRAME_PC(BASE) - | daddiu RA, BASE, -16 - | sd CARG1, -16(BASE) - |->fff_res1: - | // RA = results, PC = return. - | li RD, (1+1)*8 - |->fff_res: - | // RA = results, RD = (nresults+1)*8, PC = return. - | andi TMP0, PC, FRAME_TYPE - | bnez TMP0, ->vm_return - |. move MULTRES, RD - | lw INS, -4(PC) - | decode_RB8a RB, INS - | decode_RB8b RB - |5: - | sltu AT, RD, RB - | bnez AT, >6 // More results expected? - |. decode_RA8a TMP0, INS - | decode_RA8b TMP0 - | ins_next1 - | // Adjust BASE. KBASE is assumed to be set for the calling frame. - | dsubu BASE, RA, TMP0 - | ins_next2 - | - |6: // Fill up results with nil. - | daddu TMP1, RA, RD - | daddiu RD, RD, 8 - | b <5 - |. sd TISNIL, -8(TMP1) - | - |.macro math_extern, func - | .ffunc_n math_ .. func - | load_got func - | call_extern - |. nop - | b ->fff_resn - |. nop - |.endmacro - | - |.macro math_extern2, func - | .ffunc_nn math_ .. func - |. load_got func - | call_extern - |. nop - | b ->fff_resn - |. nop - |.endmacro - | - |// TODO: Return integer type if result is integer (own sf implementation). - |.macro math_round, func - |->ff_math_ .. func: - | ld CARG1, 0(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. gettp TMP0, CARG1 - | beq TMP0, TISNUM, ->fff_restv - |. sltu AT, TMP0, TISNUM - | beqz AT, ->fff_fallback - |.if FPU - |. ldc1 FARG1, 0(BASE) - | bal ->vm_ .. func - |. nop - |.else - |. load_got func - | call_extern - |. nop - |.endif - | b ->fff_resn - |. nop - |.endmacro - | - | math_round floor - | math_round ceil - | - |.ffunc math_log - | li AT, 8 - | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. - |. ld CARG1, 0(BASE) - | checknum CARG1, ->fff_fallback - |. load_got log - |.if FPU - | call_extern - |. ldc1 FARG1, 0(BASE) - |.else - | call_extern - |. nop - |.endif - | b ->fff_resn - |. nop - | - | math_extern log10 - | math_extern exp - | math_extern sin - | math_extern cos - | math_extern tan - | math_extern asin - | math_extern acos - | math_extern atan - | math_extern sinh - | math_extern cosh - | math_extern tanh - | math_extern2 pow - | math_extern2 atan2 - | math_extern2 fmod - | - |.if FPU - |.ffunc_n math_sqrt - |. sqrt.d FRET1, FARG1 - |// fallthrough to ->fff_resn - |.else - | math_extern sqrt - |.endif - | - |->fff_resn: - | ld PC, FRAME_PC(BASE) - | daddiu RA, BASE, -16 - | b ->fff_res1 - |.if FPU - |. sdc1 FRET1, 0(RA) - |.else - |. sd CRET1, 0(RA) - |.endif - | - | - |.ffunc_2 math_ldexp - | checknum CARG1, ->fff_fallback - | checkint CARG2, ->fff_fallback - |. load_got ldexp - | .FPU ldc1 FARG1, 0(BASE) - | call_extern - |. lw CARG2, 8+LO(BASE) - | b ->fff_resn - |. nop - | - |.ffunc_n math_frexp - | load_got frexp - | ld PC, FRAME_PC(BASE) - | call_extern - |. daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) - | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) - | daddiu RA, BASE, -16 - |.if FPU - | mtc1 TMP1, FARG2 - | sdc1 FRET1, 0(RA) - | cvt.d.w FARG2, FARG2 - | sdc1 FARG2, 8(RA) - |.else - | sd CRET1, 0(RA) - | zextw TMP1, TMP1 - | settp TMP1, TISNUM - | sd TMP1, 8(RA) - |.endif - | b ->fff_res - |. li RD, (2+1)*8 - | - |.ffunc_n math_modf - | load_got modf - | ld PC, FRAME_PC(BASE) - | call_extern - |. daddiu CARG2, BASE, -16 - | daddiu RA, BASE, -16 - |.if FPU - | sdc1 FRET1, -8(BASE) - |.else - | sd CRET1, -8(BASE) - |.endif - | b ->fff_res - |. li RD, (2+1)*8 - | - |.macro math_minmax, name, intins, fpins - | .ffunc_1 name - | daddu TMP3, BASE, NARGS8:RC - | checkint CARG1, >5 - |. daddiu TMP2, BASE, 8 - |1: // Handle integers. - | beq TMP2, TMP3, ->fff_restv - |. ld CARG2, 0(TMP2) - | checkint CARG2, >3 - |. sextw CARG1, CARG1 - | lw CARG2, LO(TMP2) - |. slt AT, CARG1, CARG2 - | intins CARG1, CARG2, AT - | daddiu TMP2, TMP2, 8 - | zextw CARG1, CARG1 - | b <1 - |. settp CARG1, TISNUM - | - |3: // Convert intermediate result to number and continue with number loop. - | checknum CARG2, ->fff_fallback - |.if FPU - |. mtc1 CARG1, FRET1 - | cvt.d.w FRET1, FRET1 - | b >7 - |. ldc1 FARG1, 0(TMP2) - |.else - |. nop - | bal ->vm_sfi2d_1 - |. nop - | b >7 - |. nop - |.endif - | - |5: - | .FPU ldc1 FRET1, 0(BASE) - | checknum CARG1, ->fff_fallback - |6: // Handle numbers. - |. ld CARG2, 0(TMP2) - | beq TMP2, TMP3, ->fff_resn - |.if FPU - | ldc1 FARG1, 0(TMP2) - |.else - | move CRET1, CARG1 - |.endif - | checknum CARG2, >8 - |. nop - |7: - |.if FPU - | c.olt.d FRET1, FARG1 - | fpins FRET1, FARG1 - |.else - | bal ->vm_sfcmpolt - |. nop - | intins CARG1, CARG2, CRET1 - |.endif - | b <6 - |. daddiu TMP2, TMP2, 8 - | - |8: // Convert integer to number and continue with number loop. - | checkint CARG2, ->fff_fallback - |.if FPU - |. lwc1 FARG1, LO(TMP2) - | b <7 - |. cvt.d.w FARG1, FARG1 - |.else - |. lw CARG2, LO(TMP2) - | bal ->vm_sfi2d_2 - |. nop - | b <7 - |. nop - |.endif - | - |.endmacro - | - | math_minmax math_min, movz, movf.d - | math_minmax math_max, movn, movt.d - | - |//-- String library ----------------------------------------------------- - | - |.ffunc string_byte // Only handle the 1-arg case here. - | ld CARG1, 0(BASE) - | gettp TMP0, CARG1 - | xori AT, NARGS8:RC, 8 - | daddiu TMP0, TMP0, -LJ_TSTR - | or AT, AT, TMP0 - | bnez AT, ->fff_fallback // Need exactly 1 string argument. - |. cleartp STR:CARG1 - | lw TMP0, STR:CARG1->len - | daddiu RA, BASE, -16 - | ld PC, FRAME_PC(BASE) - | sltu RD, r0, TMP0 - | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). - | addiu RD, RD, 1 - | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 - | settp TMP1, TISNUM - | b ->fff_res - |. sd TMP1, 0(RA) - | - |.ffunc string_char // Only handle the 1-arg case here. - | ffgccheck - |. nop - | ld CARG1, 0(BASE) - | gettp TMP0, CARG1 - | xori AT, NARGS8:RC, 8 // Exactly 1 argument. - | daddiu TMP0, TMP0, -LJ_TISNUM // Integer. - | li TMP1, 255 - | sextw CARG1, CARG1 - | or AT, AT, TMP0 - | sltu TMP1, TMP1, CARG1 // !(255 < n). - | or AT, AT, TMP1 - | bnez AT, ->fff_fallback - |. li CARG3, 1 - | daddiu CARG2, sp, TMPD_OFS - | sb CARG1, TMPD - |->fff_newstr: - | load_got lj_str_new - | sd BASE, L->base - | sd PC, SAVE_PC - | call_intern lj_str_new // (lua_State *L, char *str, size_t l) - |. move CARG1, L - | // Returns GCstr *. - | ld BASE, L->base - |->fff_resstr: - | li AT, LJ_TSTR - | settp CRET1, AT - | b ->fff_restv - |. move CARG1, CRET1 - | - |.ffunc string_sub - | ffgccheck - |. nop - | addiu AT, NARGS8:RC, -16 - | ld TMP0, 0(BASE) - | bltz AT, ->fff_fallback - |. gettp TMP3, TMP0 - | cleartp STR:CARG1, TMP0 - | ld CARG2, 8(BASE) - | beqz AT, >1 - |. li CARG4, -1 - | ld CARG3, 16(BASE) - | checkint CARG3, ->fff_fallback - |. sextw CARG4, CARG3 - |1: - | checkint CARG2, ->fff_fallback - |. li AT, LJ_TSTR - | bne TMP3, AT, ->fff_fallback - |. sextw CARG3, CARG2 - | lw CARG2, STR:CARG1->len - | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end - | slt AT, CARG4, r0 - | addiu TMP0, CARG2, 1 - | addu TMP1, CARG4, TMP0 - | slt TMP3, CARG3, r0 - | movn CARG4, TMP1, AT // if (end < 0) end += len+1 - | addu TMP1, CARG3, TMP0 - | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1 - | li TMP2, 1 - | slt AT, CARG4, r0 - | slt TMP3, r0, CARG3 - | movn CARG4, r0, AT // if (end < 0) end = 0 - | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1 - | slt AT, CARG2, CARG4 - | movn CARG4, CARG2, AT // if (end > len) end = len - | daddu CARG2, STR:CARG1, CARG3 - | subu CARG3, CARG4, CARG3 // len = end - start - | daddiu CARG2, CARG2, sizeof(GCstr)-1 - | bgez CARG3, ->fff_newstr - |. addiu CARG3, CARG3, 1 // len++ - |->fff_emptystr: // Return empty string. - | li AT, LJ_TSTR - | daddiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty) - | b ->fff_restv - |. settp CARG1, AT - | - |.macro ffstring_op, name - | .ffunc string_ .. name - | ffgccheck - |. nop - | beqz NARGS8:RC, ->fff_fallback - |. ld CARG2, 0(BASE) - | checkstr STR:CARG2, ->fff_fallback - | daddiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf) - | load_got lj_buf_putstr_ .. name - | ld TMP0, SBUF:CARG1->b - | sd L, SBUF:CARG1->L - | sd BASE, L->base - | sd TMP0, SBUF:CARG1->p - | call_intern extern lj_buf_putstr_ .. name - |. sd PC, SAVE_PC - | load_got lj_buf_tostr - | call_intern lj_buf_tostr - |. move SBUF:CARG1, SBUF:CRET1 - | b ->fff_resstr - |. ld BASE, L->base - |.endmacro - | - |ffstring_op reverse - |ffstring_op lower - |ffstring_op upper - | - |//-- Bit library -------------------------------------------------------- - | - |->vm_tobit_fb: - | beqz TMP1, ->fff_fallback - |.if FPU - |. ldc1 FARG1, 0(BASE) - | add.d FARG1, FARG1, TOBIT - | mfc1 CRET1, FARG1 - | jr ra - |. zextw CRET1, CRET1 - |.else - |// FP number to bit conversion for soft-float. - |->vm_tobit: - | dsll TMP0, CARG1, 1 - | li CARG3, 1076 - | dsrl AT, TMP0, 53 - | dsubu CARG3, CARG3, AT - | sltiu AT, CARG3, 54 - | beqz AT, >1 - |. dextm TMP0, TMP0, 0, 20 - | dinsu TMP0, AT, 21, 21 - | slt AT, CARG1, r0 - | dsrlv CRET1, TMP0, CARG3 - | dsubu TMP0, r0, CRET1 - | movn CRET1, TMP0, AT - | jr ra - |. zextw CRET1, CRET1 - |1: - | jr ra - |. move CRET1, r0 - |.endif - | - |.macro .ffunc_bit, name - | .ffunc_1 bit_..name - | gettp TMP0, CARG1 - | beq TMP0, TISNUM, >6 - |. zextw CRET1, CARG1 - | bal ->vm_tobit_fb - |. sltiu TMP1, TMP0, LJ_TISNUM - |6: - |.endmacro - | - |.macro .ffunc_bit_op, name, bins - | .ffunc_bit name - | daddiu TMP2, BASE, 8 - | daddu TMP3, BASE, NARGS8:RC - |1: - | beq TMP2, TMP3, ->fff_resi - |. ld CARG1, 0(TMP2) - | gettp TMP0, CARG1 - |.if FPU - | bne TMP0, TISNUM, >2 - |. daddiu TMP2, TMP2, 8 - | zextw CARG1, CARG1 - | b <1 - |. bins CRET1, CRET1, CARG1 - |2: - | ldc1 FARG1, -8(TMP2) - | sltiu AT, TMP0, LJ_TISNUM - | beqz AT, ->fff_fallback - |. add.d FARG1, FARG1, TOBIT - | mfc1 CARG1, FARG1 - | zextw CARG1, CARG1 - | b <1 - |. bins CRET1, CRET1, CARG1 - |.else - | beq TMP0, TISNUM, >2 - |. move CRET2, CRET1 - | bal ->vm_tobit_fb - |. sltiu TMP1, TMP0, LJ_TISNUM - | move CARG1, CRET2 - |2: - | zextw CARG1, CARG1 - | bins CRET1, CRET1, CARG1 - | b <1 - |. daddiu TMP2, TMP2, 8 - |.endif - |.endmacro - | - |.ffunc_bit_op band, and - |.ffunc_bit_op bor, or - |.ffunc_bit_op bxor, xor - | - |.ffunc_bit bswap - | dsrl TMP0, CRET1, 8 - | dsrl TMP1, CRET1, 24 - | andi TMP2, TMP0, 0xff00 - | dins TMP1, CRET1, 24, 31 - | dins TMP2, TMP0, 16, 23 - | b ->fff_resi - |. or CRET1, TMP1, TMP2 - | - |.ffunc_bit bnot - | not CRET1, CRET1 - | b ->fff_resi - |. zextw CRET1, CRET1 - | - |.macro .ffunc_bit_sh, name, shins, shmod - | .ffunc_2 bit_..name - | gettp TMP0, CARG1 - | beq TMP0, TISNUM, >1 - |. nop - | bal ->vm_tobit_fb - |. sltiu TMP1, TMP0, LJ_TISNUM - | move CARG1, CRET1 - |1: - | gettp TMP0, CARG2 - | bne TMP0, TISNUM, ->fff_fallback - |. zextw CARG2, CARG2 - | sextw CARG1, CARG1 - |.if shmod == 1 - | negu CARG2, CARG2 - |.endif - | shins CRET1, CARG1, CARG2 - | b ->fff_resi - |. zextw CRET1, CRET1 - |.endmacro - | - |.ffunc_bit_sh lshift, sllv, 0 - |.ffunc_bit_sh rshift, srlv, 0 - |.ffunc_bit_sh arshift, srav, 0 - |.ffunc_bit_sh rol, rotrv, 1 - |.ffunc_bit_sh ror, rotrv, 0 - | - |.ffunc_bit tobit - |->fff_resi: - | ld PC, FRAME_PC(BASE) - | daddiu RA, BASE, -16 - | settp CRET1, TISNUM - | b ->fff_res1 - |. sd CRET1, -16(BASE) - | - |//----------------------------------------------------------------------- - |->fff_fallback: // Call fast function fallback handler. - | // BASE = new base, RB = CFUNC, RC = nargs*8 - | ld TMP3, CFUNC:RB->f - | daddu TMP1, BASE, NARGS8:RC - | ld PC, FRAME_PC(BASE) // Fallback may overwrite PC. - | daddiu TMP0, TMP1, 8*LUA_MINSTACK - | ld TMP2, L->maxstack - | sd PC, SAVE_PC // Redundant (but a defined value). - | sltu AT, TMP2, TMP0 - | sd BASE, L->base - | sd TMP1, L->top - | bnez AT, >5 // Need to grow stack. - |. move CFUNCADDR, TMP3 - | jalr TMP3 // (lua_State *L) - |. move CARG1, L - | // Either throws an error, or recovers and returns -1, 0 or nresults+1. - | ld BASE, L->base - | sll RD, CRET1, 3 - | bgtz CRET1, ->fff_res // Returned nresults+1? - |. daddiu RA, BASE, -16 - |1: // Returned 0 or -1: retry fast path. - | ld LFUNC:RB, FRAME_FUNC(BASE) - | ld TMP0, L->top - | cleartp LFUNC:RB - | bnez CRET1, ->vm_call_tail // Returned -1? - |. dsubu NARGS8:RC, TMP0, BASE - | ins_callt // Returned 0: retry fast path. - | - |// Reconstruct previous base for vmeta_call during tailcall. - |->vm_call_tail: - | andi TMP0, PC, FRAME_TYPE - | li AT, -4 - | bnez TMP0, >3 - |. and TMP1, PC, AT - | lbu TMP1, OFS_RA(PC) - | sll TMP1, TMP1, 3 - | addiu TMP1, TMP1, 16 - |3: - | b ->vm_call_dispatch // Resolve again for tailcall. - |. dsubu TMP2, BASE, TMP1 - | - |5: // Grow stack for fallback handler. - | load_got lj_state_growstack - | li CARG2, LUA_MINSTACK - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | ld BASE, L->base - | b <1 - |. li CRET1, 0 // Force retry. - | - |->fff_gcstep: // Call GC step function. - | // BASE = new base, RC = nargs*8 - | move MULTRES, ra - | load_got lj_gc_step - | sd BASE, L->base - | daddu TMP0, BASE, NARGS8:RC - | sd PC, SAVE_PC // Redundant (but a defined value). - | sd TMP0, L->top - | call_intern lj_gc_step // (lua_State *L) - |. move CARG1, L - | ld BASE, L->base - | move ra, MULTRES - | ld TMP0, L->top - | ld CFUNC:RB, FRAME_FUNC(BASE) - | cleartp CFUNC:RB - | jr ra - |. dsubu NARGS8:RC, TMP0, BASE - | - |//----------------------------------------------------------------------- - |//-- Special dispatch targets ------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_record: // Dispatch target for recording phase. - |.if JIT - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | andi AT, TMP3, HOOK_VMEVENT // No recording while in vmevent. - | bnez AT, >5 - | // Decrement the hookcount for consistency, but always do the call. - |. lw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andi AT, TMP3, HOOK_ACTIVE - | bnez AT, >1 - |. addiu TMP2, TMP2, -1 - | andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT - | beqz AT, >1 - |. nop - | b >1 - |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - |.endif - | - |->vm_rethook: // Dispatch target for return hooks. - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | andi AT, TMP3, HOOK_ACTIVE // Hook already active? - | beqz AT, >1 - |5: // Re-dispatch to static ins. - |. ld AT, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4. - | jr AT - |. nop - | - |->vm_inshook: // Dispatch target for instr/line hooks. - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | lw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andi AT, TMP3, HOOK_ACTIVE // Hook already active? - | bnez AT, <5 - |. andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT - | beqz AT, <5 - |. addiu TMP2, TMP2, -1 - | beqz TMP2, >1 - |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andi AT, TMP3, LUA_MASKLINE - | beqz AT, <5 - |1: - |. load_got lj_dispatch_ins - | sw MULTRES, SAVE_MULTRES - | move CARG2, PC - | sd BASE, L->base - | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. - | call_intern lj_dispatch_ins // (lua_State *L, const BCIns *pc) - |. move CARG1, L - |3: - | ld BASE, L->base - |4: // Re-dispatch to static ins. - | lw INS, -4(PC) - | decode_OP8a TMP1, INS - | decode_OP8b TMP1 - | daddu TMP0, DISPATCH, TMP1 - | decode_RD8a RD, INS - | ld AT, GG_DISP2STATIC(TMP0) - | decode_RA8a RA, INS - | decode_RD8b RD - | jr AT - | decode_RA8b RA - | - |->cont_hook: // Continue from hook yield. - | daddiu PC, PC, 4 - | b <4 - |. lw MULTRES, -24+LO(RB) // Restore MULTRES for *M ins. - | - |->vm_hotloop: // Hot loop counter underflow. - |.if JIT - | ld LFUNC:TMP1, FRAME_FUNC(BASE) - | daddiu CARG1, DISPATCH, GG_DISP2J - | cleartp LFUNC:TMP1 - | sd PC, SAVE_PC - | ld TMP1, LFUNC:TMP1->pc - | move CARG2, PC - | sd L, DISPATCH_J(L)(DISPATCH) - | lbu TMP1, PC2PROTO(framesize)(TMP1) - | load_got lj_trace_hot - | sd BASE, L->base - | dsll TMP1, TMP1, 3 - | daddu TMP1, BASE, TMP1 - | call_intern lj_trace_hot // (jit_State *J, const BCIns *pc) - |. sd TMP1, L->top - | b <3 - |. nop - |.endif - | - | - |->vm_callhook: // Dispatch target for call hooks. - |.if JIT - | b >1 - |.endif - |. move CARG2, PC - | - |->vm_hotcall: // Hot call counter underflow. - |.if JIT - | ori CARG2, PC, 1 - |1: - |.endif - | load_got lj_dispatch_call - | daddu TMP0, BASE, RC - | sd PC, SAVE_PC - | sd BASE, L->base - | dsubu RA, RA, BASE - | sd TMP0, L->top - | call_intern lj_dispatch_call // (lua_State *L, const BCIns *pc) - |. move CARG1, L - | // Returns ASMFunction. - | ld BASE, L->base - | ld TMP0, L->top - | sd r0, SAVE_PC // Invalidate for subsequent line hook. - | dsubu NARGS8:RC, TMP0, BASE - | daddu RA, BASE, RA - | ld LFUNC:RB, FRAME_FUNC(BASE) - | cleartp LFUNC:RB - | jr CRET1 - |. lw INS, -4(PC) - | - |->cont_stitch: // Trace stitching. - |.if JIT - | // RA = resultptr, RB = meta base - | lw INS, -4(PC) - | ld TRACE:TMP2, -40(RB) // Save previous trace. - | decode_RA8a RC, INS - | daddiu AT, MULTRES, -8 - | cleartp TRACE:TMP2 - | decode_RA8b RC - | beqz AT, >2 - |. daddu RC, BASE, RC // Call base. - |1: // Move results down. - | ld CARG1, 0(RA) - | daddiu AT, AT, -8 - | daddiu RA, RA, 8 - | sd CARG1, 0(RC) - | bnez AT, <1 - |. daddiu RC, RC, 8 - |2: - | decode_RA8a RA, INS - | decode_RB8a RB, INS - | decode_RA8b RA - | decode_RB8b RB - | daddu RA, RA, RB - | daddu RA, BASE, RA - |3: - | sltu AT, RC, RA - | bnez AT, >9 // More results wanted? - |. nop - | - | lhu TMP3, TRACE:TMP2->traceno - | lhu RD, TRACE:TMP2->link - | beq RD, TMP3, ->cont_nop // Blacklisted. - |. load_got lj_dispatch_stitch - | bnez RD, =>BC_JLOOP // Jump to stitched trace. - |. sll RD, RD, 3 - | - | // Stitch a new trace to the previous trace. - | sw TMP3, DISPATCH_J(exitno)(DISPATCH) - | sd L, DISPATCH_J(L)(DISPATCH) - | sd BASE, L->base - | daddiu CARG1, DISPATCH, GG_DISP2J - | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) - |. move CARG2, PC - | b ->cont_nop - |. ld BASE, L->base - | - |9: - | sd TISNIL, 0(RC) - | b <3 - |. daddiu RC, RC, 8 - |.endif - | - |->vm_profhook: // Dispatch target for profiler hook. -#if LJ_HASPROFILE - | load_got lj_dispatch_profile - | sd MULTRES, SAVE_MULTRES - | move CARG2, PC - | sd BASE, L->base - | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc) - |. move CARG1, L - | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. - | daddiu PC, PC, -4 - | b ->cont_nop - |. ld BASE, L->base -#endif - | - |//----------------------------------------------------------------------- - |//-- Trace exit handler ------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro savex_, a, b - |.if FPU - | sdc1 f..a, a*8(sp) - | sdc1 f..b, b*8(sp) - | sd r..a, 32*8+a*8(sp) - | sd r..b, 32*8+b*8(sp) - |.else - | sd r..a, a*8(sp) - | sd r..b, b*8(sp) - |.endif - |.endmacro - | - |->vm_exit_handler: - |.if JIT - |.if FPU - | daddiu sp, sp, -(32*8+32*8) - |.else - | daddiu sp, sp, -(32*8) - |.endif - | savex_ 0, 1 - | savex_ 2, 3 - | savex_ 4, 5 - | savex_ 6, 7 - | savex_ 8, 9 - | savex_ 10, 11 - | savex_ 12, 13 - | savex_ 14, 15 - | savex_ 16, 17 - | savex_ 18, 19 - | savex_ 20, 21 - | savex_ 22, 23 - | savex_ 24, 25 - | savex_ 26, 27 - | savex_ 28, 30 - |.if FPU - | sdc1 f29, 29*8(sp) - | sdc1 f31, 31*8(sp) - | sd r0, 32*8+31*8(sp) // Clear RID_TMP. - | daddiu TMP2, sp, 32*8+32*8 // Recompute original value of sp. - | sd TMP2, 32*8+29*8(sp) // Store sp in RID_SP - |.else - | sd r0, 31*8(sp) // Clear RID_TMP. - | daddiu TMP2, sp, 32*8 // Recompute original value of sp. - | sd TMP2, 29*8(sp) // Store sp in RID_SP - |.endif - | li_vmstate EXIT - | daddiu DISPATCH, JGL, -GG_DISP2G-32768 - | lw TMP1, 0(TMP2) // Load exit number. - | st_vmstate - | ld L, DISPATCH_GL(cur_L)(DISPATCH) - | ld BASE, DISPATCH_GL(jit_base)(DISPATCH) - | load_got lj_trace_exit - | sd L, DISPATCH_J(L)(DISPATCH) - | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. - | sd BASE, L->base - | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. - | daddiu CARG1, DISPATCH, GG_DISP2J - | sd r0, DISPATCH_GL(jit_base)(DISPATCH) - | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) - |. move CARG2, sp - | // Returns MULTRES (unscaled) or negated error code. - | ld TMP1, L->cframe - | li AT, -4 - | ld BASE, L->base - | and sp, TMP1, AT - | ld PC, SAVE_PC // Get SAVE_PC. - | b >1 - |. sd L, SAVE_L // Set SAVE_L (on-trace resume/yield). - |.endif - |->vm_exit_interp: - |.if JIT - | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. - | ld L, SAVE_L - | daddiu DISPATCH, JGL, -GG_DISP2G-32768 - | sd BASE, L->base - |1: - | bltz CRET1, >9 // Check for error from exit. - |. ld LFUNC:RB, FRAME_FUNC(BASE) - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | dsll MULTRES, CRET1, 3 - | cleartp LFUNC:RB - | sd MULTRES, SAVE_MULTRES - | li TISNIL, LJ_TNIL - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | .FPU mtc1 TMP3, TOBIT - | ld TMP1, LFUNC:RB->pc - | sd r0, DISPATCH_GL(jit_base)(DISPATCH) - | ld KBASE, PC2PROTO(k)(TMP1) - | .FPU cvt.d.s TOBIT, TOBIT - | // Modified copy of ins_next which handles function header dispatch, too. - | lw INS, 0(PC) - | daddiu PC, PC, 4 - | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 - | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) - | decode_OP8a TMP1, INS - | decode_OP8b TMP1 - | sltiu TMP2, TMP1, BC_FUNCF*8 - | daddu TMP0, DISPATCH, TMP1 - | decode_RD8a RD, INS - | ld AT, 0(TMP0) - | decode_RA8a RA, INS - | beqz TMP2, >2 - |. decode_RA8b RA - | jr AT - |. decode_RD8b RD - |2: - | sltiu TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function? - | bnez TMP2, >3 - |. ld TMP1, FRAME_PC(BASE) - | // Check frame below fast function. - | andi TMP0, TMP1, FRAME_TYPE - | bnez TMP0, >3 // Trace stitching continuation? - |. nop - | // Otherwise set KBASE for Lua function below fast function. - | lw TMP2, -4(TMP1) - | decode_RA8a TMP0, TMP2 - | decode_RA8b TMP0 - | dsubu TMP1, BASE, TMP0 - | ld LFUNC:TMP2, -32(TMP1) - | cleartp LFUNC:TMP2 - | ld TMP1, LFUNC:TMP2->pc - | ld KBASE, PC2PROTO(k)(TMP1) - |3: - | daddiu RC, MULTRES, -8 - | jr AT - |. daddu RA, RA, BASE - | - |9: // Rethrow error from the right C frame. - | load_got lj_err_throw - | negu CARG2, CRET1 - | call_intern lj_err_throw // (lua_State *L, int errcode) - |. move CARG1, L - |.endif - | - |//----------------------------------------------------------------------- - |//-- Math helper functions ---------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Hard-float round to integer. - |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. - |.macro vm_round_hf, func - | lui TMP0, 0x4330 // Hiword of 2^52 (double). - | dsll TMP0, TMP0, 32 - | dmtc1 TMP0, f4 - | abs.d FRET2, FARG1 // |x| - | dmfc1 AT, FARG1 - | c.olt.d 0, FRET2, f4 - | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 - | bc1f 0, >1 // Truncate only if |x| < 2^52. - |. sub.d FRET1, FRET1, f4 - | slt AT, AT, r0 - |.if "func" == "ceil" - | lui TMP0, 0xbff0 // Hiword of -1 (double). Preserves -0. - |.else - | lui TMP0, 0x3ff0 // Hiword of +1 (double). - |.endif - |.if "func" == "trunc" - | dsll TMP0, TMP0, 32 - | dmtc1 TMP0, f4 - | c.olt.d 0, FRET2, FRET1 // |x| < result? - | sub.d FRET2, FRET1, f4 - | movt.d FRET1, FRET2, 0 // If yes, subtract +1. - | neg.d FRET2, FRET1 - | jr ra - |. movn.d FRET1, FRET2, AT // Merge sign bit back in. - |.else - | neg.d FRET2, FRET1 - | dsll TMP0, TMP0, 32 - | dmtc1 TMP0, f4 - | movn.d FRET1, FRET2, AT // Merge sign bit back in. - |.if "func" == "ceil" - | c.olt.d 0, FRET1, FARG1 // x > result? - |.else - | c.olt.d 0, FARG1, FRET1 // x < result? - |.endif - | sub.d FRET2, FRET1, f4 // If yes, subtract +-1. - | jr ra - |. movt.d FRET1, FRET2, 0 - |.endif - |1: - | jr ra - |. mov.d FRET1, FARG1 - |.endmacro - | - |.macro vm_round, func - |.if FPU - | vm_round_hf, func - |.endif - |.endmacro - | - |->vm_floor: - | vm_round floor - |->vm_ceil: - | vm_round ceil - |->vm_trunc: - |.if JIT - | vm_round trunc - |.endif - | - |// Soft-float integer to number conversion. - |.macro sfi2d, ARG - |.if not FPU - | beqz ARG, >9 // Handle zero first. - |. sra TMP0, ARG, 31 - | xor TMP1, ARG, TMP0 - | dsubu TMP1, TMP1, TMP0 // Absolute value in TMP1. - | dclz ARG, TMP1 - | addiu ARG, ARG, -11 - | li AT, 0x3ff+63-11-1 - | dsllv TMP1, TMP1, ARG // Align mantissa left with leading 1. - | subu ARG, AT, ARG // Exponent - 1. - | ins ARG, TMP0, 11, 11 // Sign | Exponent. - | dsll ARG, ARG, 52 // Align left. - | jr ra - |. daddu ARG, ARG, TMP1 // Add mantissa, increment exponent. - |9: - | jr ra - |. nop - |.endif - |.endmacro - | - |// Input CARG1. Output: CARG1. Temporaries: AT, TMP0, TMP1. - |->vm_sfi2d_1: - | sfi2d CARG1 - | - |// Input CARG2. Output: CARG2. Temporaries: AT, TMP0, TMP1. - |->vm_sfi2d_2: - | sfi2d CARG2 - | - |// Soft-float comparison. Equivalent to c.eq.d. - |// Input: CARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1. - |->vm_sfcmpeq: - |.if not FPU - | dsll AT, CARG1, 1 - | dsll TMP0, CARG2, 1 - | or TMP1, AT, TMP0 - | beqz TMP1, >8 // Both args +-0: return 1. - |. lui TMP1, 0xffe0 - | dsll TMP1, TMP1, 32 - | sltu AT, TMP1, AT - | sltu TMP0, TMP1, TMP0 - | or TMP1, AT, TMP0 - | bnez TMP1, >9 // Either arg is NaN: return 0; - |. xor AT, CARG1, CARG2 - | jr ra - |. sltiu CRET1, AT, 1 // Same values: return 1. - |8: - | jr ra - |. li CRET1, 1 - |9: - | jr ra - |. li CRET1, 0 - |.endif - | - |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d. - |// Input: CARG1, CARG2. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2. - |->vm_sfcmpult: - |.if not FPU - | b >1 - |. li CRET2, 1 - |.endif - | - |->vm_sfcmpolt: - |.if not FPU - | li CRET2, 0 - |1: - | dsll AT, CARG1, 1 - | dsll TMP0, CARG2, 1 - | or TMP1, AT, TMP0 - | beqz TMP1, >8 // Both args +-0: return 0. - |. lui TMP1, 0xffe0 - | dsll TMP1, TMP1, 32 - | sltu AT, TMP1, AT - | sltu TMP0, TMP1, TMP0 - | or TMP1, AT, TMP0 - | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; - |. and AT, CARG1, CARG2 - | bltz AT, >5 // Both args negative? - |. nop - | jr ra - |. slt CRET1, CARG1, CARG2 - |5: // Swap conditions if both operands are negative. - | jr ra - |. slt CRET1, CARG2, CARG1 - |8: - | jr ra - |. nop - |9: - | jr ra - |. move CRET1, CRET2 - |.endif - | - |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. - |// Input: CARG1, CARG2, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. - |->vm_sfcmpolex: - |.if not FPU - | dsll AT, CARG1, 1 - | dsll TMP0, CARG2, 1 - | or TMP1, AT, TMP0 - | beqz TMP1, >8 // Both args +-0: return 1. - |. lui TMP1, 0xffe0 - | dsll TMP1, TMP1, 32 - | sltu AT, TMP1, AT - | sltu TMP0, TMP1, TMP0 - | or TMP1, AT, TMP0 - | bnez TMP1, >9 // Either arg is NaN: return 0; - |. and AT, CARG1, CARG2 - | xor AT, AT, TMP3 - | bltz AT, >5 // Both args negative? - |. nop - | jr ra - |. slt CRET1, CARG2, CARG1 - |5: // Swap conditions if both operands are negative. - | jr ra - |. slt CRET1, CARG1, CARG2 - |8: - | jr ra - |. li CRET1, 1 - |9: - | jr ra - |. li CRET1, 0 - |.endif - | - |//----------------------------------------------------------------------- - |//-- Miscellaneous functions -------------------------------------------- - |//----------------------------------------------------------------------- - | - |//----------------------------------------------------------------------- - |//-- FFI helper functions ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Handler for callback functions. Callback slot number in r1, g in r2. - |->vm_ffi_callback: - |.if FFI - |.type CTSTATE, CTState, PC - | saveregs - | ld CTSTATE, GL:r2->ctype_state - | daddiu DISPATCH, r2, GG_G2DISP - | load_got lj_ccallback_enter - | sw r1, CTSTATE->cb.slot - | sd CARG1, CTSTATE->cb.gpr[0] - | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0] - | sd CARG2, CTSTATE->cb.gpr[1] - | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1] - | sd CARG3, CTSTATE->cb.gpr[2] - | .FPU sdc1 FARG3, CTSTATE->cb.fpr[2] - | sd CARG4, CTSTATE->cb.gpr[3] - | .FPU sdc1 FARG4, CTSTATE->cb.fpr[3] - | sd CARG5, CTSTATE->cb.gpr[4] - | .FPU sdc1 FARG5, CTSTATE->cb.fpr[4] - | sd CARG6, CTSTATE->cb.gpr[5] - | .FPU sdc1 FARG6, CTSTATE->cb.fpr[5] - | sd CARG7, CTSTATE->cb.gpr[6] - | .FPU sdc1 FARG7, CTSTATE->cb.fpr[6] - | sd CARG8, CTSTATE->cb.gpr[7] - | .FPU sdc1 FARG8, CTSTATE->cb.fpr[7] - | daddiu TMP0, sp, CFRAME_SPACE - | sd TMP0, CTSTATE->cb.stack - | sd r0, SAVE_PC // Any value outside of bytecode is ok. - | move CARG2, sp - | call_intern lj_ccallback_enter // (CTState *cts, void *cf) - |. move CARG1, CTSTATE - | // Returns lua_State *. - | ld BASE, L:CRET1->base - | ld RC, L:CRET1->top - | move L, CRET1 - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | ld LFUNC:RB, FRAME_FUNC(BASE) - | .FPU mtc1 TMP3, TOBIT - | li TISNIL, LJ_TNIL - | li TISNUM, LJ_TISNUM - | li_vmstate INTERP - | subu RC, RC, BASE - | cleartp LFUNC:RB - | st_vmstate - | .FPU cvt.d.s TOBIT, TOBIT - | ins_callt - |.endif - | - |->cont_ffi_callback: // Return from FFI callback. - |.if FFI - | load_got lj_ccallback_leave - | ld CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH) - | sd BASE, L->base - | sd RB, L->top - | sd L, CTSTATE->L - | move CARG2, RA - | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) - |. move CARG1, CTSTATE - | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0] - | ld CRET1, CTSTATE->cb.gpr[0] - | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1] - | b ->vm_leave_unw - |. ld CRET2, CTSTATE->cb.gpr[1] - |.endif - | - |->vm_ffi_call: // Call C function via FFI. - | // Caveat: needs special frame unwinding, see below. - |.if FFI - | .type CCSTATE, CCallState, CARG1 - | lw TMP1, CCSTATE->spadj - | lbu CARG2, CCSTATE->nsp - | move TMP2, sp - | dsubu sp, sp, TMP1 - | sd ra, -8(TMP2) - | sll CARG2, CARG2, 3 - | sd r16, -16(TMP2) - | sd CCSTATE, -24(TMP2) - | move r16, TMP2 - | daddiu TMP1, CCSTATE, offsetof(CCallState, stack) - | move TMP2, sp - | beqz CARG2, >2 - |. daddu TMP3, TMP1, CARG2 - |1: - | ld TMP0, 0(TMP1) - | daddiu TMP1, TMP1, 8 - | sltu AT, TMP1, TMP3 - | sd TMP0, 0(TMP2) - | bnez AT, <1 - |. daddiu TMP2, TMP2, 8 - |2: - | ld CFUNCADDR, CCSTATE->func - | .FPU ldc1 FARG1, CCSTATE->gpr[0] - | ld CARG2, CCSTATE->gpr[1] - | .FPU ldc1 FARG2, CCSTATE->gpr[1] - | ld CARG3, CCSTATE->gpr[2] - | .FPU ldc1 FARG3, CCSTATE->gpr[2] - | ld CARG4, CCSTATE->gpr[3] - | .FPU ldc1 FARG4, CCSTATE->gpr[3] - | ld CARG5, CCSTATE->gpr[4] - | .FPU ldc1 FARG5, CCSTATE->gpr[4] - | ld CARG6, CCSTATE->gpr[5] - | .FPU ldc1 FARG6, CCSTATE->gpr[5] - | ld CARG7, CCSTATE->gpr[6] - | .FPU ldc1 FARG7, CCSTATE->gpr[6] - | ld CARG8, CCSTATE->gpr[7] - | .FPU ldc1 FARG8, CCSTATE->gpr[7] - | jalr CFUNCADDR - |. ld CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. - | ld CCSTATE:TMP1, -24(r16) - | ld TMP2, -16(r16) - | ld ra, -8(r16) - | sd CRET1, CCSTATE:TMP1->gpr[0] - | sd CRET2, CCSTATE:TMP1->gpr[1] - |.if FPU - | sdc1 FRET1, CCSTATE:TMP1->fpr[0] - | sdc1 FRET2, CCSTATE:TMP1->fpr[1] - |.else - | sd CARG1, CCSTATE:TMP1->gpr[2] // 2nd FP struct field for soft-float. - |.endif - | move sp, r16 - | jr ra - |. move r16, TMP2 - |.endif - |// Note: vm_ffi_call must be the last function in this object file! - | - |//----------------------------------------------------------------------- -} - -/* Generate the code for a single instruction. */ -static void build_ins(BuildCtx *ctx, BCOp op, int defop) -{ - int vk = 0; - |=>defop: - - switch (op) { - - /* -- Comparison ops ---------------------------------------------------- */ - - /* Remember: all ops branch for a true comparison, fall through otherwise. */ - - case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: - | // RA = src1*8, RD = src2*8, JMP with RD = target - |.macro bc_comp, FRA, FRD, ARGRA, ARGRD, movop, fmovop, fcomp, sfcomp - | daddu RA, BASE, RA - | daddu RD, BASE, RD - | ld ARGRA, 0(RA) - | ld ARGRD, 0(RD) - | lhu TMP2, OFS_RD(PC) - | gettp CARG3, ARGRA - | gettp CARG4, ARGRD - | bne CARG3, TISNUM, >2 - |. daddiu PC, PC, 4 - | bne CARG4, TISNUM, >5 - |. decode_RD4b TMP2 - | sextw ARGRA, ARGRA - | sextw ARGRD, ARGRD - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | slt AT, CARG1, CARG2 - | addu TMP2, TMP2, TMP3 - | movop TMP2, r0, AT - |1: - | daddu PC, PC, TMP2 - | ins_next - | - |2: // RA is not an integer. - | sltiu AT, CARG3, LJ_TISNUM - | beqz AT, ->vmeta_comp - |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sltiu AT, CARG4, LJ_TISNUM - | beqz AT, >4 - |. decode_RD4b TMP2 - |.if FPU - | ldc1 FRA, 0(RA) - | ldc1 FRD, 0(RD) - |.endif - |3: // RA and RD are both numbers. - |.if FPU - | fcomp f20, f22 - | addu TMP2, TMP2, TMP3 - | b <1 - |. fmovop TMP2, r0 - |.else - | bal sfcomp - |. addu TMP2, TMP2, TMP3 - | b <1 - |. movop TMP2, r0, CRET1 - |.endif - | - |4: // RA is a number, RD is not a number. - | bne CARG4, TISNUM, ->vmeta_comp - | // RA is a number, RD is an integer. Convert RD to a number. - |.if FPU - |. lwc1 FRD, LO(RD) - | ldc1 FRA, 0(RA) - | b <3 - |. cvt.d.w FRD, FRD - |.else - |.if "ARGRD" == "CARG1" - |. sextw CARG1, CARG1 - | bal ->vm_sfi2d_1 - |. nop - |.else - |. sextw CARG2, CARG2 - | bal ->vm_sfi2d_2 - |. nop - |.endif - | b <3 - |. nop - |.endif - | - |5: // RA is an integer, RD is not an integer - | sltiu AT, CARG4, LJ_TISNUM - | beqz AT, ->vmeta_comp - |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | // RA is an integer, RD is a number. Convert RA to a number. - |.if FPU - | lwc1 FRA, LO(RA) - | ldc1 FRD, 0(RD) - | b <3 - | cvt.d.w FRA, FRA - |.else - |.if "ARGRA" == "CARG1" - | bal ->vm_sfi2d_1 - |. sextw CARG1, CARG1 - |.else - | bal ->vm_sfi2d_2 - |. sextw CARG2, CARG2 - |.endif - | b <3 - |. nop - |.endif - |.endmacro - | - if (op == BC_ISLT) { - | bc_comp f20, f22, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt - } else if (op == BC_ISGE) { - | bc_comp f20, f22, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt - } else if (op == BC_ISLE) { - | bc_comp f22, f20, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult - } else { - | bc_comp f22, f20, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult - } - break; - - case BC_ISEQV: case BC_ISNEV: - vk = op == BC_ISEQV; - | // RA = src1*8, RD = src2*8, JMP with RD = target - | daddu RA, BASE, RA - | daddiu PC, PC, 4 - | daddu RD, BASE, RD - | ld CARG1, 0(RA) - | lhu TMP2, -4+OFS_RD(PC) - | ld CARG2, 0(RD) - | gettp CARG3, CARG1 - | gettp CARG4, CARG2 - | sltu AT, TISNUM, CARG3 - | sltu TMP1, TISNUM, CARG4 - | or AT, AT, TMP1 - if (vk) { - | beqz AT, ->BC_ISEQN_Z - } else { - | beqz AT, ->BC_ISNEN_Z - } - | // Either or both types are not numbers. - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - |.if FFI - |. li AT, LJ_TCDATA - | beq CARG3, AT, ->vmeta_equal_cd - |.endif - | decode_RD4b TMP2 - |.if FFI - | beq CARG4, AT, ->vmeta_equal_cd - |. nop - |.endif - | bne CARG1, CARG2, >2 - |. addu TMP2, TMP2, TMP3 - | // Tag and value are equal. - if (vk) { - |->BC_ISEQV_Z: - | daddu PC, PC, TMP2 - } - |1: - | ins_next - | - |2: // Check if the tags are the same and it's a table or userdata. - | xor AT, CARG3, CARG4 // Same type? - | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? - | movn TMP0, r0, AT - if (vk) { - | beqz TMP0, <1 - } else { - | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction. - } - | // Different tables or userdatas. Need to check __eq metamethod. - | // Field metatable must be at same offset for GCtab and GCudata! - |. cleartp TAB:TMP1, CARG1 - | ld TAB:TMP3, TAB:TMP1->metatable - if (vk) { - | beqz TAB:TMP3, <1 // No metatable? - |. nop - | lbu TMP3, TAB:TMP3->nomm - | andi TMP3, TMP3, 1<1 // Or 'no __eq' flag set? - } else { - | beqz TAB:TMP3,->BC_ISEQV_Z // No metatable? - |. nop - | lbu TMP3, TAB:TMP3->nomm - | andi TMP3, TMP3, 1<BC_ISEQV_Z // Or 'no __eq' flag set? - } - |. nop - | b ->vmeta_equal // Handle __eq metamethod. - |. li TMP0, 1-vk // ne = 0 or 1. - break; - - case BC_ISEQS: case BC_ISNES: - vk = op == BC_ISEQS; - | // RA = src*8, RD = str_const*8 (~), JMP with RD = target - | daddu RA, BASE, RA - | daddiu PC, PC, 4 - | ld CARG1, 0(RA) - | dsubu RD, KBASE, RD - | lhu TMP2, -4+OFS_RD(PC) - | ld CARG2, -8(RD) // KBASE-8-str_const*8 - |.if FFI - | gettp TMP0, CARG1 - | li AT, LJ_TCDATA - |.endif - | li TMP1, LJ_TSTR - | decode_RD4b TMP2 - |.if FFI - | beq TMP0, AT, ->vmeta_equal_cd - |.endif - |. settp CARG2, TMP1 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | xor TMP1, CARG1, CARG2 - | addu TMP2, TMP2, TMP3 - if (vk) { - | movn TMP2, r0, TMP1 - } else { - | movz TMP2, r0, TMP1 - } - | daddu PC, PC, TMP2 - | ins_next - break; - - case BC_ISEQN: case BC_ISNEN: - vk = op == BC_ISEQN; - | // RA = src*8, RD = num_const*8, JMP with RD = target - | daddu RA, BASE, RA - | daddu RD, KBASE, RD - | ld CARG1, 0(RA) - | ld CARG2, 0(RD) - | lhu TMP2, OFS_RD(PC) - | gettp CARG3, CARG1 - | gettp CARG4, CARG2 - | daddiu PC, PC, 4 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - if (vk) { - |->BC_ISEQN_Z: - } else { - |->BC_ISNEN_Z: - } - | bne CARG3, TISNUM, >3 - |. decode_RD4b TMP2 - | bne CARG4, TISNUM, >6 - |. addu TMP2, TMP2, TMP3 - | xor AT, CARG1, CARG2 - if (vk) { - | movn TMP2, r0, AT - |1: - | daddu PC, PC, TMP2 - |2: - } else { - | movz TMP2, r0, AT - |1: - |2: - | daddu PC, PC, TMP2 - } - | ins_next - | - |3: // RA is not an integer. - | sltu AT, CARG3, TISNUM - |.if FFI - | beqz AT, >8 - |.else - | beqz AT, <2 - |.endif - |. addu TMP2, TMP2, TMP3 - | sltu AT, CARG4, TISNUM - |.if FPU - | ldc1 f20, 0(RA) - | ldc1 f22, 0(RD) - |.endif - | beqz AT, >5 - |. nop - |4: // RA and RD are both numbers. - |.if FPU - | c.eq.d f20, f22 - | b <1 - if (vk) { - |. movf TMP2, r0 - } else { - |. movt TMP2, r0 - } - |.else - | bal ->vm_sfcmpeq - |. nop - | b <1 - if (vk) { - |. movz TMP2, r0, CRET1 - } else { - |. movn TMP2, r0, CRET1 - } - |.endif - | - |5: // RA is a number, RD is not a number. - |.if FFI - | bne CARG4, TISNUM, >9 - |.else - | bne CARG4, TISNUM, <2 - |.endif - | // RA is a number, RD is an integer. Convert RD to a number. - |.if FPU - |. lwc1 f22, LO(RD) - | b <4 - |. cvt.d.w f22, f22 - |.else - |. sextw CARG2, CARG2 - | bal ->vm_sfi2d_2 - |. nop - | b <4 - |. nop - |.endif - | - |6: // RA is an integer, RD is not an integer - | sltu AT, CARG4, TISNUM - |.if FFI - | beqz AT, >9 - |.else - | beqz AT, <2 - |.endif - | // RA is an integer, RD is a number. Convert RA to a number. - |.if FPU - |. lwc1 f20, LO(RA) - | ldc1 f22, 0(RD) - | b <4 - | cvt.d.w f20, f20 - |.else - |. sextw CARG1, CARG1 - | bal ->vm_sfi2d_1 - |. nop - | b <4 - |. nop - |.endif - | - |.if FFI - |8: - | li AT, LJ_TCDATA - | bne CARG3, AT, <2 - |. nop - | b ->vmeta_equal_cd - |. nop - |9: - | li AT, LJ_TCDATA - | bne CARG4, AT, <2 - |. nop - | b ->vmeta_equal_cd - |. nop - |.endif - break; - - case BC_ISEQP: case BC_ISNEP: - vk = op == BC_ISEQP; - | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target - | daddu RA, BASE, RA - | srl TMP1, RD, 3 - | ld TMP0, 0(RA) - | lhu TMP2, OFS_RD(PC) - | not TMP1, TMP1 - | gettp TMP0, TMP0 - | daddiu PC, PC, 4 - |.if FFI - | li AT, LJ_TCDATA - | beq TMP0, AT, ->vmeta_equal_cd - |.endif - |. xor TMP0, TMP0, TMP1 - | decode_RD4b TMP2 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, TMP2, TMP3 - if (vk) { - | movn TMP2, r0, TMP0 - } else { - | movz TMP2, r0, TMP0 - } - | daddu PC, PC, TMP2 - | ins_next - break; - - /* -- Unary test and copy ops ------------------------------------------- */ - - case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: - | // RA = dst*8 or unused, RD = src*8, JMP with RD = target - | daddu RD, BASE, RD - | lhu TMP2, OFS_RD(PC) - | ld TMP0, 0(RD) - | daddiu PC, PC, 4 - | gettp TMP0, TMP0 - | sltiu TMP0, TMP0, LJ_TISTRUECOND - if (op == BC_IST || op == BC_ISF) { - | decode_RD4b TMP2 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, TMP2, TMP3 - if (op == BC_IST) { - | movz TMP2, r0, TMP0 - } else { - | movn TMP2, r0, TMP0 - } - | daddu PC, PC, TMP2 - } else { - | ld CRET1, 0(RD) - if (op == BC_ISTC) { - | beqz TMP0, >1 - } else { - | bnez TMP0, >1 - } - |. daddu RA, BASE, RA - | decode_RD4b TMP2 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, TMP2, TMP3 - | sd CRET1, 0(RA) - | daddu PC, PC, TMP2 - |1: - } - | ins_next - break; - - case BC_ISTYPE: - | // RA = src*8, RD = -type*8 - | daddu TMP2, BASE, RA - | srl TMP1, RD, 3 - | ld TMP0, 0(TMP2) - | ins_next1 - | gettp TMP0, TMP0 - | daddu AT, TMP0, TMP1 - | bnez AT, ->vmeta_istype - |. ins_next2 - break; - case BC_ISNUM: - | // RA = src*8, RD = -(TISNUM-1)*8 - | daddu TMP2, BASE, RA - | ld TMP0, 0(TMP2) - | ins_next1 - | checknum TMP0, ->vmeta_istype - |. ins_next2 - break; - - /* -- Unary ops --------------------------------------------------------- */ - - case BC_MOV: - | // RA = dst*8, RD = src*8 - | daddu RD, BASE, RD - | daddu RA, BASE, RA - | ld CRET1, 0(RD) - | ins_next1 - | sd CRET1, 0(RA) - | ins_next2 - break; - case BC_NOT: - | // RA = dst*8, RD = src*8 - | daddu RD, BASE, RD - | daddu RA, BASE, RA - | ld TMP0, 0(RD) - | li AT, LJ_TTRUE - | gettp TMP0, TMP0 - | sltu TMP0, AT, TMP0 - | addiu TMP0, TMP0, 1 - | dsll TMP0, TMP0, 47 - | not TMP0, TMP0 - | ins_next1 - | sd TMP0, 0(RA) - | ins_next2 - break; - case BC_UNM: - | // RA = dst*8, RD = src*8 - | daddu RB, BASE, RD - | ld CARG1, 0(RB) - | daddu RA, BASE, RA - | gettp CARG3, CARG1 - | bne CARG3, TISNUM, >2 - |. lui TMP1, 0x8000 - | sextw CARG1, CARG1 - | beq CARG1, TMP1, ->vmeta_unm // Meta handler deals with -2^31. - |. negu CARG1, CARG1 - | zextw CARG1, CARG1 - | settp CARG1, TISNUM - |1: - | ins_next1 - | sd CARG1, 0(RA) - | ins_next2 - |2: - | sltiu AT, CARG3, LJ_TISNUM - | beqz AT, ->vmeta_unm - |. dsll TMP1, TMP1, 32 - | b <1 - |. xor CARG1, CARG1, TMP1 - break; - case BC_LEN: - | // RA = dst*8, RD = src*8 - | daddu CARG2, BASE, RD - | daddu RA, BASE, RA - | ld TMP0, 0(CARG2) - | gettp TMP1, TMP0 - | daddiu AT, TMP1, -LJ_TSTR - | bnez AT, >2 - |. cleartp STR:CARG1, TMP0 - | lw CRET1, STR:CARG1->len - |1: - | settp CRET1, TISNUM - | ins_next1 - | sd CRET1, 0(RA) - | ins_next2 - |2: - | daddiu AT, TMP1, -LJ_TTAB - | bnez AT, ->vmeta_len - |. nop -#if LJ_52 - | ld TAB:TMP2, TAB:CARG1->metatable - | bnez TAB:TMP2, >9 - |. nop - |3: -#endif - |->BC_LEN_Z: - | load_got lj_tab_len - | call_intern lj_tab_len // (GCtab *t) - |. nop - | // Returns uint32_t (but less than 2^31). - | b <1 - |. nop -#if LJ_52 - |9: - | lbu TMP0, TAB:TMP2->nomm - | andi TMP0, TMP0, 1<vmeta_len - |. nop -#endif - break; - - /* -- Binary ops -------------------------------------------------------- */ - - |.macro fpmod, a, b, c - | bal ->vm_floor // floor(b/c) - |. div.d FARG1, b, c - | mul.d a, FRET1, c - | sub.d a, b, a // b - floor(b/c)*c - |.endmacro - - |.macro sfpmod - | daddiu sp, sp, -16 - | - | load_got __divdf3 - | sd CARG1, 0(sp) - | call_extern - |. sd CARG2, 8(sp) - | - | load_got floor - | call_extern - |. move CARG1, CRET1 - | - | load_got __muldf3 - | move CARG1, CRET1 - | call_extern - |. ld CARG2, 8(sp) - | - | load_got __subdf3 - | ld CARG1, 0(sp) - | call_extern - |. move CARG2, CRET1 - | - | daddiu sp, sp, 16 - |.endmacro - - |.macro ins_arithpre, label - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 - ||switch (vk) { - ||case 0: - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | // RA = dst*8, RB = src1*8, RC = num_const*8 - | daddu RB, BASE, RB - |.if "label" ~= "none" - | b label - |.endif - |. daddu RC, KBASE, RC - || break; - ||case 1: - | decode_RB8a RC, INS - | decode_RB8b RC - | decode_RDtoRC8 RB, RD - | // RA = dst*8, RB = num_const*8, RC = src1*8 - | daddu RC, BASE, RC - |.if "label" ~= "none" - | b label - |.endif - |. daddu RB, KBASE, RB - || break; - ||default: - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | // RA = dst*8, RB = src1*8, RC = src2*8 - | daddu RB, BASE, RB - |.if "label" ~= "none" - | b label - |.endif - |. daddu RC, BASE, RC - || break; - ||} - |.endmacro - | - |.macro ins_arith, intins, fpins, fpcall, label - | ins_arithpre none - | - |.if "label" ~= "none" - |label: - |.endif - | - |// Used in 5. - | ld CARG1, 0(RB) - | ld CARG2, 0(RC) - | gettp TMP0, CARG1 - | gettp TMP1, CARG2 - | - |.if "intins" ~= "div" - | - | // Check for two integers. - | sextw CARG3, CARG1 - | bne TMP0, TISNUM, >5 - |. sextw CARG4, CARG2 - | bne TMP1, TISNUM, >5 - | - |.if "intins" == "addu" - |. intins CRET1, CARG3, CARG4 - | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow. - | xor TMP2, CRET1, CARG4 - | and TMP1, TMP1, TMP2 - | bltz TMP1, ->vmeta_arith - |. daddu RA, BASE, RA - |.elif "intins" == "subu" - |. intins CRET1, CARG3, CARG4 - | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow. - | xor TMP2, CARG3, CARG4 - | and TMP1, TMP1, TMP2 - | bltz TMP1, ->vmeta_arith - |. daddu RA, BASE, RA - |.elif "intins" == "mult" - |. intins CARG3, CARG4 - | mflo CRET1 - | mfhi TMP2 - | sra TMP1, CRET1, 31 - | bne TMP1, TMP2, ->vmeta_arith - |. daddu RA, BASE, RA - |.else - |. load_got lj_vm_modi - | beqz CARG4, ->vmeta_arith - |. daddu RA, BASE, RA - | move CARG1, CARG3 - | call_extern - |. move CARG2, CARG4 - |.endif - | - | zextw CRET1, CRET1 - | settp CRET1, TISNUM - | ins_next1 - | sd CRET1, 0(RA) - |3: - | ins_next2 - | - |.endif - | - |5: // Check for two numbers. - | .FPU ldc1 f20, 0(RB) - | sltu AT, TMP0, TISNUM - | sltu TMP0, TMP1, TISNUM - | .FPU ldc1 f22, 0(RC) - | and AT, AT, TMP0 - | beqz AT, ->vmeta_arith - |. daddu RA, BASE, RA - | - |.if FPU - | fpins FRET1, f20, f22 - |.elif "fpcall" == "sfpmod" - | sfpmod - |.else - | load_got fpcall - | call_extern - |. nop - |.endif - | - | ins_next1 - |.if "intins" ~= "div" - | b <3 - |.endif - |.if FPU - |. sdc1 FRET1, 0(RA) - |.else - |. sd CRET1, 0(RA) - |.endif - |.if "intins" == "div" - | ins_next2 - |.endif - | - |.endmacro - - case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - | ins_arith addu, add.d, __adddf3, none - break; - case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - | ins_arith subu, sub.d, __subdf3, none - break; - case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arith mult, mul.d, __muldf3, none - break; - case BC_DIVVN: - | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z - break; - case BC_DIVNV: case BC_DIVVV: - | ins_arithpre ->BC_DIVVN_Z - break; - case BC_MODVN: - | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z - break; - case BC_MODNV: case BC_MODVV: - | ins_arithpre ->BC_MODVN_Z - break; - case BC_POW: - | ins_arithpre none - | ld CARG1, 0(RB) - | ld CARG2, 0(RC) - | gettp TMP0, CARG1 - | gettp TMP1, CARG2 - | sltiu TMP0, TMP0, LJ_TISNUM - | sltiu TMP1, TMP1, LJ_TISNUM - | and AT, TMP0, TMP1 - | load_got pow - | beqz AT, ->vmeta_arith - |. daddu RA, BASE, RA - |.if FPU - | ldc1 FARG1, 0(RB) - | ldc1 FARG2, 0(RC) - |.endif - | call_extern - |. nop - | ins_next1 - |.if FPU - | sdc1 FRET1, 0(RA) - |.else - | sd CRET1, 0(RA) - |.endif - | ins_next2 - break; - - case BC_CAT: - | // RA = dst*8, RB = src_start*8, RC = src_end*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | dsubu CARG3, RC, RB - | sd BASE, L->base - | daddu CARG2, BASE, RC - | move MULTRES, RB - |->BC_CAT_Z: - | load_got lj_meta_cat - | srl CARG3, CARG3, 3 - | sd PC, SAVE_PC - | call_intern lj_meta_cat // (lua_State *L, TValue *top, int left) - |. move CARG1, L - | // Returns NULL (finished) or TValue * (metamethod). - | bnez CRET1, ->vmeta_binop - |. ld BASE, L->base - | daddu RB, BASE, MULTRES - | ld CRET1, 0(RB) - | daddu RA, BASE, RA - | ins_next1 - | sd CRET1, 0(RA) - | ins_next2 - break; - - /* -- Constant ops ------------------------------------------------------ */ - - case BC_KSTR: - | // RA = dst*8, RD = str_const*8 (~) - | dsubu TMP1, KBASE, RD - | ins_next1 - | li TMP2, LJ_TSTR - | ld TMP0, -8(TMP1) // KBASE-8-str_const*8 - | daddu RA, BASE, RA - | settp TMP0, TMP2 - | sd TMP0, 0(RA) - | ins_next2 - break; - case BC_KCDATA: - |.if FFI - | // RA = dst*8, RD = cdata_const*8 (~) - | dsubu TMP1, KBASE, RD - | ins_next1 - | ld TMP0, -8(TMP1) // KBASE-8-cdata_const*8 - | li TMP2, LJ_TCDATA - | daddu RA, BASE, RA - | settp TMP0, TMP2 - | sd TMP0, 0(RA) - | ins_next2 - |.endif - break; - case BC_KSHORT: - | // RA = dst*8, RD = int16_literal*8 - | sra RD, INS, 16 - | daddu RA, BASE, RA - | zextw RD, RD - | ins_next1 - | settp RD, TISNUM - | sd RD, 0(RA) - | ins_next2 - break; - case BC_KNUM: - | // RA = dst*8, RD = num_const*8 - | daddu RD, KBASE, RD - | daddu RA, BASE, RA - | ld CRET1, 0(RD) - | ins_next1 - | sd CRET1, 0(RA) - | ins_next2 - break; - case BC_KPRI: - | // RA = dst*8, RD = primitive_type*8 (~) - | daddu RA, BASE, RA - | dsll TMP0, RD, 44 - | not TMP0, TMP0 - | ins_next1 - | sd TMP0, 0(RA) - | ins_next2 - break; - case BC_KNIL: - | // RA = base*8, RD = end*8 - | daddu RA, BASE, RA - | sd TISNIL, 0(RA) - | daddiu RA, RA, 8 - | daddu RD, BASE, RD - |1: - | sd TISNIL, 0(RA) - | slt AT, RA, RD - | bnez AT, <1 - |. daddiu RA, RA, 8 - | ins_next_ - break; - - /* -- Upvalue and function ops ------------------------------------------ */ - - case BC_UGET: - | // RA = dst*8, RD = uvnum*8 - | ld LFUNC:RB, FRAME_FUNC(BASE) - | daddu RA, BASE, RA - | cleartp LFUNC:RB - | daddu RD, RD, LFUNC:RB - | ld UPVAL:RB, LFUNC:RD->uvptr - | ins_next1 - | ld TMP1, UPVAL:RB->v - | ld CRET1, 0(TMP1) - | sd CRET1, 0(RA) - | ins_next2 - break; - case BC_USETV: - | // RA = uvnum*8, RD = src*8 - | ld LFUNC:RB, FRAME_FUNC(BASE) - | daddu RD, BASE, RD - | cleartp LFUNC:RB - | daddu RA, RA, LFUNC:RB - | ld UPVAL:RB, LFUNC:RA->uvptr - | ld CRET1, 0(RD) - | lbu TMP3, UPVAL:RB->marked - | ld CARG2, UPVAL:RB->v - | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) - | lbu TMP0, UPVAL:RB->closed - | gettp TMP2, RD - | sd CRET1, 0(CARG2) - | li AT, LJ_GC_BLACK|1 - | or TMP3, TMP3, TMP0 - | beq TMP3, AT, >2 // Upvalue is closed and black? - |. daddiu TMP2, TMP2, -(LJ_TNUMX+1) - |1: - | ins_next - | - |2: // Check if new value is collectable. - | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) - | beqz AT, <1 // tvisgcv(v) - |. cleartp GCOBJ:TMP1, RB - | lbu TMP3, GCOBJ:TMP1->gch.marked - | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) - | beqz TMP3, <1 - |. load_got lj_gc_barrieruv - | // Crossed a write barrier. Move the barrier forward. - | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) - |. daddiu CARG1, DISPATCH, GG_DISP2G - | b <1 - |. nop - break; - case BC_USETS: - | // RA = uvnum*8, RD = str_const*8 (~) - | ld LFUNC:RB, FRAME_FUNC(BASE) - | dsubu TMP1, KBASE, RD - | cleartp LFUNC:RB - | daddu RA, RA, LFUNC:RB - | ld UPVAL:RB, LFUNC:RA->uvptr - | ld STR:TMP1, -8(TMP1) // KBASE-8-str_const*8 - | lbu TMP2, UPVAL:RB->marked - | ld CARG2, UPVAL:RB->v - | lbu TMP3, STR:TMP1->marked - | andi AT, TMP2, LJ_GC_BLACK // isblack(uv) - | lbu TMP2, UPVAL:RB->closed - | li TMP0, LJ_TSTR - | settp TMP1, TMP0 - | bnez AT, >2 - |. sd TMP1, 0(CARG2) - |1: - | ins_next - | - |2: // Check if string is white and ensure upvalue is closed. - | beqz TMP2, <1 - |. andi AT, TMP3, LJ_GC_WHITES // iswhite(str) - | beqz AT, <1 - |. load_got lj_gc_barrieruv - | // Crossed a write barrier. Move the barrier forward. - | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) - |. daddiu CARG1, DISPATCH, GG_DISP2G - | b <1 - |. nop - break; - case BC_USETN: - | // RA = uvnum*8, RD = num_const*8 - | ld LFUNC:RB, FRAME_FUNC(BASE) - | daddu RD, KBASE, RD - | cleartp LFUNC:RB - | daddu RA, RA, LFUNC:RB - | ld UPVAL:RB, LFUNC:RA->uvptr - | ld CRET1, 0(RD) - | ld TMP1, UPVAL:RB->v - | ins_next1 - | sd CRET1, 0(TMP1) - | ins_next2 - break; - case BC_USETP: - | // RA = uvnum*8, RD = primitive_type*8 (~) - | ld LFUNC:RB, FRAME_FUNC(BASE) - | dsll TMP0, RD, 44 - | cleartp LFUNC:RB - | daddu RA, RA, LFUNC:RB - | not TMP0, TMP0 - | ld UPVAL:RB, LFUNC:RA->uvptr - | ins_next1 - | ld TMP1, UPVAL:RB->v - | sd TMP0, 0(TMP1) - | ins_next2 - break; - - case BC_UCLO: - | // RA = level*8, RD = target - | ld TMP2, L->openupval - | branch_RD // Do this first since RD is not saved. - | load_got lj_func_closeuv - | sd BASE, L->base - | beqz TMP2, >1 - |. move CARG1, L - | call_intern lj_func_closeuv // (lua_State *L, TValue *level) - |. daddu CARG2, BASE, RA - | ld BASE, L->base - |1: - | ins_next - break; - - case BC_FNEW: - | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) - | load_got lj_func_newL_gc - | dsubu TMP1, KBASE, RD - | ld CARG3, FRAME_FUNC(BASE) - | ld CARG2, -8(TMP1) // KBASE-8-tab_const*8 - | sd BASE, L->base - | sd PC, SAVE_PC - | cleartp CARG3 - | // (lua_State *L, GCproto *pt, GCfuncL *parent) - | call_intern lj_func_newL_gc - |. move CARG1, L - | // Returns GCfuncL *. - | li TMP0, LJ_TFUNC - | ld BASE, L->base - | ins_next1 - | settp CRET1, TMP0 - | daddu RA, BASE, RA - | sd CRET1, 0(RA) - | ins_next2 - break; - - /* -- Table ops --------------------------------------------------------- */ - - case BC_TNEW: - case BC_TDUP: - | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) - | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) - | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) - | sd BASE, L->base - | sd PC, SAVE_PC - | sltu AT, TMP0, TMP1 - | beqz AT, >5 - |1: - if (op == BC_TNEW) { - | load_got lj_tab_new - | srl CARG2, RD, 3 - | andi CARG2, CARG2, 0x7ff - | li TMP0, 0x801 - | addiu AT, CARG2, -0x7ff - | srl CARG3, RD, 14 - | movz CARG2, TMP0, AT - | // (lua_State *L, int32_t asize, uint32_t hbits) - | call_intern lj_tab_new - |. move CARG1, L - | // Returns Table *. - } else { - | load_got lj_tab_dup - | dsubu TMP1, KBASE, RD - | move CARG1, L - | call_intern lj_tab_dup // (lua_State *L, Table *kt) - |. ld CARG2, -8(TMP1) // KBASE-8-str_const*8 - | // Returns Table *. - } - | li TMP0, LJ_TTAB - | ld BASE, L->base - | ins_next1 - | daddu RA, BASE, RA - | settp CRET1, TMP0 - | sd CRET1, 0(RA) - | ins_next2 - |5: - | load_got lj_gc_step_fixtop - | move MULTRES, RD - | call_intern lj_gc_step_fixtop // (lua_State *L) - |. move CARG1, L - | b <1 - |. move RD, MULTRES - break; - - case BC_GGET: - | // RA = dst*8, RD = str_const*8 (~) - case BC_GSET: - | // RA = src*8, RD = str_const*8 (~) - | ld LFUNC:TMP2, FRAME_FUNC(BASE) - | dsubu TMP1, KBASE, RD - | ld STR:RC, -8(TMP1) // KBASE-8-str_const*8 - | cleartp LFUNC:TMP2 - | ld TAB:RB, LFUNC:TMP2->env - if (op == BC_GGET) { - | b ->BC_TGETS_Z - } else { - | b ->BC_TSETS_Z - } - |. daddu RA, BASE, RA - break; - - case BC_TGETV: - | // RA = dst*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | daddu CARG2, BASE, RB - | daddu CARG3, BASE, RC - | ld TAB:RB, 0(CARG2) - | ld TMP2, 0(CARG3) - | daddu RA, BASE, RA - | checktab TAB:RB, ->vmeta_tgetv - | gettp TMP3, TMP2 - | bne TMP3, TISNUM, >5 // Integer key? - |. lw TMP0, TAB:RB->asize - | sextw TMP2, TMP2 - | ld TMP1, TAB:RB->array - | sltu AT, TMP2, TMP0 - | sll TMP2, TMP2, 3 - | beqz AT, ->vmeta_tgetv // Integer key and in array part? - |. daddu TMP2, TMP1, TMP2 - | ld AT, 0(TMP2) - | beq AT, TISNIL, >2 - |. ld CRET1, 0(TMP2) - |1: - | ins_next1 - | sd CRET1, 0(RA) - | ins_next2 - | - |2: // Check for __index if table value is nil. - | ld TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. nop - | lbu TMP0, TAB:TMP2->nomm - | andi TMP0, TMP0, 1<vmeta_tgetv - |. nop - | - |5: - | li AT, LJ_TSTR - | bne TMP3, AT, ->vmeta_tgetv - |. cleartp RC, TMP2 - | b ->BC_TGETS_Z // String key? - |. nop - break; - case BC_TGETS: - | // RA = dst*8, RB = table*8, RC = str_const*8 (~) - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RC8a RC, INS - | daddu CARG2, BASE, RB - | decode_RC8b RC - | ld TAB:RB, 0(CARG2) - | dsubu CARG3, KBASE, RC - | daddu RA, BASE, RA - | ld STR:RC, -8(CARG3) // KBASE-8-str_const*8 - | checktab TAB:RB, ->vmeta_tgets1 - |->BC_TGETS_Z: - | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 - | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash - | ld NODE:TMP2, TAB:RB->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | sll TMP0, TMP1, 5 - | sll TMP1, TMP1, 3 - | subu TMP1, TMP0, TMP1 - | li TMP3, LJ_TSTR - | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - | settp STR:RC, TMP3 // Tagged key to look for. - |1: - | ld CARG1, NODE:TMP2->key - | ld CRET1, NODE:TMP2->val - | ld NODE:TMP1, NODE:TMP2->next - | bne CARG1, RC, >4 - |. ld TAB:TMP3, TAB:RB->metatable - | beq CRET1, TISNIL, >5 // Key found, but nil value? - |. nop - |3: - | ins_next1 - | sd CRET1, 0(RA) - | ins_next2 - | - |4: // Follow hash chain. - | bnez NODE:TMP1, <1 - |. move NODE:TMP2, NODE:TMP1 - | // End of hash chain: key not found, nil result. - | - |5: // Check for __index if table value is nil. - | beqz TAB:TMP3, <3 // No metatable: done. - |. move CRET1, TISNIL - | lbu TMP0, TAB:TMP3->nomm - | andi TMP0, TMP0, 1<vmeta_tgets - |. nop - break; - case BC_TGETB: - | // RA = dst*8, RB = table*8, RC = index*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | daddu CARG2, BASE, RB - | decode_RDtoRC8 RC, RD - | ld TAB:RB, 0(CARG2) - | daddu RA, BASE, RA - | srl TMP0, RC, 3 - | checktab TAB:RB, ->vmeta_tgetb - | lw TMP1, TAB:RB->asize - | ld TMP2, TAB:RB->array - | sltu AT, TMP0, TMP1 - | beqz AT, ->vmeta_tgetb - |. daddu RC, TMP2, RC - | ld AT, 0(RC) - | beq AT, TISNIL, >5 - |. ld CRET1, 0(RC) - |1: - | ins_next1 - | sd CRET1, 0(RA) - | ins_next2 - | - |5: // Check for __index if table value is nil. - | ld TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. nop - | lbu TMP1, TAB:TMP2->nomm - | andi TMP1, TMP1, 1<vmeta_tgetb // Caveat: preserve TMP0 and CARG2! - |. nop - break; - case BC_TGETR: - | // RA = dst*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | daddu RB, BASE, RB - | daddu RC, BASE, RC - | ld TAB:CARG1, 0(RB) - | lw CARG2, LO(RC) - | daddu RA, BASE, RA - | cleartp TAB:CARG1 - | lw TMP0, TAB:CARG1->asize - | ld TMP1, TAB:CARG1->array - | sltu AT, CARG2, TMP0 - | sll TMP2, CARG2, 3 - | beqz AT, ->vmeta_tgetr // In array part? - |. daddu CRET1, TMP1, TMP2 - | ld CARG2, 0(CRET1) - |->BC_TGETR_Z: - | ins_next1 - | sd CARG2, 0(RA) - | ins_next2 - break; - - case BC_TSETV: - | // RA = src*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | daddu CARG2, BASE, RB - | daddu CARG3, BASE, RC - | ld RB, 0(CARG2) - | ld TMP2, 0(CARG3) - | daddu RA, BASE, RA - | checktab RB, ->vmeta_tsetv - | checkint TMP2, >5 - |. sextw RC, TMP2 - | lw TMP0, TAB:RB->asize - | ld TMP1, TAB:RB->array - | sltu AT, RC, TMP0 - | sll TMP2, RC, 3 - | beqz AT, ->vmeta_tsetv // Integer key and in array part? - |. daddu TMP1, TMP1, TMP2 - | ld TMP0, 0(TMP1) - | lbu TMP3, TAB:RB->marked - | beq TMP0, TISNIL, >3 - |. ld CRET1, 0(RA) - |1: - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | bnez AT, >7 - |. sd CRET1, 0(TMP1) - |2: - | ins_next - | - |3: // Check for __newindex if previous value is nil. - | ld TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. nop - | lbu TMP2, TAB:TMP2->nomm - | andi TMP2, TMP2, 1<vmeta_tsetv - |. nop - | - |5: - | gettp AT, TMP2 - | daddiu AT, AT, -LJ_TSTR - | bnez AT, ->vmeta_tsetv - |. nop - | b ->BC_TSETS_Z // String key? - |. cleartp STR:RC, TMP2 - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0, <2 - break; - case BC_TSETS: - | // RA = src*8, RB = table*8, RC = str_const*8 (~) - | decode_RB8a RB, INS - | decode_RB8b RB - | daddu CARG2, BASE, RB - | decode_RC8a RC, INS - | ld TAB:RB, 0(CARG2) - | decode_RC8b RC - | dsubu CARG3, KBASE, RC - | ld RC, -8(CARG3) // KBASE-8-str_const*8 - | daddu RA, BASE, RA - | cleartp STR:RC - | checktab TAB:RB, ->vmeta_tsets1 - |->BC_TSETS_Z: - | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 - | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash - | ld NODE:TMP2, TAB:RB->node - | sb r0, TAB:RB->nomm // Clear metamethod cache. - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | sll TMP0, TMP1, 5 - | sll TMP1, TMP1, 3 - | subu TMP1, TMP0, TMP1 - | li TMP3, LJ_TSTR - | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - | settp STR:RC, TMP3 // Tagged key to look for. - |.if FPU - | ldc1 f20, 0(RA) - |.else - | ld CRET1, 0(RA) - |.endif - |1: - | ld TMP0, NODE:TMP2->key - | ld CARG2, NODE:TMP2->val - | ld NODE:TMP1, NODE:TMP2->next - | bne TMP0, RC, >5 - |. lbu TMP3, TAB:RB->marked - | beq CARG2, TISNIL, >4 // Key found, but nil value? - |. ld TAB:TMP0, TAB:RB->metatable - |2: - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | bnez AT, >7 - |.if FPU - |. sdc1 f20, NODE:TMP2->val - |.else - |. sd CRET1, NODE:TMP2->val - |.endif - |3: - | ins_next - | - |4: // Check for __newindex if previous value is nil. - | beqz TAB:TMP0, <2 // No metatable: done. - |. nop - | lbu TMP0, TAB:TMP0->nomm - | andi TMP0, TMP0, 1<vmeta_tsets - |. nop - | - |5: // Follow hash chain. - | bnez NODE:TMP1, <1 - |. move NODE:TMP2, NODE:TMP1 - | // End of hash chain: key not found, add a new one - | - | // But check for __newindex first. - | ld TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, >6 // No metatable: continue. - |. daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | lbu TMP0, TAB:TMP2->nomm - | andi TMP0, TMP0, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. - |6: - | load_got lj_tab_newkey - | sd RC, 0(CARG3) - | sd BASE, L->base - | move CARG2, TAB:RB - | sd PC, SAVE_PC - | call_intern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k - |. move CARG1, L - | // Returns TValue *. - | ld BASE, L->base - |.if FPU - | b <3 // No 2nd write barrier needed. - |. sdc1 f20, 0(CRET1) - |.else - | ld CARG1, 0(RA) - | b <3 // No 2nd write barrier needed. - |. sd CARG1, 0(CRET1) - |.endif - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0, <3 - break; - case BC_TSETB: - | // RA = src*8, RB = table*8, RC = index*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | daddu CARG2, BASE, RB - | decode_RDtoRC8 RC, RD - | ld TAB:RB, 0(CARG2) - | daddu RA, BASE, RA - | srl TMP0, RC, 3 - | checktab RB, ->vmeta_tsetb - | lw TMP1, TAB:RB->asize - | ld TMP2, TAB:RB->array - | sltu AT, TMP0, TMP1 - | beqz AT, ->vmeta_tsetb - |. daddu RC, TMP2, RC - | ld TMP1, 0(RC) - | lbu TMP3, TAB:RB->marked - | beq TMP1, TISNIL, >5 - |1: - |. ld CRET1, 0(RA) - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | bnez AT, >7 - |. sd CRET1, 0(RC) - |2: - | ins_next - | - |5: // Check for __newindex if previous value is nil. - | ld TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. nop - | lbu TMP1, TAB:TMP2->nomm - | andi TMP1, TMP1, 1<vmeta_tsetb // Caveat: preserve TMP0 and CARG2! - |. nop - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0, <2 - break; - case BC_TSETR: - | // RA = dst*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | daddu CARG1, BASE, RB - | daddu CARG3, BASE, RC - | ld TAB:CARG2, 0(CARG1) - | lw CARG3, LO(CARG3) - | cleartp TAB:CARG2 - | lbu TMP3, TAB:CARG2->marked - | lw TMP0, TAB:CARG2->asize - | ld TMP1, TAB:CARG2->array - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | bnez AT, >7 - |. daddu RA, BASE, RA - |2: - | sltu AT, CARG3, TMP0 - | sll TMP2, CARG3, 3 - | beqz AT, ->vmeta_tsetr // In array part? - |. daddu CRET1, TMP1, TMP2 - |->BC_TSETR_Z: - | ld CARG1, 0(RA) - | ins_next1 - | sd CARG1, 0(CRET1) - | ins_next2 - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP3, TMP0, <2 - break; - - case BC_TSETM: - | // RA = base*8 (table at base-1), RD = num_const*8 (start index) - | daddu RA, BASE, RA - |1: - | daddu TMP3, KBASE, RD - | ld TAB:CARG2, -8(RA) // Guaranteed to be a table. - | addiu TMP0, MULTRES, -8 - | lw TMP3, LO(TMP3) // Integer constant is in lo-word. - | beqz TMP0, >4 // Nothing to copy? - |. srl CARG3, TMP0, 3 - | cleartp CARG2 - | addu CARG3, CARG3, TMP3 - | lw TMP2, TAB:CARG2->asize - | sll TMP1, TMP3, 3 - | lbu TMP3, TAB:CARG2->marked - | ld CARG1, TAB:CARG2->array - | sltu AT, TMP2, CARG3 - | bnez AT, >5 - |. daddu TMP2, RA, TMP0 - | daddu TMP1, TMP1, CARG1 - | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) - |3: // Copy result slots to table. - | ld CRET1, 0(RA) - | daddiu RA, RA, 8 - | sltu AT, RA, TMP2 - | sd CRET1, 0(TMP1) - | bnez AT, <3 - |. daddiu TMP1, TMP1, 8 - | bnez TMP0, >7 - |. nop - |4: - | ins_next - | - |5: // Need to resize array part. - | load_got lj_tab_reasize - | sd BASE, L->base - | sd PC, SAVE_PC - | move BASE, RD - | call_intern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) - |. move CARG1, L - | // Must not reallocate the stack. - | move RD, BASE - | b <1 - |. ld BASE, L->base // Reload BASE for lack of a saved register. - | - |7: // Possible table write barrier for any value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP3, TMP0, <4 - break; - - /* -- Calls and vararg handling ----------------------------------------- */ - - case BC_CALLM: - | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 - | decode_RDtoRC8 NARGS8:RC, RD - | b ->BC_CALL_Z - |. addu NARGS8:RC, NARGS8:RC, MULTRES - break; - case BC_CALL: - | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 - | decode_RDtoRC8 NARGS8:RC, RD - |->BC_CALL_Z: - | move TMP2, BASE - | daddu BASE, BASE, RA - | ld LFUNC:RB, 0(BASE) - | daddiu BASE, BASE, 16 - | addiu NARGS8:RC, NARGS8:RC, -8 - | checkfunc RB, ->vmeta_call - | ins_call - break; - - case BC_CALLMT: - | // RA = base*8, (RB = 0,) RC = extra_nargs*8 - | addu NARGS8:RD, NARGS8:RD, MULTRES // BC_CALLT gets RC from RD. - | // Fall through. Assumes BC_CALLT follows. - break; - case BC_CALLT: - | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 - | daddu RA, BASE, RA - | ld RB, 0(RA) - | move NARGS8:RC, RD - | ld TMP1, FRAME_PC(BASE) - | daddiu RA, RA, 16 - | addiu NARGS8:RC, NARGS8:RC, -8 - | checktp CARG3, RB, -LJ_TFUNC, ->vmeta_callt - |->BC_CALLT_Z: - | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'. - | lbu TMP3, LFUNC:CARG3->ffid - | bnez TMP0, >7 - |. xori TMP2, TMP1, FRAME_VARG - |1: - | sd RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. - | sltiu AT, TMP3, 2 // (> FF_C) Calling a fast function? - | move TMP2, BASE - | move RB, CARG3 - | beqz NARGS8:RC, >3 - |. move TMP3, NARGS8:RC - |2: - | ld CRET1, 0(RA) - | daddiu RA, RA, 8 - | addiu TMP3, TMP3, -8 - | sd CRET1, 0(TMP2) - | bnez TMP3, <2 - |. daddiu TMP2, TMP2, 8 - |3: - | or TMP0, TMP0, AT - | beqz TMP0, >5 - |. nop - |4: - | ins_callt - | - |5: // Tailcall to a fast function with a Lua frame below. - | lw INS, -4(TMP1) - | decode_RA8a RA, INS - | decode_RA8b RA - | dsubu TMP1, BASE, RA - | ld TMP1, -32(TMP1) - | cleartp LFUNC:TMP1 - | ld TMP1, LFUNC:TMP1->pc - | b <4 - |. ld KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. - | - |7: // Tailcall from a vararg function. - | andi AT, TMP2, FRAME_TYPEP - | bnez AT, <1 // Vararg frame below? - |. dsubu TMP2, BASE, TMP2 // Relocate BASE down. - | move BASE, TMP2 - | ld TMP1, FRAME_PC(TMP2) - | b <1 - |. andi TMP0, TMP1, FRAME_TYPE - break; - - case BC_ITERC: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) - | move TMP2, BASE // Save old BASE fir vmeta_call. - | daddu BASE, BASE, RA - | ld RB, -24(BASE) - | ld CARG1, -16(BASE) - | ld CARG2, -8(BASE) - | li NARGS8:RC, 16 // Iterators get 2 arguments. - | sd RB, 0(BASE) // Copy callable. - | sd CARG1, 16(BASE) // Copy state. - | sd CARG2, 24(BASE) // Copy control var. - | daddiu BASE, BASE, 16 - | checkfunc RB, ->vmeta_call - | ins_call - break; - - case BC_ITERN: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. - |.endif - | daddu RA, BASE, RA - | ld TAB:RB, -16(RA) - | lw RC, -8+LO(RA) // Get index from control var. - | cleartp TAB:RB - | daddiu PC, PC, 4 - | lw TMP0, TAB:RB->asize - | ld TMP1, TAB:RB->array - | dsll CARG3, TISNUM, 47 - |1: // Traverse array part. - | sltu AT, RC, TMP0 - | beqz AT, >5 // Index points after array part? - |. sll TMP3, RC, 3 - | daddu TMP3, TMP1, TMP3 - | ld CARG1, 0(TMP3) - | lhu RD, -4+OFS_RD(PC) - | or TMP2, RC, CARG3 - | beq CARG1, TISNIL, <1 // Skip holes in array part. - |. addiu RC, RC, 1 - | sd TMP2, 0(RA) - | sd CARG1, 8(RA) - | or TMP0, RC, CARG3 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | decode_RD4b RD - | daddu RD, RD, TMP3 - | sw TMP0, -8+LO(RA) // Update control var. - | daddu PC, PC, RD - |3: - | ins_next - | - |5: // Traverse hash part. - | lw TMP1, TAB:RB->hmask - | subu RC, RC, TMP0 - | ld TMP2, TAB:RB->node - |6: - | sltu AT, TMP1, RC // End of iteration? Branch to ITERL+1. - | bnez AT, <3 - |. sll TMP3, RC, 5 - | sll RB, RC, 3 - | subu TMP3, TMP3, RB - | daddu NODE:TMP3, TMP3, TMP2 - | ld CARG1, 0(NODE:TMP3) - | lhu RD, -4+OFS_RD(PC) - | beq CARG1, TISNIL, <6 // Skip holes in hash part. - |. addiu RC, RC, 1 - | ld CARG2, NODE:TMP3->key - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sd CARG1, 8(RA) - | addu RC, RC, TMP0 - | decode_RD4b RD - | addu RD, RD, TMP3 - | sd CARG2, 0(RA) - | daddu PC, PC, RD - | b <3 - |. sw RC, -8+LO(RA) // Update control var. - break; - - case BC_ISNEXT: - | // RA = base*8, RD = target (points to ITERN) - | daddu RA, BASE, RA - | srl TMP0, RD, 1 - | ld CFUNC:CARG1, -24(RA) - | daddu TMP0, PC, TMP0 - | ld CARG2, -16(RA) - | ld CARG3, -8(RA) - | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) - | checkfunc CFUNC:CARG1, >5 - | gettp CARG2, CARG2 - | daddiu CARG2, CARG2, -LJ_TTAB - | lbu TMP1, CFUNC:CARG1->ffid - | daddiu CARG3, CARG3, -LJ_TNIL - | or AT, CARG2, CARG3 - | daddiu TMP1, TMP1, -FF_next_N - | or AT, AT, TMP1 - | bnez AT, >5 - |. lui TMP1, 0xfffe - | daddu PC, TMP0, TMP2 - | ori TMP1, TMP1, 0x7fff - | dsll TMP1, TMP1, 32 - | sd TMP1, -8(RA) - |1: - | ins_next - |5: // Despecialize bytecode if any of the checks fail. - | li TMP3, BC_JMP - | li TMP1, BC_ITERC - | sb TMP3, -4+OFS_OP(PC) - | daddu PC, TMP0, TMP2 - | b <1 - |. sb TMP1, OFS_OP(PC) - break; - - case BC_VARG: - | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 - | ld TMP0, FRAME_PC(BASE) - | decode_RDtoRC8 RC, RD - | decode_RB8a RB, INS - | daddu RC, BASE, RC - | decode_RB8b RB - | daddu RA, BASE, RA - | daddiu RC, RC, FRAME_VARG - | daddu TMP2, RA, RB - | daddiu TMP3, BASE, -16 // TMP3 = vtop - | dsubu RC, RC, TMP0 // RC = vbase - | // Note: RC may now be even _above_ BASE if nargs was < numparams. - | beqz RB, >5 // Copy all varargs? - |. dsubu TMP1, TMP3, RC - | daddiu TMP2, TMP2, -16 - |1: // Copy vararg slots to destination slots. - | ld CARG1, 0(RC) - | sltu AT, RC, TMP3 - | daddiu RC, RC, 8 - | movz CARG1, TISNIL, AT - | sd CARG1, 0(RA) - | sltu AT, RA, TMP2 - | bnez AT, <1 - |. daddiu RA, RA, 8 - |3: - | ins_next - | - |5: // Copy all varargs. - | ld TMP0, L->maxstack - | blez TMP1, <3 // No vararg slots? - |. li MULTRES, 8 // MULTRES = (0+1)*8 - | daddu TMP2, RA, TMP1 - | sltu AT, TMP0, TMP2 - | bnez AT, >7 - |. daddiu MULTRES, TMP1, 8 - |6: - | ld CRET1, 0(RC) - | daddiu RC, RC, 8 - | sd CRET1, 0(RA) - | sltu AT, RC, TMP3 - | bnez AT, <6 // More vararg slots? - |. daddiu RA, RA, 8 - | b <3 - |. nop - | - |7: // Grow stack for varargs. - | load_got lj_state_growstack - | sd RA, L->top - | dsubu RA, RA, BASE - | sd BASE, L->base - | dsubu BASE, RC, BASE // Need delta, because BASE may change. - | sd PC, SAVE_PC - | srl CARG2, TMP1, 3 - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | move RC, BASE - | ld BASE, L->base - | daddu RA, BASE, RA - | daddu RC, BASE, RC - | b <6 - |. daddiu TMP3, BASE, -16 - break; - - /* -- Returns ----------------------------------------------------------- */ - - case BC_RETM: - | // RA = results*8, RD = extra_nresults*8 - | addu RD, RD, MULTRES // MULTRES >= 8, so RD >= 8. - | // Fall through. Assumes BC_RET follows. - break; - - case BC_RET: - | // RA = results*8, RD = (nresults+1)*8 - | ld PC, FRAME_PC(BASE) - | daddu RA, BASE, RA - | move MULTRES, RD - |1: - | andi TMP0, PC, FRAME_TYPE - | bnez TMP0, ->BC_RETV_Z - |. xori TMP1, PC, FRAME_VARG - | - |->BC_RET_Z: - | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return - | lw INS, -4(PC) - | daddiu TMP2, BASE, -16 - | daddiu RC, RD, -8 - | decode_RA8a TMP0, INS - | decode_RB8a RB, INS - | decode_RA8b TMP0 - | decode_RB8b RB - | daddu TMP3, TMP2, RB - | beqz RC, >3 - |. dsubu BASE, TMP2, TMP0 - |2: - | ld CRET1, 0(RA) - | daddiu RA, RA, 8 - | daddiu RC, RC, -8 - | sd CRET1, 0(TMP2) - | bnez RC, <2 - |. daddiu TMP2, TMP2, 8 - |3: - | daddiu TMP3, TMP3, -8 - |5: - | sltu AT, TMP2, TMP3 - | bnez AT, >6 - |. ld LFUNC:TMP1, FRAME_FUNC(BASE) - | ins_next1 - | cleartp LFUNC:TMP1 - | ld TMP1, LFUNC:TMP1->pc - | ld KBASE, PC2PROTO(k)(TMP1) - | ins_next2 - | - |6: // Fill up results with nil. - | sd TISNIL, 0(TMP2) - | b <5 - |. daddiu TMP2, TMP2, 8 - | - |->BC_RETV_Z: // Non-standard return case. - | andi TMP2, TMP1, FRAME_TYPEP - | bnez TMP2, ->vm_return - |. nop - | // Return from vararg function: relocate BASE down. - | dsubu BASE, BASE, TMP1 - | b <1 - |. ld PC, FRAME_PC(BASE) - break; - - case BC_RET0: case BC_RET1: - | // RA = results*8, RD = (nresults+1)*8 - | ld PC, FRAME_PC(BASE) - | daddu RA, BASE, RA - | move MULTRES, RD - | andi TMP0, PC, FRAME_TYPE - | bnez TMP0, ->BC_RETV_Z - |. xori TMP1, PC, FRAME_VARG - | lw INS, -4(PC) - | daddiu TMP2, BASE, -16 - if (op == BC_RET1) { - | ld CRET1, 0(RA) - } - | decode_RB8a RB, INS - | decode_RA8a RA, INS - | decode_RB8b RB - | decode_RA8b RA - | dsubu BASE, TMP2, RA - if (op == BC_RET1) { - | sd CRET1, 0(TMP2) - } - |5: - | sltu AT, RD, RB - | bnez AT, >6 - |. ld TMP1, FRAME_FUNC(BASE) - | ins_next1 - | cleartp LFUNC:TMP1 - | ld TMP1, LFUNC:TMP1->pc - | ld KBASE, PC2PROTO(k)(TMP1) - | ins_next2 - | - |6: // Fill up results with nil. - | daddiu TMP2, TMP2, 8 - | daddiu RD, RD, 8 - | b <5 - if (op == BC_RET1) { - |. sd TISNIL, 0(TMP2) - } else { - |. sd TISNIL, -8(TMP2) - } - break; - - /* -- Loops and branches ------------------------------------------------ */ - - case BC_FORL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IFORL follows. - break; - - case BC_JFORI: - case BC_JFORL: -#if !LJ_HASJIT - break; -#endif - case BC_FORI: - case BC_IFORL: - | // RA = base*8, RD = target (after end of loop or start of loop) - vk = (op == BC_IFORL || op == BC_JFORL); - | daddu RA, BASE, RA - | ld CARG1, FORL_IDX*8(RA) // IDX CARG1 - CARG3 type - | gettp CARG3, CARG1 - if (op != BC_JFORL) { - | srl RD, RD, 1 - | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) - | daddu TMP2, RD, TMP2 - } - if (!vk) { - | ld CARG2, FORL_STOP*8(RA) // STOP CARG2 - CARG4 type - | ld CRET1, FORL_STEP*8(RA) // STEP CRET1 - CRET2 type - | gettp CARG4, CARG2 - | bne CARG3, TISNUM, >5 - |. gettp CRET2, CRET1 - | bne CARG4, TISNUM, ->vmeta_for - |. sextw CARG3, CARG1 - | bne CRET2, TISNUM, ->vmeta_for - |. sextw CARG2, CARG2 - | dext AT, CRET1, 31, 0 - | slt CRET1, CARG2, CARG3 - | slt TMP1, CARG3, CARG2 - | movn CRET1, TMP1, AT - } else { - | bne CARG3, TISNUM, >5 - |. ld CARG2, FORL_STEP*8(RA) // STEP CARG2 - CARG4 type - | ld CRET1, FORL_STOP*8(RA) // STOP CRET1 - CRET2 type - | sextw TMP3, CARG1 - | sextw CARG2, CARG2 - | sextw CRET1, CRET1 - | addu CARG1, TMP3, CARG2 - | xor TMP0, CARG1, TMP3 - | xor TMP1, CARG1, CARG2 - | and TMP0, TMP0, TMP1 - | slt TMP1, CARG1, CRET1 - | slt CRET1, CRET1, CARG1 - | slt AT, CARG2, r0 - | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow. - | movn CRET1, TMP1, AT - | or CRET1, CRET1, TMP0 - | zextw CARG1, CARG1 - | settp CARG1, TISNUM - } - |1: - if (op == BC_FORI) { - | movz TMP2, r0, CRET1 - | daddu PC, PC, TMP2 - } else if (op == BC_JFORI) { - | daddu PC, PC, TMP2 - | lhu RD, -4+OFS_RD(PC) - } else if (op == BC_IFORL) { - | movn TMP2, r0, CRET1 - | daddu PC, PC, TMP2 - } - if (vk) { - | sd CARG1, FORL_IDX*8(RA) - } - | ins_next1 - | sd CARG1, FORL_EXT*8(RA) - |2: - if (op == BC_JFORI) { - | beqz CRET1, =>BC_JLOOP - |. decode_RD8b RD - } else if (op == BC_JFORL) { - | beqz CRET1, =>BC_JLOOP - } - | ins_next2 - | - |5: // FP loop. - |.if FPU - if (!vk) { - | ldc1 f0, FORL_IDX*8(RA) - | ldc1 f2, FORL_STOP*8(RA) - | sltiu TMP0, CARG3, LJ_TISNUM - | sltiu TMP1, CARG4, LJ_TISNUM - | sltiu AT, CRET2, LJ_TISNUM - | ld TMP3, FORL_STEP*8(RA) - | and TMP0, TMP0, TMP1 - | and AT, AT, TMP0 - | beqz AT, ->vmeta_for - |. slt TMP3, TMP3, r0 - | c.ole.d 0, f0, f2 - | c.ole.d 1, f2, f0 - | li CRET1, 1 - | movt CRET1, r0, 0 - | movt AT, r0, 1 - | b <1 - |. movn CRET1, AT, TMP3 - } else { - | ldc1 f0, FORL_IDX*8(RA) - | ldc1 f4, FORL_STEP*8(RA) - | ldc1 f2, FORL_STOP*8(RA) - | ld TMP3, FORL_STEP*8(RA) - | add.d f0, f0, f4 - | c.ole.d 0, f0, f2 - | c.ole.d 1, f2, f0 - | slt TMP3, TMP3, r0 - | li CRET1, 1 - | li AT, 1 - | movt CRET1, r0, 0 - | movt AT, r0, 1 - | movn CRET1, AT, TMP3 - if (op == BC_IFORL) { - | movn TMP2, r0, CRET1 - | daddu PC, PC, TMP2 - } - | sdc1 f0, FORL_IDX*8(RA) - | ins_next1 - | b <2 - |. sdc1 f0, FORL_EXT*8(RA) - } - |.else - if (!vk) { - | sltiu TMP0, CARG3, LJ_TISNUM - | sltiu TMP1, CARG4, LJ_TISNUM - | sltiu AT, CRET2, LJ_TISNUM - | and TMP0, TMP0, TMP1 - | and AT, AT, TMP0 - | beqz AT, ->vmeta_for - |. nop - | bal ->vm_sfcmpolex - |. lw TMP3, FORL_STEP*8+HI(RA) - | b <1 - |. nop - } else { - | load_got __adddf3 - | call_extern - |. sw TMP2, TMPD - | ld CARG2, FORL_STOP*8(RA) - | move CARG1, CRET1 - if ( op == BC_JFORL ) { - | lhu RD, -4+OFS_RD(PC) - | decode_RD8b RD - } - | bal ->vm_sfcmpolex - |. lw TMP3, FORL_STEP*8+HI(RA) - | b <1 - |. lw TMP2, TMPD - } - |.endif - break; - - case BC_ITERL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IITERL follows. - break; - - case BC_JITERL: -#if !LJ_HASJIT - break; -#endif - case BC_IITERL: - | // RA = base*8, RD = target - | daddu RA, BASE, RA - | ld TMP1, 0(RA) - | beq TMP1, TISNIL, >1 // Stop if iterator returned nil. - |. nop - if (op == BC_JITERL) { - | b =>BC_JLOOP - |. sd TMP1, -8(RA) - } else { - | branch_RD // Otherwise save control var + branch. - | sd TMP1, -8(RA) - } - |1: - | ins_next - break; - - case BC_LOOP: - | // RA = base*8, RD = target (loop extent) - | // Note: RA/RD is only used by trace recorder to determine scope/extent - | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_ILOOP follows. - break; - - case BC_ILOOP: - | // RA = base*8, RD = target (loop extent) - | ins_next - break; - - case BC_JLOOP: - |.if JIT - | // RA = base*8 (ignored), RD = traceno*8 - | ld TMP1, DISPATCH_J(trace)(DISPATCH) - | li AT, 0 - | daddu TMP1, TMP1, RD - | // Traces on MIPS don't store the trace number, so use 0. - | sd AT, DISPATCH_GL(vmstate)(DISPATCH) - | ld TRACE:TMP2, 0(TMP1) - | sd BASE, DISPATCH_GL(jit_base)(DISPATCH) - | ld TMP2, TRACE:TMP2->mcode - | sd L, DISPATCH_GL(tmpbuf.L)(DISPATCH) - | jr TMP2 - |. daddiu JGL, DISPATCH, GG_DISP2G+32768 - |.endif - break; - - case BC_JMP: - | // RA = base*8 (only used by trace recorder), RD = target - | branch_RD - | ins_next - break; - - /* -- Function headers -------------------------------------------------- */ - - case BC_FUNCF: - |.if JIT - | hotcall - |.endif - case BC_FUNCV: /* NYI: compiled vararg functions. */ - | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. - break; - - case BC_JFUNCF: -#if !LJ_HASJIT - break; -#endif - case BC_IFUNCF: - | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 - | ld TMP2, L->maxstack - | lbu TMP1, -4+PC2PROTO(numparams)(PC) - | ld KBASE, -4+PC2PROTO(k)(PC) - | sltu AT, TMP2, RA - | bnez AT, ->vm_growstack_l - |. sll TMP1, TMP1, 3 - if (op != BC_JFUNCF) { - | ins_next1 - } - |2: - | sltu AT, NARGS8:RC, TMP1 // Check for missing parameters. - | bnez AT, >3 - |. daddu AT, BASE, NARGS8:RC - if (op == BC_JFUNCF) { - | decode_RD8a RD, INS - | b =>BC_JLOOP - |. decode_RD8b RD - } else { - | ins_next2 - } - | - |3: // Clear missing parameters. - | sd TISNIL, 0(AT) - | b <2 - |. addiu NARGS8:RC, NARGS8:RC, 8 - break; - - case BC_JFUNCV: -#if !LJ_HASJIT - break; -#endif - | NYI // NYI: compiled vararg functions - break; /* NYI: compiled vararg functions. */ - - case BC_IFUNCV: - | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 - | li TMP0, LJ_TFUNC - | daddu TMP1, BASE, RC - | ld TMP2, L->maxstack - | settp LFUNC:RB, TMP0 - | daddu TMP0, RA, RC - | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC. - | daddiu TMP3, RC, 16+FRAME_VARG - | sltu AT, TMP0, TMP2 - | ld KBASE, -4+PC2PROTO(k)(PC) - | beqz AT, ->vm_growstack_l - |. sd TMP3, 8(TMP1) // Store delta + FRAME_VARG. - | lbu TMP2, -4+PC2PROTO(numparams)(PC) - | move RA, BASE - | move RC, TMP1 - | ins_next1 - | beqz TMP2, >3 - |. daddiu BASE, TMP1, 16 - |1: - | ld TMP0, 0(RA) - | sltu AT, RA, RC // Less args than parameters? - | move CARG1, TMP0 - | movz TMP0, TISNIL, AT // Clear missing parameters. - | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC). - | addiu TMP2, TMP2, -1 - | sd TMP0, 16(TMP1) - | daddiu TMP1, TMP1, 8 - | sd CARG1, 0(RA) - | bnez TMP2, <1 - |. daddiu RA, RA, 8 - |3: - | ins_next2 - break; - - case BC_FUNCC: - case BC_FUNCCW: - | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 - if (op == BC_FUNCC) { - | ld CFUNCADDR, CFUNC:RB->f - } else { - | ld CFUNCADDR, DISPATCH_GL(wrapf)(DISPATCH) - } - | daddu TMP1, RA, NARGS8:RC - | ld TMP2, L->maxstack - | daddu RC, BASE, NARGS8:RC - | sd BASE, L->base - | sltu AT, TMP2, TMP1 - | sd RC, L->top - | li_vmstate C - if (op == BC_FUNCCW) { - | ld CARG2, CFUNC:RB->f - } - | bnez AT, ->vm_growstack_c // Need to grow stack. - |. move CARG1, L - | jalr CFUNCADDR // (lua_State *L [, lua_CFunction f]) - |. st_vmstate - | // Returns nresults. - | ld BASE, L->base - | sll RD, CRET1, 3 - | ld TMP1, L->top - | li_vmstate INTERP - | ld PC, FRAME_PC(BASE) // Fetch PC of caller. - | dsubu RA, TMP1, RD // RA = L->top - nresults*8 - | sd L, DISPATCH_GL(cur_L)(DISPATCH) - | b ->vm_returnc - |. st_vmstate - break; - - /* ---------------------------------------------------------------------- */ - - default: - fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); - exit(2); - break; - } -} - -static int build_backend(BuildCtx *ctx) -{ - int op; - - dasm_growpc(Dst, BC__MAX); - - build_subroutines(ctx); - - |.code_op - for (op = 0; op < BC__MAX; op++) - build_ins(ctx, (BCOp)op, op); - - return BC__MAX; -} - -/* Emit pseudo frame-info for all assembler functions. */ -static void emit_asm_debug(BuildCtx *ctx) -{ - int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); - int i; - switch (ctx->mode) { - case BUILD_elfasm: - fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); - fprintf(ctx->fp, - ".Lframe0:\n" - "\t.4byte .LECIE0-.LSCIE0\n" - ".LSCIE0:\n" - "\t.4byte 0xffffffff\n" - "\t.byte 0x1\n" - "\t.string \"\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 31\n" - "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE0:\n\n"); - fprintf(ctx->fp, - ".LSFDE0:\n" - "\t.4byte .LEFDE0-.LASFDE0\n" - ".LASFDE0:\n" - "\t.4byte .Lframe0\n" - "\t.8byte .Lbegin\n" - "\t.8byte %d\n" - "\t.byte 0xe\n\t.uleb128 %d\n" - "\t.byte 0x9f\n\t.sleb128 2*5\n" - "\t.byte 0x9e\n\t.sleb128 2*6\n", - fcofs, CFRAME_SIZE); - for (i = 23; i >= 16; i--) - fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(30-i)); -#if !LJ_SOFTFP - for (i = 31; i >= 24; i--) - fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(46-i)); -#endif - fprintf(ctx->fp, - "\t.align 2\n" - ".LEFDE0:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".LSFDE1:\n" - "\t.4byte .LEFDE1-.LASFDE1\n" - ".LASFDE1:\n" - "\t.4byte .Lframe0\n" - "\t.4byte lj_vm_ffi_call\n" - "\t.4byte %d\n" - "\t.byte 0x9f\n\t.uleb128 2*1\n" - "\t.byte 0x90\n\t.uleb128 2*2\n" - "\t.byte 0xd\n\t.uleb128 0x10\n" - "\t.align 2\n" - ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); -#endif -#if !LJ_NO_UNWIND - /* NYI */ -#endif - break; - default: - break; - } -} - diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc deleted file mode 100644 index b4260ebc10..0000000000 --- a/src/vm_ppc.dasc +++ /dev/null @@ -1,5248 +0,0 @@ -|// Low-level VM code for PowerPC 32 bit or 32on64 bit mode. -|// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -| -|.arch ppc -|.section code_op, code_sub -| -|.actionlist build_actionlist -|.globals GLOB_ -|.globalnames globnames -|.externnames extnames -| -|// Note: The ragged indentation of the instructions is intentional. -|// The starting columns indicate data dependencies. -| -|//----------------------------------------------------------------------- -| -|// DynASM defines used by the PPC port: -|// -|// P64 64 bit pointers (only for GPR64 testing). -|// Note: see vm_ppc64.dasc for a full PPC64 _LP64 port. -|// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3). -|// Affects reg saves, stack layout, carry/overflow/dot flags etc. -|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360). -|// TOC Need table of contents (64 bit or 32 bit variant, e.g. PS3). -|// Function pointers are really a struct: code, TOC, env (optional). -|// TOCENV Function pointers have an environment pointer, too (not on PS3). -|// PPE Power Processor Element of Cell (PS3) or Xenon (Xbox 360). -|// Must avoid (slow) micro-coded instructions. -| -|.if P64 -|.define TOC, 1 -|.define TOCENV, 1 -|.macro lpx, a, b, c; ldx a, b, c; .endmacro -|.macro lp, a, b; ld a, b; .endmacro -|.macro stp, a, b; std a, b; .endmacro -|.define decode_OPP, decode_OP8 -|.if FFI -|// Missing: Calling conventions, 64 bit regs, TOC. -|.error lib_ffi not yet implemented for PPC64 -|.endif -|.else -|.macro lpx, a, b, c; lwzx a, b, c; .endmacro -|.macro lp, a, b; lwz a, b; .endmacro -|.macro stp, a, b; stw a, b; .endmacro -|.define decode_OPP, decode_OP4 -|.endif -| -|// Convenience macros for TOC handling. -|.if TOC -|// Linker needs a TOC patch area for every external call relocation. -|.macro blex, target; bl extern target@plt; nop; .endmacro -|.macro .toc, a, b; a, b; .endmacro -|.if P64 -|.define TOC_OFS, 8 -|.define ENV_OFS, 16 -|.else -|.define TOC_OFS, 4 -|.define ENV_OFS, 8 -|.endif -|.else // No TOC. -|.macro blex, target; bl extern target@plt; .endmacro -|.macro .toc, a, b; .endmacro -|.endif -|.macro .tocenv, a, b; .if TOCENV; a, b; .endif; .endmacro -| -|.macro .gpr64, a, b; .if GPR64; a, b; .endif; .endmacro -| -|.macro andix., y, a, i -|.if PPE -| rlwinm y, a, 0, 31-lj_fls(i), 31-lj_ffs(i) -| cmpwi y, 0 -|.else -| andi. y, a, i -|.endif -|.endmacro -| -|.macro clrso, reg -|.if PPE -| li reg, 0 -| mtxer reg -|.else -| mcrxr cr0 -|.endif -|.endmacro -| -|.macro checkov, reg, noov -|.if PPE -| mfxer reg -| add reg, reg, reg -| cmpwi reg, 0 -| li reg, 0 -| mtxer reg -| bgey noov -|.else -| mcrxr cr0 -| bley noov -|.endif -|.endmacro -| -|//----------------------------------------------------------------------- -| -|// Fixed register assignments for the interpreter. -|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA) -| -|// The following must be C callee-save (but BASE is often refetched). -|.define BASE, r14 // Base of current Lua stack frame. -|.define KBASE, r15 // Constants of current Lua function. -|.define PC, r16 // Next PC. -|.define DISPATCH, r17 // Opcode dispatch table. -|.define LREG, r18 // Register holding lua_State (also in SAVE_L). -|.define MULTRES, r19 // Size of multi-result: (nresults+1)*8. -|.define JGL, r31 // On-trace: global_State + 32768. -| -|// Constants for type-comparisons, stores and conversions. C callee-save. -|.define TISNUM, r22 -|.define TISNIL, r23 -|.define ZERO, r24 -|.define TOBIT, f30 // 2^52 + 2^51. -|.define TONUM, f31 // 2^52 + 2^51 + 2^31. -| -|// The following temporaries are not saved across C calls, except for RA. -|.define RA, r20 // Callee-save. -|.define RB, r10 -|.define RC, r11 -|.define RD, r12 -|.define INS, r7 // Overlaps CARG5. -| -|.define TMP0, r0 -|.define TMP1, r8 -|.define TMP2, r9 -|.define TMP3, r6 // Overlaps CARG4. -| -|// Saved temporaries. -|.define SAVE0, r21 -| -|// Calling conventions. -|.define CARG1, r3 -|.define CARG2, r4 -|.define CARG3, r5 -|.define CARG4, r6 // Overlaps TMP3. -|.define CARG5, r7 // Overlaps INS. -| -|.define FARG1, f1 -|.define FARG2, f2 -| -|.define CRET1, r3 -|.define CRET2, r4 -| -|.define TOCREG, r2 // TOC register (only used by C code). -|.define ENVREG, r11 // Environment pointer (nested C functions). -| -|// Stack layout while in interpreter. Must match with lj_frame.h. -|.if GPR64 -|.if FRAME32 -| -|// 456(sp) // \ 32/64 bit C frame info -|.define TONUM_LO, 452(sp) // | -|.define TONUM_HI, 448(sp) // | -|.define TMPD_LO, 444(sp) // | -|.define TMPD_HI, 440(sp) // | -|.define SAVE_CR, 432(sp) // | 64 bit CR save. -|.define SAVE_ERRF, 424(sp) // > Parameter save area. -|.define SAVE_NRES, 420(sp) // | -|.define SAVE_L, 416(sp) // | -|.define SAVE_PC, 412(sp) // | -|.define SAVE_MULTRES, 408(sp) // | -|.define SAVE_CFRAME, 400(sp) // / 64 bit C frame chain. -|// 392(sp) // Reserved. -|.define CFRAME_SPACE, 384 // Delta for sp. -|// Back chain for sp: 384(sp) <-- sp entering interpreter -|.define SAVE_LR, 376(sp) // 32 bit LR stored in hi-part. -|.define SAVE_GPR_, 232 // .. 232+18*8: 64 bit GPR saves. -|.define SAVE_FPR_, 88 // .. 88+18*8: 64 bit FPR saves. -|// 80(sp) // Needed for 16 byte stack frame alignment. -|// 16(sp) // Callee parameter save area (ABI mandated). -|// 8(sp) // Reserved -|// Back chain for sp: 0(sp) <-- sp while in interpreter -|// 32 bit sp stored in hi-part of 0(sp). -| -|.define TMPD_BLO, 447(sp) -|.define TMPD, TMPD_HI -|.define TONUM_D, TONUM_HI -| -|.else -| -|// 508(sp) // \ 32 bit C frame info. -|.define SAVE_ERRF, 472(sp) // | -|.define SAVE_NRES, 468(sp) // | -|.define SAVE_L, 464(sp) // > Parameter save area. -|.define SAVE_PC, 460(sp) // | -|.define SAVE_MULTRES, 456(sp) // | -|.define SAVE_CFRAME, 448(sp) // / 64 bit C frame chain. -|.define SAVE_LR, 416(sp) -|.define CFRAME_SPACE, 400 // Delta for sp. -|// Back chain for sp: 400(sp) <-- sp entering interpreter -|.define SAVE_FPR_, 256 // .. 256+18*8: 64 bit FPR saves. -|.define SAVE_GPR_, 112 // .. 112+18*8: 64 bit GPR saves. -|// 48(sp) // Callee parameter save area (ABI mandated). -|.define SAVE_TOC, 40(sp) // TOC save area. -|.define TMPD_LO, 36(sp) // \ Link editor temp (ABI mandated). -|.define TMPD_HI, 32(sp) // / -|.define TONUM_LO, 28(sp) // \ Compiler temp (ABI mandated). -|.define TONUM_HI, 24(sp) // / -|// Next frame lr: 16(sp) -|.define SAVE_CR, 8(sp) // 64 bit CR save. -|// Back chain for sp: 0(sp) <-- sp while in interpreter -| -|.define TMPD_BLO, 39(sp) -|.define TMPD, TMPD_HI -|.define TONUM_D, TONUM_HI -| -|.endif -|.else -| -|.define SAVE_LR, 276(sp) -|.define CFRAME_SPACE, 272 // Delta for sp. -|// Back chain for sp: 272(sp) <-- sp entering interpreter -|.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves. -|.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves. -|.define SAVE_CR, 52(sp) // 32 bit CR save. -|.define SAVE_ERRF, 48(sp) // 32 bit C frame info. -|.define SAVE_NRES, 44(sp) -|.define SAVE_CFRAME, 40(sp) -|.define SAVE_L, 36(sp) -|.define SAVE_PC, 32(sp) -|.define SAVE_MULTRES, 28(sp) -|.define UNUSED1, 24(sp) -|.define TMPD_LO, 20(sp) -|.define TMPD_HI, 16(sp) -|.define TONUM_LO, 12(sp) -|.define TONUM_HI, 8(sp) -|// Next frame lr: 4(sp) -|// Back chain for sp: 0(sp) <-- sp while in interpreter -| -|.define TMPD_BLO, 23(sp) -|.define TMPD, TMPD_HI -|.define TONUM_D, TONUM_HI -| -|.endif -| -|.macro save_, reg -|.if GPR64 -| std r..reg, SAVE_GPR_+(reg-14)*8(sp) -|.else -| stw r..reg, SAVE_GPR_+(reg-14)*4(sp) -|.endif -| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) -|.endmacro -|.macro rest_, reg -|.if GPR64 -| ld r..reg, SAVE_GPR_+(reg-14)*8(sp) -|.else -| lwz r..reg, SAVE_GPR_+(reg-14)*4(sp) -|.endif -| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) -|.endmacro -| -|.macro saveregs -|.if GPR64 and not FRAME32 -| stdu sp, -CFRAME_SPACE(sp) -|.else -| stwu sp, -CFRAME_SPACE(sp) -|.endif -| save_ 14; save_ 15; save_ 16 -| mflr r0 -| save_ 17; save_ 18; save_ 19; save_ 20; save_ 21; save_ 22 -|.if GPR64 and not FRAME32 -| std r0, SAVE_LR -|.else -| stw r0, SAVE_LR -|.endif -| save_ 23; save_ 24; save_ 25 -| mfcr r0 -| save_ 26; save_ 27; save_ 28; save_ 29; save_ 30; save_ 31 -|.if GPR64 -| std r0, SAVE_CR -|.else -| stw r0, SAVE_CR -|.endif -| .toc std TOCREG, SAVE_TOC -|.endmacro -| -|.macro restoreregs -|.if GPR64 and not FRAME32 -| ld r0, SAVE_LR -|.else -| lwz r0, SAVE_LR -|.endif -|.if GPR64 -| ld r12, SAVE_CR -|.else -| lwz r12, SAVE_CR -|.endif -| rest_ 14; rest_ 15; rest_ 16; rest_ 17; rest_ 18; rest_ 19 -| mtlr r0; -|.if PPE; mtocrf 0x20, r12; .else; mtcrf 0x38, r12; .endif -| rest_ 20; rest_ 21; rest_ 22; rest_ 23; rest_ 24; rest_ 25 -|.if PPE; mtocrf 0x10, r12; .endif -| rest_ 26; rest_ 27; rest_ 28; rest_ 29; rest_ 30; rest_ 31 -|.if PPE; mtocrf 0x08, r12; .endif -| addi sp, sp, CFRAME_SPACE -|.endmacro -| -|// Type definitions. Some of these are only used for documentation. -|.type L, lua_State, LREG -|.type GL, global_State -|.type TVALUE, TValue -|.type GCOBJ, GCobj -|.type STR, GCstr -|.type TAB, GCtab -|.type LFUNC, GCfuncL -|.type CFUNC, GCfuncC -|.type PROTO, GCproto -|.type UPVAL, GCupval -|.type NODE, Node -|.type NARGS8, int -|.type TRACE, GCtrace -|.type SBUF, SBuf -| -|//----------------------------------------------------------------------- -| -|// Trap for not-yet-implemented parts. -|.macro NYI; tw 4, sp, sp; .endmacro -| -|// int/FP conversions. -|.macro tonum_i, freg, reg -| xoris reg, reg, 0x8000 -| stw reg, TONUM_LO -| lfd freg, TONUM_D -| fsub freg, freg, TONUM -|.endmacro -| -|.macro tonum_u, freg, reg -| stw reg, TONUM_LO -| lfd freg, TONUM_D -| fsub freg, freg, TOBIT -|.endmacro -| -|.macro toint, reg, freg, tmpfreg -| fctiwz tmpfreg, freg -| stfd tmpfreg, TMPD -| lwz reg, TMPD_LO -|.endmacro -| -|.macro toint, reg, freg -| toint reg, freg, freg -|.endmacro -| -|//----------------------------------------------------------------------- -| -|// Access to frame relative to BASE. -|.define FRAME_PC, -8 -|.define FRAME_FUNC, -4 -| -|// Instruction decode. -|.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro -|.macro decode_OP8, dst, ins; rlwinm dst, ins, 3, 21, 28; .endmacro -|.macro decode_RA8, dst, ins; rlwinm dst, ins, 27, 21, 28; .endmacro -|.macro decode_RB8, dst, ins; rlwinm dst, ins, 11, 21, 28; .endmacro -|.macro decode_RC8, dst, ins; rlwinm dst, ins, 19, 21, 28; .endmacro -|.macro decode_RD8, dst, ins; rlwinm dst, ins, 19, 13, 28; .endmacro -| -|.macro decode_OP1, dst, ins; rlwinm dst, ins, 0, 24, 31; .endmacro -|.macro decode_RD4, dst, ins; rlwinm dst, ins, 18, 14, 29; .endmacro -| -|// Instruction fetch. -|.macro ins_NEXT1 -| lwz INS, 0(PC) -| addi PC, PC, 4 -|.endmacro -|// Instruction decode+dispatch. Note: optimized for e300! -|.macro ins_NEXT2 -| decode_OPP TMP1, INS -| lpx TMP0, DISPATCH, TMP1 -| mtctr TMP0 -| decode_RB8 RB, INS -| decode_RD8 RD, INS -| decode_RA8 RA, INS -| decode_RC8 RC, INS -| bctr -|.endmacro -|.macro ins_NEXT -| ins_NEXT1 -| ins_NEXT2 -|.endmacro -| -|// Instruction footer. -|.if 1 -| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. -| .define ins_next, ins_NEXT -| .define ins_next_, ins_NEXT -| .define ins_next1, ins_NEXT1 -| .define ins_next2, ins_NEXT2 -|.else -| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. -| // Affects only certain kinds of benchmarks (and only with -j off). -| .macro ins_next -| b ->ins_next -| .endmacro -| .macro ins_next1 -| .endmacro -| .macro ins_next2 -| b ->ins_next -| .endmacro -| .macro ins_next_ -| ->ins_next: -| ins_NEXT -| .endmacro -|.endif -| -|// Call decode and dispatch. -|.macro ins_callt -| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC -| lwz PC, LFUNC:RB->pc -| lwz INS, 0(PC) -| addi PC, PC, 4 -| decode_OPP TMP1, INS -| decode_RA8 RA, INS -| lpx TMP0, DISPATCH, TMP1 -| add RA, RA, BASE -| mtctr TMP0 -| bctr -|.endmacro -| -|.macro ins_call -| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC -| stw PC, FRAME_PC(BASE) -| ins_callt -|.endmacro -| -|//----------------------------------------------------------------------- -| -|// Macros to test operand types. -|.macro checknum, reg; cmplw reg, TISNUM; .endmacro -|.macro checknum, cr, reg; cmplw cr, reg, TISNUM; .endmacro -|.macro checkstr, reg; cmpwi reg, LJ_TSTR; .endmacro -|.macro checktab, reg; cmpwi reg, LJ_TTAB; .endmacro -|.macro checkfunc, reg; cmpwi reg, LJ_TFUNC; .endmacro -|.macro checknil, reg; cmpwi reg, LJ_TNIL; .endmacro -| -|.macro branch_RD -| srwi TMP0, RD, 1 -| addis PC, PC, -(BCBIAS_J*4 >> 16) -| add PC, PC, TMP0 -|.endmacro -| -|// Assumes DISPATCH is relative to GL. -#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) -#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) -| -#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) -| -|.macro hotcheck, delta, target -| rlwinm TMP1, PC, 31, 25, 30 -| addi TMP1, TMP1, GG_DISP2HOT -| lhzx TMP2, DISPATCH, TMP1 -| addic. TMP2, TMP2, -delta -| sthx TMP2, DISPATCH, TMP1 -| blt target -|.endmacro -| -|.macro hotloop -| hotcheck HOTCOUNT_LOOP, ->vm_hotloop -|.endmacro -| -|.macro hotcall -| hotcheck HOTCOUNT_CALL, ->vm_hotcall -|.endmacro -| -|// Set current VM state. Uses TMP0. -|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro -|.macro st_vmstate; stw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro -| -|// Move table write barrier back. Overwrites mark and tmp. -|.macro barrierback, tab, mark, tmp -| lwz tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) -| // Assumes LJ_GC_BLACK is 0x04. -| rlwinm mark, mark, 0, 30, 28 // black2gray(tab) -| stw tab, DISPATCH_GL(gc.grayagain)(DISPATCH) -| stb mark, tab->marked -| stw tmp, tab->gclist -|.endmacro -| -|//----------------------------------------------------------------------- - -/* Generate subroutines used by opcodes and other parts of the VM. */ -/* The .code_sub section should be last to help static branch prediction. */ -static void build_subroutines(BuildCtx *ctx) -{ - |.code_sub - | - |//----------------------------------------------------------------------- - |//-- Return handling ---------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_returnp: - | // See vm_return. Also: TMP2 = previous base. - | andix. TMP0, PC, FRAME_P - | li TMP1, LJ_TTRUE - | beq ->cont_dispatch - | - | // Return from pcall or xpcall fast func. - | lwz PC, FRAME_PC(TMP2) // Fetch PC of previous frame. - | mr BASE, TMP2 // Restore caller base. - | // Prepending may overwrite the pcall frame, so do it at the end. - | stwu TMP1, FRAME_PC(RA) // Prepend true to results. - | - |->vm_returnc: - | addi RD, RD, 8 // RD = (nresults+1)*8. - | andix. TMP0, PC, FRAME_TYPE - | cmpwi cr1, RD, 0 - | li CRET1, LUA_YIELD - | beq cr1, ->vm_unwind_c_eh - | mr MULTRES, RD - | beq ->BC_RET_Z // Handle regular return to Lua. - | - |->vm_return: - | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return - | // TMP0 = PC & FRAME_TYPE - | cmpwi TMP0, FRAME_C - | rlwinm TMP2, PC, 0, 0, 28 - | li_vmstate C - | sub TMP2, BASE, TMP2 // TMP2 = previous base. - | bney ->vm_returnp - | - | addic. TMP1, RD, -8 - | stp TMP2, L->base - | lwz TMP2, SAVE_NRES - | subi BASE, BASE, 8 - | st_vmstate - | slwi TMP2, TMP2, 3 - | beq >2 - |1: - | addic. TMP1, TMP1, -8 - | lfd f0, 0(RA) - | addi RA, RA, 8 - | stfd f0, 0(BASE) - | addi BASE, BASE, 8 - | bney <1 - | - |2: - | cmpw TMP2, RD // More/less results wanted? - | bne >6 - |3: - | stp BASE, L->top // Store new top. - | - |->vm_leave_cp: - | lp TMP0, SAVE_CFRAME // Restore previous C frame. - | li CRET1, 0 // Ok return status for vm_pcall. - | stp TMP0, L->cframe - | - |->vm_leave_unw: - | restoreregs - | blr - | - |6: - | ble >7 // Less results wanted? - | // More results wanted. Check stack size and fill up results with nil. - | lwz TMP1, L->maxstack - | cmplw BASE, TMP1 - | bge >8 - | stw TISNIL, 0(BASE) - | addi RD, RD, 8 - | addi BASE, BASE, 8 - | b <2 - | - |7: // Less results wanted. - | subfic TMP3, TMP2, 0 // LUA_MULTRET+1 case? - | sub TMP0, RD, TMP2 - | subfe TMP1, TMP1, TMP1 // TMP1 = TMP2 == 0 ? 0 : -1 - | and TMP0, TMP0, TMP1 - | sub BASE, BASE, TMP0 // Either keep top or shrink it. - | b <3 - | - |8: // Corner case: need to grow stack for filling up results. - | // This can happen if: - | // - A C function grows the stack (a lot). - | // - The GC shrinks the stack in between. - | // - A return back from a lua_call() with (high) nresults adjustment. - | stp BASE, L->top // Save current top held in BASE (yes). - | mr SAVE0, RD - | srwi CARG2, TMP2, 3 - | mr CARG1, L - | bl extern lj_state_growstack // (lua_State *L, int n) - | lwz TMP2, SAVE_NRES - | mr RD, SAVE0 - | slwi TMP2, TMP2, 3 - | lp BASE, L->top // Need the (realloced) L->top in BASE. - | b <2 - | - |->vm_unwind_c: // Unwind C stack, return from vm_pcall. - | // (void *cframe, int errcode) - | mr sp, CARG1 - | mr CRET1, CARG2 - |->vm_unwind_c_eh: // Landing pad for external unwinder. - | lwz L, SAVE_L - | .toc ld TOCREG, SAVE_TOC - | li TMP0, ~LJ_VMST_C - | lwz GL:TMP1, L->glref - | stw TMP0, GL:TMP1->vmstate - | b ->vm_leave_unw - | - |->vm_unwind_ff: // Unwind C stack, return from ff pcall. - | // (void *cframe) - |.if GPR64 - | rldicr sp, CARG1, 0, 61 - |.else - | rlwinm sp, CARG1, 0, 0, 29 - |.endif - |->vm_unwind_ff_eh: // Landing pad for external unwinder. - | lwz L, SAVE_L - | .toc ld TOCREG, SAVE_TOC - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lp BASE, L->base - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | lwz DISPATCH, L->glref // Setup pointer to dispatch table. - | li ZERO, 0 - | stw TMP3, TMPD - | li TMP1, LJ_TFALSE - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). - | li TISNIL, LJ_TNIL - | li_vmstate INTERP - | lfs TOBIT, TMPD - | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. - | la RA, -8(BASE) // Results start at BASE-8. - | stw TMP3, TMPD - | addi DISPATCH, DISPATCH, GG_G2DISP - | stw TMP1, 0(RA) // Prepend false to error message. - | li RD, 16 // 2 results: false + error message. - | st_vmstate - | lfs TONUM, TMPD - | b ->vm_returnc - | - |//----------------------------------------------------------------------- - |//-- Grow stack for calls ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_growstack_c: // Grow stack for C function. - | li CARG2, LUA_MINSTACK - | b >2 - | - |->vm_growstack_l: // Grow stack for Lua function. - | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC - | add RC, BASE, RC - | sub RA, RA, BASE - | stp BASE, L->base - | addi PC, PC, 4 // Must point after first instruction. - | stp RC, L->top - | srwi CARG2, RA, 3 - |2: - | // L->base = new base, L->top = top - | stw PC, SAVE_PC - | mr CARG1, L - | bl extern lj_state_growstack // (lua_State *L, int n) - | lp BASE, L->base - | lp RC, L->top - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | sub RC, RC, BASE - | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC - | ins_callt // Just retry the call. - | - |//----------------------------------------------------------------------- - |//-- Entry points into the assembler VM --------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_resume: // Setup C frame and resume thread. - | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) - | saveregs - | mr L, CARG1 - | lwz DISPATCH, L->glref // Setup pointer to dispatch table. - | mr BASE, CARG2 - | lbz TMP1, L->status - | stw L, SAVE_L - | li PC, FRAME_CP - | addi TMP0, sp, CFRAME_RESUME - | addi DISPATCH, DISPATCH, GG_G2DISP - | stw CARG3, SAVE_NRES - | cmplwi TMP1, 0 - | stw CARG3, SAVE_ERRF - | stp CARG3, SAVE_CFRAME - | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. - | stp TMP0, L->cframe - | beq >3 - | - | // Resume after yield (like a return). - | stw L, DISPATCH_GL(cur_L)(DISPATCH) - | mr RA, BASE - | lp BASE, L->base - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lp TMP1, L->top - | lwz PC, FRAME_PC(BASE) - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | stb CARG3, L->status - | stw TMP3, TMPD - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). - | lfs TOBIT, TMPD - | sub RD, TMP1, BASE - | stw TMP3, TMPD - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) - | addi RD, RD, 8 - | stw TMP0, TONUM_HI - | li_vmstate INTERP - | li ZERO, 0 - | st_vmstate - | andix. TMP0, PC, FRAME_TYPE - | mr MULTRES, RD - | lfs TONUM, TMPD - | li TISNIL, LJ_TNIL - | beq ->BC_RET_Z - | b ->vm_return - | - |->vm_pcall: // Setup protected C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) - | saveregs - | li PC, FRAME_CP - | stw CARG4, SAVE_ERRF - | b >1 - | - |->vm_call: // Setup C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1) - | saveregs - | li PC, FRAME_C - | - |1: // Entry point for vm_pcall above (PC = ftype). - | lp TMP1, L:CARG1->cframe - | mr L, CARG1 - | stw CARG3, SAVE_NRES - | lwz DISPATCH, L->glref // Setup pointer to dispatch table. - | stw CARG1, SAVE_L - | mr BASE, CARG2 - | addi DISPATCH, DISPATCH, GG_G2DISP - | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. - | stp TMP1, SAVE_CFRAME - | stp sp, L->cframe // Add our C frame to cframe chain. - | - |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). - | stw L, DISPATCH_GL(cur_L)(DISPATCH) - | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lp TMP1, L->top - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | add PC, PC, BASE - | stw TMP3, TMPD - | li ZERO, 0 - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). - | lfs TOBIT, TMPD - | sub PC, PC, TMP2 // PC = frame delta + frame type - | stw TMP3, TMPD - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) - | sub NARGS8:RC, TMP1, BASE - | stw TMP0, TONUM_HI - | li_vmstate INTERP - | lfs TONUM, TMPD - | li TISNIL, LJ_TNIL - | st_vmstate - | - |->vm_call_dispatch: - | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC - | lwz TMP0, FRAME_PC(BASE) - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | checkfunc TMP0; bne ->vmeta_call - | - |->vm_call_dispatch_f: - | ins_call - | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC - | - |->vm_cpcall: // Setup protected C frame, call C. - | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) - | saveregs - | mr L, CARG1 - | lwz TMP0, L:CARG1->stack - | stw CARG1, SAVE_L - | lp TMP1, L->top - | lwz DISPATCH, L->glref // Setup pointer to dispatch table. - | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. - | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). - | lp TMP1, L->cframe - | addi DISPATCH, DISPATCH, GG_G2DISP - | .toc lp CARG4, 0(CARG4) - | li TMP2, 0 - | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. - | stw TMP2, SAVE_ERRF // No error function. - | stp TMP1, SAVE_CFRAME - | stp sp, L->cframe // Add our C frame to cframe chain. - | stw L, DISPATCH_GL(cur_L)(DISPATCH) - | mtctr CARG4 - | bctrl // (lua_State *L, lua_CFunction func, void *ud) - |.if PPE - | mr BASE, CRET1 - | cmpwi CRET1, 0 - |.else - | mr. BASE, CRET1 - |.endif - | li PC, FRAME_CP - | bne <3 // Else continue with the call. - | b ->vm_leave_cp // No base? Just remove C frame. - | - |//----------------------------------------------------------------------- - |//-- Metamethod handling ------------------------------------------------ - |//----------------------------------------------------------------------- - | - |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the - |// stack, so BASE doesn't need to be reloaded across these calls. - | - |//-- Continuation dispatch ---------------------------------------------- - | - |->cont_dispatch: - | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 - | lwz TMP0, -12(BASE) // Continuation. - | mr RB, BASE - | mr BASE, TMP2 // Restore caller BASE. - | lwz LFUNC:TMP1, FRAME_FUNC(TMP2) - |.if FFI - | cmplwi TMP0, 1 - |.endif - | lwz PC, -16(RB) // Restore PC from [cont|PC]. - | subi TMP2, RD, 8 - | lwz TMP1, LFUNC:TMP1->pc - | stwx TISNIL, RA, TMP2 // Ensure one valid arg. - |.if FFI - | ble >1 - |.endif - | lwz KBASE, PC2PROTO(k)(TMP1) - | // BASE = base, RA = resultptr, RB = meta base - | mtctr TMP0 - | bctr // Jump to continuation. - | - |.if FFI - |1: - | beq ->cont_ffi_callback // cont = 1: return from FFI callback. - | // cont = 0: tailcall from C function. - | subi TMP1, RB, 16 - | sub RC, TMP1, BASE - | b ->vm_call_tail - |.endif - | - |->cont_cat: // RA = resultptr, RB = meta base - | lwz INS, -4(PC) - | subi CARG2, RB, 16 - | decode_RB8 SAVE0, INS - | lfd f0, 0(RA) - | add TMP1, BASE, SAVE0 - | stp BASE, L->base - | cmplw TMP1, CARG2 - | sub CARG3, CARG2, TMP1 - | decode_RA8 RA, INS - | stfd f0, 0(CARG2) - | bney ->BC_CAT_Z - | stfdx f0, BASE, RA - | b ->cont_nop - | - |//-- Table indexing metamethods ----------------------------------------- - | - |->vmeta_tgets1: - | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - | li TMP0, LJ_TSTR - | decode_RB8 RB, INS - | stw STR:RC, 4(CARG3) - | add CARG2, BASE, RB - | stw TMP0, 0(CARG3) - | b >1 - | - |->vmeta_tgets: - | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) - | li TMP0, LJ_TTAB - | stw TAB:RB, 4(CARG2) - | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) - | stw TMP0, 0(CARG2) - | li TMP1, LJ_TSTR - | stw STR:RC, 4(CARG3) - | stw TMP1, 0(CARG3) - | b >1 - | - |->vmeta_tgetb: // TMP0 = index - |.if not DUALNUM - | tonum_u f0, TMP0 - |.endif - | decode_RB8 RB, INS - | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - | add CARG2, BASE, RB - |.if DUALNUM - | stw TISNUM, 0(CARG3) - | stw TMP0, 4(CARG3) - |.else - | stfd f0, 0(CARG3) - |.endif - | b >1 - | - |->vmeta_tgetv: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | add CARG2, BASE, RB - | add CARG3, BASE, RC - |1: - | stp BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) - | // Returns TValue * (finished) or NULL (metamethod). - | cmplwi CRET1, 0 - | beq >3 - | lfd f0, 0(CRET1) - | ins_next1 - | stfdx f0, BASE, RA - | ins_next2 - | - |3: // Call __index metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k - | subfic TMP1, BASE, FRAME_CONT - | lp BASE, L->top - | stw PC, -16(BASE) // [cont|PC] - | add PC, TMP1, BASE - | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | li NARGS8:RC, 16 // 2 args for func(t, k). - | b ->vm_call_dispatch_f - | - |->vmeta_tgetr: - | bl extern lj_tab_getinth // (GCtab *t, int32_t key) - | // Returns cTValue * or NULL. - | cmplwi CRET1, 0 - | beq >1 - | lfd f14, 0(CRET1) - | b ->BC_TGETR_Z - |1: - | stwx TISNIL, BASE, RA - | b ->cont_nop - | - |//----------------------------------------------------------------------- - | - |->vmeta_tsets1: - | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - | li TMP0, LJ_TSTR - | decode_RB8 RB, INS - | stw STR:RC, 4(CARG3) - | add CARG2, BASE, RB - | stw TMP0, 0(CARG3) - | b >1 - | - |->vmeta_tsets: - | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) - | li TMP0, LJ_TTAB - | stw TAB:RB, 4(CARG2) - | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) - | stw TMP0, 0(CARG2) - | li TMP1, LJ_TSTR - | stw STR:RC, 4(CARG3) - | stw TMP1, 0(CARG3) - | b >1 - | - |->vmeta_tsetb: // TMP0 = index - |.if not DUALNUM - | tonum_u f0, TMP0 - |.endif - | decode_RB8 RB, INS - | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - | add CARG2, BASE, RB - |.if DUALNUM - | stw TISNUM, 0(CARG3) - | stw TMP0, 4(CARG3) - |.else - | stfd f0, 0(CARG3) - |.endif - | b >1 - | - |->vmeta_tsetv: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | add CARG2, BASE, RB - | add CARG3, BASE, RC - |1: - | stp BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) - | // Returns TValue * (finished) or NULL (metamethod). - | cmplwi CRET1, 0 - | lfdx f0, BASE, RA - | beq >3 - | // NOBARRIER: lj_meta_tset ensures the table is not black. - | ins_next1 - | stfd f0, 0(CRET1) - | ins_next2 - | - |3: // Call __newindex metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) - | subfic TMP1, BASE, FRAME_CONT - | lp BASE, L->top - | stw PC, -16(BASE) // [cont|PC] - | add PC, TMP1, BASE - | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | li NARGS8:RC, 24 // 3 args for func(t, k, v) - | stfd f0, 16(BASE) // Copy value to third argument. - | b ->vm_call_dispatch_f - | - |->vmeta_tsetr: - | stp BASE, L->base - | stw PC, SAVE_PC - | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) - | // Returns TValue *. - | stfd f14, 0(CRET1) - | b ->cont_nop - | - |//-- Comparison metamethods --------------------------------------------- - | - |->vmeta_comp: - | mr CARG1, L - | subi PC, PC, 4 - |.if DUALNUM - | mr CARG2, RA - |.else - | add CARG2, BASE, RA - |.endif - | stw PC, SAVE_PC - |.if DUALNUM - | mr CARG3, RD - |.else - | add CARG3, BASE, RD - |.endif - | stp BASE, L->base - | decode_OP1 CARG4, INS - | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) - | // Returns 0/1 or TValue * (metamethod). - |3: - | cmplwi CRET1, 1 - | bgt ->vmeta_binop - | subfic CRET1, CRET1, 0 - |4: - | lwz INS, 0(PC) - | addi PC, PC, 4 - | decode_RD4 TMP2, INS - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - | and TMP2, TMP2, CRET1 - | add PC, PC, TMP2 - |->cont_nop: - | ins_next - | - |->cont_ra: // RA = resultptr - | lwz INS, -4(PC) - | lfd f0, 0(RA) - | decode_RA8 TMP1, INS - | stfdx f0, BASE, TMP1 - | b ->cont_nop - | - |->cont_condt: // RA = resultptr - | lwz TMP0, 0(RA) - | .gpr64 extsw TMP0, TMP0 - | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is true. - | subfe CRET1, CRET1, CRET1 - | not CRET1, CRET1 - | b <4 - | - |->cont_condf: // RA = resultptr - | lwz TMP0, 0(RA) - | .gpr64 extsw TMP0, TMP0 - | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is false. - | subfe CRET1, CRET1, CRET1 - | b <4 - | - |->vmeta_equal: - | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. - | subi PC, PC, 4 - | stp BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) - | // Returns 0/1 or TValue * (metamethod). - | b <3 - | - |->vmeta_equal_cd: - |.if FFI - | mr CARG2, INS - | subi PC, PC, 4 - | stp BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op) - | // Returns 0/1 or TValue * (metamethod). - | b <3 - |.endif - | - |->vmeta_istype: - | subi PC, PC, 4 - | stp BASE, L->base - | srwi CARG2, RA, 3 - | mr CARG1, L - | srwi CARG3, RD, 3 - | stw PC, SAVE_PC - | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) - | b ->cont_nop - | - |//-- Arithmetic metamethods --------------------------------------------- - | - |->vmeta_arith_nv: - | add CARG3, KBASE, RC - | add CARG4, BASE, RB - | b >1 - |->vmeta_arith_nv2: - |.if DUALNUM - | mr CARG3, RC - | mr CARG4, RB - | b >1 - |.endif - | - |->vmeta_unm: - | mr CARG3, RD - | mr CARG4, RD - | b >1 - | - |->vmeta_arith_vn: - | add CARG3, BASE, RB - | add CARG4, KBASE, RC - | b >1 - | - |->vmeta_arith_vv: - | add CARG3, BASE, RB - | add CARG4, BASE, RC - |.if DUALNUM - | b >1 - |.endif - |->vmeta_arith_vn2: - |->vmeta_arith_vv2: - |.if DUALNUM - | mr CARG3, RB - | mr CARG4, RC - |.endif - |1: - | add CARG2, BASE, RA - | stp BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | decode_OP1 CARG5, INS // Caveat: CARG5 overlaps INS. - | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) - | // Returns NULL (finished) or TValue * (metamethod). - | cmplwi CRET1, 0 - | beq ->cont_nop - | - | // Call metamethod for binary op. - |->vmeta_binop: - | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 - | sub TMP1, CRET1, BASE - | stw PC, -16(CRET1) // [cont|PC] - | mr TMP2, BASE - | addi PC, TMP1, FRAME_CONT - | mr BASE, CRET1 - | li NARGS8:RC, 16 // 2 args for func(o1, o2). - | b ->vm_call_dispatch - | - |->vmeta_len: -#if LJ_52 - | mr SAVE0, CARG1 -#endif - | mr CARG2, RD - | stp BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | bl extern lj_meta_len // (lua_State *L, TValue *o) - | // Returns NULL (retry) or TValue * (metamethod base). -#if LJ_52 - | cmplwi CRET1, 0 - | bne ->vmeta_binop // Binop call for compatibility. - | mr CARG1, SAVE0 - | b ->BC_LEN_Z -#else - | b ->vmeta_binop // Binop call for compatibility. -#endif - | - |//-- Call metamethod ---------------------------------------------------- - | - |->vmeta_call: // Resolve and call __call metamethod. - | // TMP2 = old base, BASE = new base, RC = nargs*8 - | mr CARG1, L - | stp TMP2, L->base // This is the callers base! - | subi CARG2, BASE, 8 - | stw PC, SAVE_PC - | add CARG3, BASE, RC - | mr SAVE0, NARGS8:RC - | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | addi NARGS8:RC, SAVE0, 8 // Got one more argument now. - | ins_call - | - |->vmeta_callt: // Resolve __call for BC_CALLT. - | // BASE = old base, RA = new base, RC = nargs*8 - | mr CARG1, L - | stp BASE, L->base - | subi CARG2, RA, 8 - | stw PC, SAVE_PC - | add CARG3, RA, RC - | mr SAVE0, NARGS8:RC - | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | lwz TMP1, FRAME_PC(BASE) - | addi NARGS8:RC, SAVE0, 8 // Got one more argument now. - | lwz LFUNC:RB, FRAME_FUNC(RA) // Guaranteed to be a function here. - | b ->BC_CALLT_Z - | - |//-- Argument coercion for 'for' statement ------------------------------ - | - |->vmeta_for: - | mr CARG1, L - | stp BASE, L->base - | mr CARG2, RA - | stw PC, SAVE_PC - | mr SAVE0, INS - | bl extern lj_meta_for // (lua_State *L, TValue *base) - |.if JIT - | decode_OP1 TMP0, SAVE0 - |.endif - | decode_RA8 RA, SAVE0 - |.if JIT - | cmpwi TMP0, BC_JFORI - |.endif - | decode_RD8 RD, SAVE0 - |.if JIT - | beqy =>BC_JFORI - |.endif - | b =>BC_FORI - | - |//----------------------------------------------------------------------- - |//-- Fast functions ----------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro .ffunc, name - |->ff_ .. name: - |.endmacro - | - |.macro .ffunc_1, name - |->ff_ .. name: - | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lwz CARG1, 4(BASE) - | blt ->fff_fallback - |.endmacro - | - |.macro .ffunc_2, name - |->ff_ .. name: - | cmplwi NARGS8:RC, 16 - | lwz CARG3, 0(BASE) - | lwz CARG4, 8(BASE) - | lwz CARG1, 4(BASE) - | lwz CARG2, 12(BASE) - | blt ->fff_fallback - |.endmacro - | - |.macro .ffunc_n, name - |->ff_ .. name: - | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lfd FARG1, 0(BASE) - | blt ->fff_fallback - | checknum CARG3; bge ->fff_fallback - |.endmacro - | - |.macro .ffunc_nn, name - |->ff_ .. name: - | cmplwi NARGS8:RC, 16 - | lwz CARG3, 0(BASE) - | lfd FARG1, 0(BASE) - | lwz CARG4, 8(BASE) - | lfd FARG2, 8(BASE) - | blt ->fff_fallback - | checknum CARG3; bge ->fff_fallback - | checknum CARG4; bge ->fff_fallback - |.endmacro - | - |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. - |.macro ffgccheck - | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH) - | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) - | cmplw TMP0, TMP1 - | bgel ->fff_gcstep - |.endmacro - | - |//-- Base library: checks ----------------------------------------------- - | - |.ffunc_1 assert - | li TMP1, LJ_TFALSE - | la RA, -8(BASE) - | cmplw cr1, CARG3, TMP1 - | lwz PC, FRAME_PC(BASE) - | bge cr1, ->fff_fallback - | stw CARG3, 0(RA) - | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. - | stw CARG1, 4(RA) - | beq ->fff_res // Done if exactly 1 argument. - | li TMP1, 8 - | subi RC, RC, 8 - |1: - | cmplw TMP1, RC - | lfdx f0, BASE, TMP1 - | stfdx f0, RA, TMP1 - | addi TMP1, TMP1, 8 - | bney <1 - | b ->fff_res - | - |.ffunc type - | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) - | blt ->fff_fallback - | .gpr64 extsw CARG1, CARG1 - | subfc TMP0, TISNUM, CARG1 - | subfe TMP2, CARG1, CARG1 - | orc TMP1, TMP2, TMP0 - | addi TMP1, TMP1, ~LJ_TISNUM+1 - | slwi TMP1, TMP1, 3 - | la TMP2, CFUNC:RB->upvalue - | lfdx FARG1, TMP2, TMP1 - | b ->fff_resn - | - |//-- Base library: getters and setters --------------------------------- - | - |.ffunc_1 getmetatable - | checktab CARG3; bne >6 - |1: // Field metatable must be at same offset for GCtab and GCudata! - | lwz TAB:CARG1, TAB:CARG1->metatable - |2: - | li CARG3, LJ_TNIL - | cmplwi TAB:CARG1, 0 - | lwz STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) - | beq ->fff_restv - | lwz TMP0, TAB:CARG1->hmask - | li CARG3, LJ_TTAB // Use metatable as default result. - | lwz TMP1, STR:RC->hash - | lwz NODE:TMP2, TAB:CARG1->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | slwi TMP0, TMP1, 5 - | slwi TMP1, TMP1, 3 - | sub TMP1, TMP0, TMP1 - | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - |3: // Rearranged logic, because we expect _not_ to find the key. - | lwz CARG4, NODE:TMP2->key - | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) - | lwz CARG2, NODE:TMP2->val - | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2) - | checkstr CARG4; bne >4 - | cmpw TMP0, STR:RC; beq >5 - |4: - | lwz NODE:TMP2, NODE:TMP2->next - | cmplwi NODE:TMP2, 0 - | beq ->fff_restv // Not found, keep default result. - | b <3 - |5: - | checknil CARG2 - | beq ->fff_restv // Ditto for nil value. - | mr CARG3, CARG2 // Return value of mt.__metatable. - | mr CARG1, TMP1 - | b ->fff_restv - | - |6: - | cmpwi CARG3, LJ_TUDATA; beq <1 - | .gpr64 extsw CARG3, CARG3 - | subfc TMP0, TISNUM, CARG3 - | subfe TMP2, CARG3, CARG3 - | orc TMP1, TMP2, TMP0 - | addi TMP1, TMP1, ~LJ_TISNUM+1 - | slwi TMP1, TMP1, 2 - | la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH) - | lwzx TAB:CARG1, TMP2, TMP1 - | b <2 - | - |.ffunc_2 setmetatable - | // Fast path: no mt for table yet and not clearing the mt. - | checktab CARG3; bne ->fff_fallback - | lwz TAB:TMP1, TAB:CARG1->metatable - | checktab CARG4; bne ->fff_fallback - | cmplwi TAB:TMP1, 0 - | lbz TMP3, TAB:CARG1->marked - | bne ->fff_fallback - | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) - | stw TAB:CARG2, TAB:CARG1->metatable - | beq ->fff_restv - | barrierback TAB:CARG1, TMP3, TMP0 - | b ->fff_restv - | - |.ffunc rawget - | cmplwi NARGS8:RC, 16 - | lwz CARG4, 0(BASE) - | lwz TAB:CARG2, 4(BASE) - | blt ->fff_fallback - | checktab CARG4; bne ->fff_fallback - | la CARG3, 8(BASE) - | mr CARG1, L - | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) - | // Returns cTValue *. - | lfd FARG1, 0(CRET1) - | b ->fff_resn - | - |//-- Base library: conversions ------------------------------------------ - | - |.ffunc tonumber - | // Only handles the number case inline (without a base argument). - | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) - | lfd FARG1, 0(BASE) - | bne ->fff_fallback // Exactly one argument. - | checknum CARG1; bgt ->fff_fallback - | b ->fff_resn - | - |.ffunc_1 tostring - | // Only handles the string or number case inline. - | checkstr CARG3 - | // A __tostring method in the string base metatable is ignored. - | beq ->fff_restv // String key? - | // Handle numbers inline, unless a number base metatable is present. - | lwz TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) - | checknum CARG3 - | cmplwi cr1, TMP0, 0 - | stp BASE, L->base // Add frame since C call can throw. - | crorc 4*cr0+eq, 4*cr0+gt, 4*cr1+eq - | stw PC, SAVE_PC // Redundant (but a defined value). - | beq ->fff_fallback - | ffgccheck - | mr CARG1, L - | mr CARG2, BASE - |.if DUALNUM - | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) - |.else - | bl extern lj_strfmt_num // (lua_State *L, lua_Number *np) - |.endif - | // Returns GCstr *. - | li CARG3, LJ_TSTR - | b ->fff_restv - | - |//-- Base library: iterators ------------------------------------------- - | - |.ffunc next - | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) - | lwz TAB:CARG2, 4(BASE) - | blt ->fff_fallback - | stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil. - | checktab CARG1 - | lwz PC, FRAME_PC(BASE) - | bne ->fff_fallback - | stp BASE, L->base // Add frame since C call can throw. - | mr CARG1, L - | stp BASE, L->top // Dummy frame length is ok. - | la CARG3, 8(BASE) - | stw PC, SAVE_PC - | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - | // Returns 0 at end of traversal. - | cmplwi CRET1, 0 - | li CARG3, LJ_TNIL - | beq ->fff_restv // End of traversal: return nil. - | lfd f0, 8(BASE) // Copy key and value to results. - | la RA, -8(BASE) - | lfd f1, 16(BASE) - | stfd f0, 0(RA) - | li RD, (2+1)*8 - | stfd f1, 8(RA) - | b ->fff_res - | - |.ffunc_1 pairs - | checktab CARG3 - | lwz PC, FRAME_PC(BASE) - | bne ->fff_fallback -#if LJ_52 - | lwz TAB:TMP2, TAB:CARG1->metatable - | lfd f0, CFUNC:RB->upvalue[0] - | cmplwi TAB:TMP2, 0 - | la RA, -8(BASE) - | bne ->fff_fallback -#else - | lfd f0, CFUNC:RB->upvalue[0] - | la RA, -8(BASE) -#endif - | stw TISNIL, 8(BASE) - | li RD, (3+1)*8 - | stfd f0, 0(RA) - | b ->fff_res - | - |.ffunc ipairs_aux - | cmplwi NARGS8:RC, 16 - | lwz CARG3, 0(BASE) - | lwz TAB:CARG1, 4(BASE) - | lwz CARG4, 8(BASE) - |.if DUALNUM - | lwz TMP2, 12(BASE) - |.else - | lfd FARG2, 8(BASE) - |.endif - | blt ->fff_fallback - | checktab CARG3 - | checknum cr1, CARG4 - | lwz PC, FRAME_PC(BASE) - |.if DUALNUM - | bne ->fff_fallback - | bne cr1, ->fff_fallback - |.else - | lus TMP0, 0x3ff0 - | stw ZERO, TMPD_LO - | bne ->fff_fallback - | stw TMP0, TMPD_HI - | bge cr1, ->fff_fallback - | lfd FARG1, TMPD - | toint TMP2, FARG2, f0 - |.endif - | lwz TMP0, TAB:CARG1->asize - | lwz TMP1, TAB:CARG1->array - |.if not DUALNUM - | fadd FARG2, FARG2, FARG1 - |.endif - | addi TMP2, TMP2, 1 - | la RA, -8(BASE) - | cmplw TMP0, TMP2 - |.if DUALNUM - | stw TISNUM, 0(RA) - | slwi TMP3, TMP2, 3 - | stw TMP2, 4(RA) - |.else - | slwi TMP3, TMP2, 3 - | stfd FARG2, 0(RA) - |.endif - | ble >2 // Not in array part? - | lwzx TMP2, TMP1, TMP3 - | lfdx f0, TMP1, TMP3 - |1: - | checknil TMP2 - | li RD, (0+1)*8 - | beq ->fff_res // End of iteration, return 0 results. - | li RD, (2+1)*8 - | stfd f0, 8(RA) - | b ->fff_res - |2: // Check for empty hash part first. Otherwise call C function. - | lwz TMP0, TAB:CARG1->hmask - | cmplwi TMP0, 0 - | li RD, (0+1)*8 - | beq ->fff_res - | mr CARG2, TMP2 - | bl extern lj_tab_getinth // (GCtab *t, int32_t key) - | // Returns cTValue * or NULL. - | cmplwi CRET1, 0 - | li RD, (0+1)*8 - | beq ->fff_res - | lwz TMP2, 0(CRET1) - | lfd f0, 0(CRET1) - | b <1 - | - |.ffunc_1 ipairs - | checktab CARG3 - | lwz PC, FRAME_PC(BASE) - | bne ->fff_fallback -#if LJ_52 - | lwz TAB:TMP2, TAB:CARG1->metatable - | lfd f0, CFUNC:RB->upvalue[0] - | cmplwi TAB:TMP2, 0 - | la RA, -8(BASE) - | bne ->fff_fallback -#else - | lfd f0, CFUNC:RB->upvalue[0] - | la RA, -8(BASE) -#endif - |.if DUALNUM - | stw TISNUM, 8(BASE) - |.else - | stw ZERO, 8(BASE) - |.endif - | stw ZERO, 12(BASE) - | li RD, (3+1)*8 - | stfd f0, 0(RA) - | b ->fff_res - | - |//-- Base library: catch errors ---------------------------------------- - | - |.ffunc pcall - | cmplwi NARGS8:RC, 8 - | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | blt ->fff_fallback - | mr TMP2, BASE - | la BASE, 8(BASE) - | // Remember active hook before pcall. - | rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31 - | subi NARGS8:RC, NARGS8:RC, 8 - | addi PC, TMP3, 8+FRAME_PCALL - | b ->vm_call_dispatch - | - |.ffunc xpcall - | cmplwi NARGS8:RC, 16 - | lwz CARG4, 8(BASE) - | lfd FARG2, 8(BASE) - | lfd FARG1, 0(BASE) - | blt ->fff_fallback - | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) - | mr TMP2, BASE - | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function. - | la BASE, 16(BASE) - | // Remember active hook before pcall. - | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31 - | stfd FARG2, 0(TMP2) // Swap function and traceback. - | subi NARGS8:RC, NARGS8:RC, 16 - | stfd FARG1, 8(TMP2) - | addi PC, TMP1, 16+FRAME_PCALL - | b ->vm_call_dispatch - | - |//-- Coroutine library -------------------------------------------------- - | - |.macro coroutine_resume_wrap, resume - |.if resume - |.ffunc_1 coroutine_resume - | cmpwi CARG3, LJ_TTHREAD; bne ->fff_fallback - |.else - |.ffunc coroutine_wrap_aux - | lwz L:CARG1, CFUNC:RB->upvalue[0].gcr - |.endif - | lbz TMP0, L:CARG1->status - | lp TMP1, L:CARG1->cframe - | lp CARG2, L:CARG1->top - | cmplwi cr0, TMP0, LUA_YIELD - | lp TMP2, L:CARG1->base - | cmplwi cr1, TMP1, 0 - | lwz TMP0, L:CARG1->maxstack - | cmplw cr7, CARG2, TMP2 - | lwz PC, FRAME_PC(BASE) - | crorc 4*cr6+lt, 4*cr0+gt, 4*cr1+eq // st>LUA_YIELD || cframe!=0 - | add TMP2, CARG2, NARGS8:RC - | crandc 4*cr6+gt, 4*cr7+eq, 4*cr0+eq // base==top && st!=LUA_YIELD - | cmplw cr1, TMP2, TMP0 - | cror 4*cr6+lt, 4*cr6+lt, 4*cr6+gt - | stw PC, SAVE_PC - | cror 4*cr6+lt, 4*cr6+lt, 4*cr1+gt // cond1 || cond2 || stackov - | stp BASE, L->base - | blt cr6, ->fff_fallback - |1: - |.if resume - | addi BASE, BASE, 8 // Keep resumed thread in stack for GC. - | subi NARGS8:RC, NARGS8:RC, 8 - | subi TMP2, TMP2, 8 - |.endif - | stp TMP2, L:CARG1->top - | li TMP1, 0 - | stp BASE, L->top - |2: // Move args to coroutine. - | cmpw TMP1, NARGS8:RC - | lfdx f0, BASE, TMP1 - | beq >3 - | stfdx f0, CARG2, TMP1 - | addi TMP1, TMP1, 8 - | b <2 - |3: - | li CARG3, 0 - | mr L:SAVE0, L:CARG1 - | li CARG4, 0 - | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) - | // Returns thread status. - |4: - | lp TMP2, L:SAVE0->base - | cmplwi CRET1, LUA_YIELD - | lp TMP3, L:SAVE0->top - | li_vmstate INTERP - | lp BASE, L->base - | stw L, DISPATCH_GL(cur_L)(DISPATCH) - | st_vmstate - | bgt >8 - | sub RD, TMP3, TMP2 - | lwz TMP0, L->maxstack - | cmplwi RD, 0 - | add TMP1, BASE, RD - | beq >6 // No results? - | cmplw TMP1, TMP0 - | li TMP1, 0 - | bgt >9 // Need to grow stack? - | - | subi TMP3, RD, 8 - | stp TMP2, L:SAVE0->top // Clear coroutine stack. - |5: // Move results from coroutine. - | cmplw TMP1, TMP3 - | lfdx f0, TMP2, TMP1 - | stfdx f0, BASE, TMP1 - | addi TMP1, TMP1, 8 - | bne <5 - |6: - | andix. TMP0, PC, FRAME_TYPE - |.if resume - | li TMP1, LJ_TTRUE - | la RA, -8(BASE) - | stw TMP1, -8(BASE) // Prepend true to results. - | addi RD, RD, 16 - |.else - | mr RA, BASE - | addi RD, RD, 8 - |.endif - |7: - | stw PC, SAVE_PC - | mr MULTRES, RD - | beq ->BC_RET_Z - | b ->vm_return - | - |8: // Coroutine returned with error (at co->top-1). - |.if resume - | andix. TMP0, PC, FRAME_TYPE - | la TMP3, -8(TMP3) - | li TMP1, LJ_TFALSE - | lfd f0, 0(TMP3) - | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. - | li RD, (2+1)*8 - | stw TMP1, -8(BASE) // Prepend false to results. - | la RA, -8(BASE) - | stfd f0, 0(BASE) // Copy error message. - | b <7 - |.else - | mr CARG1, L - | mr CARG2, L:SAVE0 - | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) - |.endif - | - |9: // Handle stack expansion on return from yield. - | mr CARG1, L - | srwi CARG2, RD, 3 - | bl extern lj_state_growstack // (lua_State *L, int n) - | li CRET1, 0 - | b <4 - |.endmacro - | - | coroutine_resume_wrap 1 // coroutine.resume - | coroutine_resume_wrap 0 // coroutine.wrap - | - |.ffunc coroutine_yield - | lp TMP0, L->cframe - | add TMP1, BASE, NARGS8:RC - | stp BASE, L->base - | andix. TMP0, TMP0, CFRAME_RESUME - | stp TMP1, L->top - | li CRET1, LUA_YIELD - | beq ->fff_fallback - | stp ZERO, L->cframe - | stb CRET1, L->status - | b ->vm_leave_unw - | - |//-- Math library ------------------------------------------------------- - | - |.ffunc_1 math_abs - | checknum CARG3 - |.if DUALNUM - | bne >2 - | srawi TMP1, CARG1, 31 - | xor TMP2, TMP1, CARG1 - |.if GPR64 - | lus TMP0, 0x8000 - | sub CARG1, TMP2, TMP1 - | cmplw CARG1, TMP0 - | beq >1 - |.else - | sub. CARG1, TMP2, TMP1 - | blt >1 - |.endif - |->fff_resi: - | lwz PC, FRAME_PC(BASE) - | la RA, -8(BASE) - | stw TISNUM, -8(BASE) - | stw CRET1, -4(BASE) - | b ->fff_res1 - |1: - | lus CARG3, 0x41e0 // 2^31. - | li CARG1, 0 - | b ->fff_restv - |2: - |.endif - | bge ->fff_fallback - | rlwinm CARG3, CARG3, 0, 1, 31 - | // Fallthrough. - | - |->fff_restv: - | // CARG3/CARG1 = TValue result. - | lwz PC, FRAME_PC(BASE) - | stw CARG3, -8(BASE) - | la RA, -8(BASE) - | stw CARG1, -4(BASE) - |->fff_res1: - | // RA = results, PC = return. - | li RD, (1+1)*8 - |->fff_res: - | // RA = results, RD = (nresults+1)*8, PC = return. - | andix. TMP0, PC, FRAME_TYPE - | mr MULTRES, RD - | bney ->vm_return - | lwz INS, -4(PC) - | decode_RB8 RB, INS - |5: - | cmplw RB, RD // More results expected? - | decode_RA8 TMP0, INS - | bgt >6 - | ins_next1 - | // Adjust BASE. KBASE is assumed to be set for the calling frame. - | sub BASE, RA, TMP0 - | ins_next2 - | - |6: // Fill up results with nil. - | subi TMP1, RD, 8 - | addi RD, RD, 8 - | stwx TISNIL, RA, TMP1 - | b <5 - | - |.macro math_extern, func - | .ffunc_n math_ .. func - | blex func - | b ->fff_resn - |.endmacro - | - |.macro math_extern2, func - | .ffunc_nn math_ .. func - | blex func - | b ->fff_resn - |.endmacro - | - |.macro math_round, func - | .ffunc_1 math_ .. func - | checknum CARG3; beqy ->fff_restv - | rlwinm TMP2, CARG3, 12, 21, 31 - | bge ->fff_fallback - | addic. TMP2, TMP2, -1023 // exp = exponent(x) - 1023 - | cmplwi cr1, TMP2, 31 // 0 <= exp < 31? - | subfic TMP0, TMP2, 31 - | blt >3 - | slwi TMP1, CARG3, 11 - | srwi TMP3, CARG1, 21 - | oris TMP1, TMP1, 0x8000 - | addi TMP2, TMP2, 1 - | or TMP1, TMP1, TMP3 - | slwi CARG2, CARG1, 11 - | bge cr1, >4 - | slw TMP3, TMP1, TMP2 - | srw RD, TMP1, TMP0 - | or TMP3, TMP3, CARG2 - | srawi TMP2, CARG3, 31 - |.if "func" == "floor" - | and TMP1, TMP3, TMP2 - | addic TMP0, TMP1, -1 - | subfe TMP1, TMP0, TMP1 - | add CARG1, RD, TMP1 - | xor CARG1, CARG1, TMP2 - | sub CARG1, CARG1, TMP2 - | b ->fff_resi - |.else - | andc TMP1, TMP3, TMP2 - | addic TMP0, TMP1, -1 - | subfe TMP1, TMP0, TMP1 - | add CARG1, RD, TMP1 - | cmpw CARG1, RD - | xor CARG1, CARG1, TMP2 - | sub CARG1, CARG1, TMP2 - | bge ->fff_resi - | // Overflow to 2^31. - | lus CARG3, 0x41e0 // 2^31. - | li CARG1, 0 - | b ->fff_restv - |.endif - |3: // |x| < 1 - | slwi TMP2, CARG3, 1 - | srawi TMP1, CARG3, 31 - | or TMP2, CARG1, TMP2 // ztest = (hi+hi) | lo - |.if "func" == "floor" - | and TMP1, TMP2, TMP1 // (ztest & sign) == 0 ? 0 : -1 - | subfic TMP2, TMP1, 0 - | subfe CARG1, CARG1, CARG1 - |.else - | andc TMP1, TMP2, TMP1 // (ztest & ~sign) == 0 ? 0 : 1 - | addic TMP2, TMP1, -1 - | subfe CARG1, TMP2, TMP1 - |.endif - | b ->fff_resi - |4: // exp >= 31. Check for -(2^31). - | xoris TMP1, TMP1, 0x8000 - | srawi TMP2, CARG3, 31 - |.if "func" == "floor" - | or TMP1, TMP1, CARG2 - |.endif - |.if PPE - | orc TMP1, TMP1, TMP2 - | cmpwi TMP1, 0 - |.else - | orc. TMP1, TMP1, TMP2 - |.endif - | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq - | lus CARG1, 0x8000 // -(2^31). - | beqy ->fff_resi - |5: - | lfd FARG1, 0(BASE) - | blex func - | b ->fff_resn - |.endmacro - | - |.if DUALNUM - | math_round floor - | math_round ceil - |.else - | // NYI: use internal implementation. - | math_extern floor - | math_extern ceil - |.endif - | - |.if SQRT - |.ffunc_n math_sqrt - | fsqrt FARG1, FARG1 - | b ->fff_resn - |.else - | math_extern sqrt - |.endif - | - |.ffunc math_log - | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lfd FARG1, 0(BASE) - | bne ->fff_fallback // Need exactly 1 argument. - | checknum CARG3; bge ->fff_fallback - | blex log - | b ->fff_resn - | - | math_extern log10 - | math_extern exp - | math_extern sin - | math_extern cos - | math_extern tan - | math_extern asin - | math_extern acos - | math_extern atan - | math_extern sinh - | math_extern cosh - | math_extern tanh - | math_extern2 pow - | math_extern2 atan2 - | math_extern2 fmod - | - |.if DUALNUM - |.ffunc math_ldexp - | cmplwi NARGS8:RC, 16 - | lwz CARG3, 0(BASE) - | lfd FARG1, 0(BASE) - | lwz CARG4, 8(BASE) - |.if GPR64 - | lwz CARG2, 12(BASE) - |.else - | lwz CARG1, 12(BASE) - |.endif - | blt ->fff_fallback - | checknum CARG3; bge ->fff_fallback - | checknum CARG4; bne ->fff_fallback - |.else - |.ffunc_nn math_ldexp - |.if GPR64 - | toint CARG2, FARG2 - |.else - | toint CARG1, FARG2 - |.endif - |.endif - | blex ldexp - | b ->fff_resn - | - |.ffunc_n math_frexp - |.if GPR64 - | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) - |.else - | la CARG1, DISPATCH_GL(tmptv)(DISPATCH) - |.endif - | lwz PC, FRAME_PC(BASE) - | blex frexp - | lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH) - | la RA, -8(BASE) - |.if not DUALNUM - | tonum_i FARG2, TMP1 - |.endif - | stfd FARG1, 0(RA) - | li RD, (2+1)*8 - |.if DUALNUM - | stw TISNUM, 8(RA) - | stw TMP1, 12(RA) - |.else - | stfd FARG2, 8(RA) - |.endif - | b ->fff_res - | - |.ffunc_n math_modf - |.if GPR64 - | la CARG2, -8(BASE) - |.else - | la CARG1, -8(BASE) - |.endif - | lwz PC, FRAME_PC(BASE) - | blex modf - | la RA, -8(BASE) - | stfd FARG1, 0(BASE) - | li RD, (2+1)*8 - | b ->fff_res - | - |.macro math_minmax, name, ismax - |.if DUALNUM - | .ffunc_1 name - | checknum CARG3 - | addi TMP1, BASE, 8 - | add TMP2, BASE, NARGS8:RC - | bne >4 - |1: // Handle integers. - | lwz CARG4, 0(TMP1) - | cmplw cr1, TMP1, TMP2 - | lwz CARG2, 4(TMP1) - | bge cr1, ->fff_resi - | checknum CARG4 - | xoris TMP0, CARG1, 0x8000 - | xoris TMP3, CARG2, 0x8000 - | bne >3 - | subfc TMP3, TMP3, TMP0 - | subfe TMP0, TMP0, TMP0 - |.if ismax - | andc TMP3, TMP3, TMP0 - |.else - | and TMP3, TMP3, TMP0 - |.endif - | add CARG1, TMP3, CARG2 - |.if GPR64 - | rldicl CARG1, CARG1, 0, 32 - |.endif - | addi TMP1, TMP1, 8 - | b <1 - |3: - | bge ->fff_fallback - | // Convert intermediate result to number and continue below. - | tonum_i FARG1, CARG1 - | lfd FARG2, 0(TMP1) - | b >6 - |4: - | lfd FARG1, 0(BASE) - | bge ->fff_fallback - |5: // Handle numbers. - | lwz CARG4, 0(TMP1) - | cmplw cr1, TMP1, TMP2 - | lfd FARG2, 0(TMP1) - | bge cr1, ->fff_resn - | checknum CARG4; bge >7 - |6: - | fsub f0, FARG1, FARG2 - | addi TMP1, TMP1, 8 - |.if ismax - | fsel FARG1, f0, FARG1, FARG2 - |.else - | fsel FARG1, f0, FARG2, FARG1 - |.endif - | b <5 - |7: // Convert integer to number and continue above. - | lwz CARG2, 4(TMP1) - | bne ->fff_fallback - | tonum_i FARG2, CARG2 - | b <6 - |.else - | .ffunc_n name - | li TMP1, 8 - |1: - | lwzx CARG2, BASE, TMP1 - | lfdx FARG2, BASE, TMP1 - | cmplw cr1, TMP1, NARGS8:RC - | checknum CARG2 - | bge cr1, ->fff_resn - | bge ->fff_fallback - | fsub f0, FARG1, FARG2 - | addi TMP1, TMP1, 8 - |.if ismax - | fsel FARG1, f0, FARG1, FARG2 - |.else - | fsel FARG1, f0, FARG2, FARG1 - |.endif - | b <1 - |.endif - |.endmacro - | - | math_minmax math_min, 0 - | math_minmax math_max, 1 - | - |//-- String library ----------------------------------------------------- - | - |.ffunc string_byte // Only handle the 1-arg case here. - | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lwz STR:CARG1, 4(BASE) - | bne ->fff_fallback // Need exactly 1 argument. - | checkstr CARG3 - | bne ->fff_fallback - | lwz TMP0, STR:CARG1->len - |.if DUALNUM - | lbz CARG1, STR:CARG1[1] // Access is always ok (NUL at end). - | li RD, (0+1)*8 - | lwz PC, FRAME_PC(BASE) - | cmplwi TMP0, 0 - | la RA, -8(BASE) - | beqy ->fff_res - | b ->fff_resi - |.else - | lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end). - | addic TMP3, TMP0, -1 // RD = ((str->len != 0)+1)*8 - | subfe RD, TMP3, TMP0 - | stw TMP1, TONUM_LO // Inlined tonum_u f0, TMP1. - | addi RD, RD, 1 - | lfd f0, TONUM_D - | la RA, -8(BASE) - | lwz PC, FRAME_PC(BASE) - | fsub f0, f0, TOBIT - | slwi RD, RD, 3 - | stfd f0, 0(RA) - | b ->fff_res - |.endif - | - |.ffunc string_char // Only handle the 1-arg case here. - | ffgccheck - | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - |.if DUALNUM - | lwz TMP0, 4(BASE) - | bne ->fff_fallback // Exactly 1 argument. - | checknum CARG3; bne ->fff_fallback - | la CARG2, 7(BASE) - |.else - | lfd FARG1, 0(BASE) - | bne ->fff_fallback // Exactly 1 argument. - | checknum CARG3; bge ->fff_fallback - | toint TMP0, FARG1 - | la CARG2, TMPD_BLO - |.endif - | li CARG3, 1 - | cmplwi TMP0, 255; bgt ->fff_fallback - |->fff_newstr: - | mr CARG1, L - | stp BASE, L->base - | stw PC, SAVE_PC - | bl extern lj_str_new // (lua_State *L, char *str, size_t l) - |->fff_resstr: - | // Returns GCstr *. - | lp BASE, L->base - | li CARG3, LJ_TSTR - | b ->fff_restv - | - |.ffunc string_sub - | ffgccheck - | cmplwi NARGS8:RC, 16 - | lwz CARG3, 16(BASE) - |.if not DUALNUM - | lfd f0, 16(BASE) - |.endif - | lwz TMP0, 0(BASE) - | lwz STR:CARG1, 4(BASE) - | blt ->fff_fallback - | lwz CARG2, 8(BASE) - |.if DUALNUM - | lwz TMP1, 12(BASE) - |.else - | lfd f1, 8(BASE) - |.endif - | li TMP2, -1 - | beq >1 - |.if DUALNUM - | checknum CARG3 - | lwz TMP2, 20(BASE) - | bne ->fff_fallback - |1: - | checknum CARG2; bne ->fff_fallback - |.else - | checknum CARG3; bge ->fff_fallback - | toint TMP2, f0 - |1: - | checknum CARG2; bge ->fff_fallback - |.endif - | checkstr TMP0; bne ->fff_fallback - |.if not DUALNUM - | toint TMP1, f1 - |.endif - | lwz TMP0, STR:CARG1->len - | cmplw TMP0, TMP2 // len < end? (unsigned compare) - | addi TMP3, TMP2, 1 - | blt >5 - |2: - | cmpwi TMP1, 0 // start <= 0? - | add TMP3, TMP1, TMP0 - | ble >7 - |3: - | sub CARG3, TMP2, TMP1 - | addi CARG2, STR:CARG1, #STR-1 - | srawi TMP0, CARG3, 31 - | addi CARG3, CARG3, 1 - | add CARG2, CARG2, TMP1 - | andc CARG3, CARG3, TMP0 - |.if GPR64 - | rldicl CARG2, CARG2, 0, 32 - | rldicl CARG3, CARG3, 0, 32 - |.endif - | b ->fff_newstr - | - |5: // Negative end or overflow. - | cmpw TMP0, TMP2 // len >= end? (signed compare) - | add TMP2, TMP0, TMP3 // Negative end: end = end+len+1. - | bge <2 - | mr TMP2, TMP0 // Overflow: end = len. - | b <2 - | - |7: // Negative start or underflow. - | .gpr64 extsw TMP1, TMP1 - | addic CARG3, TMP1, -1 - | subfe CARG3, CARG3, CARG3 - | srawi CARG2, TMP3, 31 // Note: modifies carry. - | andc TMP3, TMP3, CARG3 - | andc TMP1, TMP3, CARG2 - | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0) - | b <3 - | - |.macro ffstring_op, name - | .ffunc string_ .. name - | ffgccheck - | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lwz STR:CARG2, 4(BASE) - | blt ->fff_fallback - | checkstr CARG3 - | la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH) - | bne ->fff_fallback - | lwz TMP0, SBUF:CARG1->b - | stw L, SBUF:CARG1->L - | stp BASE, L->base - | stw PC, SAVE_PC - | stw TMP0, SBUF:CARG1->p - | bl extern lj_buf_putstr_ .. name - | bl extern lj_buf_tostr - | b ->fff_resstr - |.endmacro - | - |ffstring_op reverse - |ffstring_op lower - |ffstring_op upper - | - |//-- Bit library -------------------------------------------------------- - | - |.macro .ffunc_bit, name - |.if DUALNUM - | .ffunc_1 bit_..name - | checknum CARG3; bnel ->fff_tobit_fb - |.else - | .ffunc_n bit_..name - | fadd FARG1, FARG1, TOBIT - | stfd FARG1, TMPD - | lwz CARG1, TMPD_LO - |.endif - |.endmacro - | - |.macro .ffunc_bit_op, name, ins - | .ffunc_bit name - | addi TMP1, BASE, 8 - | add TMP2, BASE, NARGS8:RC - |1: - | lwz CARG4, 0(TMP1) - | cmplw cr1, TMP1, TMP2 - |.if DUALNUM - | lwz CARG2, 4(TMP1) - |.else - | lfd FARG1, 0(TMP1) - |.endif - | bgey cr1, ->fff_resi - | checknum CARG4 - |.if DUALNUM - | bnel ->fff_bitop_fb - |.else - | fadd FARG1, FARG1, TOBIT - | bge ->fff_fallback - | stfd FARG1, TMPD - | lwz CARG2, TMPD_LO - |.endif - | ins CARG1, CARG1, CARG2 - | addi TMP1, TMP1, 8 - | b <1 - |.endmacro - | - |.ffunc_bit_op band, and - |.ffunc_bit_op bor, or - |.ffunc_bit_op bxor, xor - | - |.ffunc_bit bswap - | rotlwi TMP0, CARG1, 8 - | rlwimi TMP0, CARG1, 24, 0, 7 - | rlwimi TMP0, CARG1, 24, 16, 23 - | mr CRET1, TMP0 - | b ->fff_resi - | - |.ffunc_bit bnot - | not CRET1, CARG1 - | b ->fff_resi - | - |.macro .ffunc_bit_sh, name, ins, shmod - |.if DUALNUM - | .ffunc_2 bit_..name - | checknum CARG3; bnel ->fff_tobit_fb - | // Note: no inline conversion from number for 2nd argument! - | checknum CARG4; bne ->fff_fallback - |.else - | .ffunc_nn bit_..name - | fadd FARG1, FARG1, TOBIT - | fadd FARG2, FARG2, TOBIT - | stfd FARG1, TMPD - | lwz CARG1, TMPD_LO - | stfd FARG2, TMPD - | lwz CARG2, TMPD_LO - |.endif - |.if shmod == 1 - | rlwinm CARG2, CARG2, 0, 27, 31 - |.elif shmod == 2 - | neg CARG2, CARG2 - |.endif - | ins CRET1, CARG1, CARG2 - | b ->fff_resi - |.endmacro - | - |.ffunc_bit_sh lshift, slw, 1 - |.ffunc_bit_sh rshift, srw, 1 - |.ffunc_bit_sh arshift, sraw, 1 - |.ffunc_bit_sh rol, rotlw, 0 - |.ffunc_bit_sh ror, rotlw, 2 - | - |.ffunc_bit tobit - |.if DUALNUM - | b ->fff_resi - |.else - |->fff_resi: - | tonum_i FARG1, CRET1 - |.endif - |->fff_resn: - | lwz PC, FRAME_PC(BASE) - | la RA, -8(BASE) - | stfd FARG1, -8(BASE) - | b ->fff_res1 - | - |// Fallback FP number to bit conversion. - |->fff_tobit_fb: - |.if DUALNUM - | lfd FARG1, 0(BASE) - | bgt ->fff_fallback - | fadd FARG1, FARG1, TOBIT - | stfd FARG1, TMPD - | lwz CARG1, TMPD_LO - | blr - |.endif - |->fff_bitop_fb: - |.if DUALNUM - | lfd FARG1, 0(TMP1) - | bgt ->fff_fallback - | fadd FARG1, FARG1, TOBIT - | stfd FARG1, TMPD - | lwz CARG2, TMPD_LO - | blr - |.endif - | - |//----------------------------------------------------------------------- - | - |->fff_fallback: // Call fast function fallback handler. - | // BASE = new base, RB = CFUNC, RC = nargs*8 - | lp TMP3, CFUNC:RB->f - | add TMP1, BASE, NARGS8:RC - | lwz PC, FRAME_PC(BASE) // Fallback may overwrite PC. - | addi TMP0, TMP1, 8*LUA_MINSTACK - | lwz TMP2, L->maxstack - | stw PC, SAVE_PC // Redundant (but a defined value). - | .toc lp TMP3, 0(TMP3) - | cmplw TMP0, TMP2 - | stp BASE, L->base - | stp TMP1, L->top - | mr CARG1, L - | bgt >5 // Need to grow stack. - | mtctr TMP3 - | bctrl // (lua_State *L) - | // Either throws an error, or recovers and returns -1, 0 or nresults+1. - | lp BASE, L->base - | cmpwi CRET1, 0 - | slwi RD, CRET1, 3 - | la RA, -8(BASE) - | bgt ->fff_res // Returned nresults+1? - |1: // Returned 0 or -1: retry fast path. - | lp TMP0, L->top - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | sub NARGS8:RC, TMP0, BASE - | bne ->vm_call_tail // Returned -1? - | ins_callt // Returned 0: retry fast path. - | - |// Reconstruct previous base for vmeta_call during tailcall. - |->vm_call_tail: - | andix. TMP0, PC, FRAME_TYPE - | rlwinm TMP1, PC, 0, 0, 28 - | bne >3 - | lwz INS, -4(PC) - | decode_RA8 TMP1, INS - | addi TMP1, TMP1, 8 - |3: - | sub TMP2, BASE, TMP1 - | b ->vm_call_dispatch // Resolve again for tailcall. - | - |5: // Grow stack for fallback handler. - | li CARG2, LUA_MINSTACK - | bl extern lj_state_growstack // (lua_State *L, int n) - | lp BASE, L->base - | cmpw TMP0, TMP0 // Set 4*cr0+eq to force retry. - | b <1 - | - |->fff_gcstep: // Call GC step function. - | // BASE = new base, RC = nargs*8 - | mflr SAVE0 - | stp BASE, L->base - | add TMP0, BASE, NARGS8:RC - | stw PC, SAVE_PC // Redundant (but a defined value). - | stp TMP0, L->top - | mr CARG1, L - | bl extern lj_gc_step // (lua_State *L) - | lp BASE, L->base - | mtlr SAVE0 - | lp TMP0, L->top - | sub NARGS8:RC, TMP0, BASE - | lwz CFUNC:RB, FRAME_FUNC(BASE) - | blr - | - |//----------------------------------------------------------------------- - |//-- Special dispatch targets ------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_record: // Dispatch target for recording phase. - |.if JIT - | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | andix. TMP0, TMP3, HOOK_VMEVENT // No recording while in vmevent. - | bne >5 - | // Decrement the hookcount for consistency, but always do the call. - | lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andix. TMP0, TMP3, HOOK_ACTIVE - | bne >1 - | subi TMP2, TMP2, 1 - | andi. TMP0, TMP3, LUA_MASKLINE|LUA_MASKCOUNT - | beqy >1 - | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | b >1 - |.endif - | - |->vm_rethook: // Dispatch target for return hooks. - | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | andix. TMP0, TMP3, HOOK_ACTIVE // Hook already active? - | beq >1 - |5: // Re-dispatch to static ins. - | addi TMP1, TMP1, GG_DISP2STATIC // Assumes decode_OPP TMP1, INS. - | lpx TMP0, DISPATCH, TMP1 - | mtctr TMP0 - | bctr - | - |->vm_inshook: // Dispatch target for instr/line hooks. - | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andix. TMP0, TMP3, HOOK_ACTIVE // Hook already active? - | rlwinm TMP0, TMP3, 31-LUA_HOOKLINE, 31, 0 - | bne <5 - | - | cmpwi cr1, TMP0, 0 - | addic. TMP2, TMP2, -1 - | beq cr1, <5 - | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | beq >1 - | bge cr1, <5 - |1: - | mr CARG1, L - | stw MULTRES, SAVE_MULTRES - | mr CARG2, PC - | stp BASE, L->base - | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. - | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) - |3: - | lp BASE, L->base - |4: // Re-dispatch to static ins. - | lwz INS, -4(PC) - | decode_OPP TMP1, INS - | decode_RB8 RB, INS - | addi TMP1, TMP1, GG_DISP2STATIC - | decode_RD8 RD, INS - | lpx TMP0, DISPATCH, TMP1 - | decode_RA8 RA, INS - | decode_RC8 RC, INS - | mtctr TMP0 - | bctr - | - |->cont_hook: // Continue from hook yield. - | addi PC, PC, 4 - | lwz MULTRES, -20(RB) // Restore MULTRES for *M ins. - | b <4 - | - |->vm_hotloop: // Hot loop counter underflow. - |.if JIT - | lwz LFUNC:TMP1, FRAME_FUNC(BASE) - | addi CARG1, DISPATCH, GG_DISP2J - | stw PC, SAVE_PC - | lwz TMP1, LFUNC:TMP1->pc - | mr CARG2, PC - | stw L, DISPATCH_J(L)(DISPATCH) - | lbz TMP1, PC2PROTO(framesize)(TMP1) - | stp BASE, L->base - | slwi TMP1, TMP1, 3 - | add TMP1, BASE, TMP1 - | stp TMP1, L->top - | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) - | b <3 - |.endif - | - |->vm_callhook: // Dispatch target for call hooks. - | mr CARG2, PC - |.if JIT - | b >1 - |.endif - | - |->vm_hotcall: // Hot call counter underflow. - |.if JIT - | ori CARG2, PC, 1 - |1: - |.endif - | add TMP0, BASE, RC - | stw PC, SAVE_PC - | mr CARG1, L - | stp BASE, L->base - | sub RA, RA, BASE - | stp TMP0, L->top - | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) - | // Returns ASMFunction. - | lp BASE, L->base - | lp TMP0, L->top - | stw ZERO, SAVE_PC // Invalidate for subsequent line hook. - | sub NARGS8:RC, TMP0, BASE - | add RA, BASE, RA - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | lwz INS, -4(PC) - | mtctr CRET1 - | bctr - | - |->cont_stitch: // Trace stitching. - |.if JIT - | // RA = resultptr, RB = meta base - | lwz INS, -4(PC) - | lwz TRACE:TMP2, -20(RB) // Save previous trace. - | addic. TMP1, MULTRES, -8 - | decode_RA8 RC, INS // Call base. - | beq >2 - |1: // Move results down. - | lfd f0, 0(RA) - | addic. TMP1, TMP1, -8 - | addi RA, RA, 8 - | stfdx f0, BASE, RC - | addi RC, RC, 8 - | bne <1 - |2: - | decode_RA8 RA, INS - | decode_RB8 RB, INS - | add RA, RA, RB - |3: - | cmplw RA, RC - | bgt >9 // More results wanted? - | - | lhz TMP3, TRACE:TMP2->traceno - | lhz RD, TRACE:TMP2->link - | cmpw RD, TMP3 - | cmpwi cr1, RD, 0 - | beq ->cont_nop // Blacklisted. - | slwi RD, RD, 3 - | bne cr1, =>BC_JLOOP // Jump to stitched trace. - | - | // Stitch a new trace to the previous trace. - | stw TMP3, DISPATCH_J(exitno)(DISPATCH) - | stp L, DISPATCH_J(L)(DISPATCH) - | stp BASE, L->base - | addi CARG1, DISPATCH, GG_DISP2J - | mr CARG2, PC - | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) - | lp BASE, L->base - | b ->cont_nop - | - |9: - | stwx TISNIL, BASE, RC - | addi RC, RC, 8 - | b <3 - |.endif - | - |->vm_profhook: // Dispatch target for profiler hook. -#if LJ_HASPROFILE - | mr CARG1, L - | stw MULTRES, SAVE_MULTRES - | mr CARG2, PC - | stp BASE, L->base - | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) - | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. - | lp BASE, L->base - | subi PC, PC, 4 - | b ->cont_nop -#endif - | - |//----------------------------------------------------------------------- - |//-- Trace exit handler ------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro savex_, a, b, c, d - | stfd f..a, 16+a*8(sp) - | stfd f..b, 16+b*8(sp) - | stfd f..c, 16+c*8(sp) - | stfd f..d, 16+d*8(sp) - |.endmacro - | - |->vm_exit_handler: - |.if JIT - | addi sp, sp, -(16+32*8+32*4) - | stmw r2, 16+32*8+2*4(sp) - | addi DISPATCH, JGL, -GG_DISP2G-32768 - | li CARG2, ~LJ_VMST_EXIT - | lwz CARG1, 16+32*8+32*4(sp) // Get stack chain. - | stw CARG2, DISPATCH_GL(vmstate)(DISPATCH) - | savex_ 0,1,2,3 - | stw CARG1, 0(sp) // Store extended stack chain. - | clrso TMP1 - | savex_ 4,5,6,7 - | addi CARG2, sp, 16+32*8+32*4 // Recompute original value of sp. - | savex_ 8,9,10,11 - | stw CARG2, 16+32*8+1*4(sp) // Store sp in RID_SP. - | savex_ 12,13,14,15 - | mflr CARG3 - | li TMP1, 0 - | savex_ 16,17,18,19 - | stw TMP1, 16+32*8+0*4(sp) // Clear RID_TMP. - | savex_ 20,21,22,23 - | lhz CARG4, 2(CARG3) // Load trace number. - | savex_ 24,25,26,27 - | lwz L, DISPATCH_GL(cur_L)(DISPATCH) - | savex_ 28,29,30,31 - | sub CARG3, TMP0, CARG3 // Compute exit number. - | lp BASE, DISPATCH_GL(jit_base)(DISPATCH) - | srwi CARG3, CARG3, 2 - | stp L, DISPATCH_J(L)(DISPATCH) - | subi CARG3, CARG3, 2 - | stp BASE, L->base - | stw CARG4, DISPATCH_J(parent)(DISPATCH) - | stw TMP1, DISPATCH_GL(jit_base)(DISPATCH) - | addi CARG1, DISPATCH, GG_DISP2J - | stw CARG3, DISPATCH_J(exitno)(DISPATCH) - | addi CARG2, sp, 16 - | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) - | // Returns MULTRES (unscaled) or negated error code. - | lp TMP1, L->cframe - | lwz TMP2, 0(sp) - | lp BASE, L->base - |.if GPR64 - | rldicr sp, TMP1, 0, 61 - |.else - | rlwinm sp, TMP1, 0, 0, 29 - |.endif - | lwz PC, SAVE_PC // Get SAVE_PC. - | stw TMP2, 0(sp) - | stw L, SAVE_L // Set SAVE_L (on-trace resume/yield). - | b >1 - |.endif - |->vm_exit_interp: - |.if JIT - | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set. - | lwz L, SAVE_L - | addi DISPATCH, JGL, -GG_DISP2G-32768 - | stp BASE, L->base - |1: - | cmpwi CARG1, 0 - | blt >9 // Check for error from exit. - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | slwi MULTRES, CARG1, 3 - | li TMP2, 0 - | stw MULTRES, SAVE_MULTRES - | lwz TMP1, LFUNC:RB->pc - | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH) - | lwz KBASE, PC2PROTO(k)(TMP1) - | // Setup type comparison constants. - | li TISNUM, LJ_TISNUM - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | stw TMP3, TMPD - | li ZERO, 0 - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). - | lfs TOBIT, TMPD - | stw TMP3, TMPD - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) - | li TISNIL, LJ_TNIL - | stw TMP0, TONUM_HI - | lfs TONUM, TMPD - | // Modified copy of ins_next which handles function header dispatch, too. - | lwz INS, 0(PC) - | addi PC, PC, 4 - | // Assumes TISNIL == ~LJ_VMST_INTERP == -1. - | stw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) - | decode_OPP TMP1, INS - | decode_RA8 RA, INS - | lpx TMP0, DISPATCH, TMP1 - | mtctr TMP0 - | cmplwi TMP1, BC_FUNCF*4 // Function header? - | bge >2 - | decode_RB8 RB, INS - | decode_RD8 RD, INS - | decode_RC8 RC, INS - | bctr - |2: - | cmplwi TMP1, (BC_FUNCC+2)*4 // Fast function? - | blt >3 - | // Check frame below fast function. - | lwz TMP1, FRAME_PC(BASE) - | andix. TMP0, TMP1, FRAME_TYPE - | bney >3 // Trace stitching continuation? - | // Otherwise set KBASE for Lua function below fast function. - | lwz TMP2, -4(TMP1) - | decode_RA8 TMP0, TMP2 - | sub TMP1, BASE, TMP0 - | lwz LFUNC:TMP2, -12(TMP1) - | lwz TMP1, LFUNC:TMP2->pc - | lwz KBASE, PC2PROTO(k)(TMP1) - |3: - | subi RC, MULTRES, 8 - | add RA, RA, BASE - | bctr - | - |9: // Rethrow error from the right C frame. - | neg CARG2, CARG1 - | mr CARG1, L - | bl extern lj_err_throw // (lua_State *L, int errcode) - |.endif - | - |//----------------------------------------------------------------------- - |//-- Math helper functions ---------------------------------------------- - |//----------------------------------------------------------------------- - | - |// NYI: Use internal implementations of floor, ceil, trunc. - | - |->vm_modi: - | divwo. TMP0, CARG1, CARG2 - | bso >1 - |.if GPR64 - | xor CARG3, CARG1, CARG2 - | cmpwi CARG3, 0 - |.else - | xor. CARG3, CARG1, CARG2 - |.endif - | mullw TMP0, TMP0, CARG2 - | sub CARG1, CARG1, TMP0 - | bgelr - | cmpwi CARG1, 0; beqlr - | add CARG1, CARG1, CARG2 - | blr - |1: - | cmpwi CARG2, 0 - | li CARG1, 0 - | beqlr - | clrso TMP0 // Clear SO for -2147483648 % -1 and return 0. - | blr - | - |//----------------------------------------------------------------------- - |//-- Miscellaneous functions -------------------------------------------- - |//----------------------------------------------------------------------- - | - |// void lj_vm_cachesync(void *start, void *end) - |// Flush D-Cache and invalidate I-Cache. Assumes 32 byte cache line size. - |// This is a good lower bound, except for very ancient PPC models. - |->vm_cachesync: - |.if JIT or FFI - | // Compute start of first cache line and number of cache lines. - | rlwinm CARG1, CARG1, 0, 0, 26 - | sub CARG2, CARG2, CARG1 - | addi CARG2, CARG2, 31 - | rlwinm. CARG2, CARG2, 27, 5, 31 - | beqlr - | mtctr CARG2 - | mr CARG3, CARG1 - |1: // Flush D-Cache. - | dcbst r0, CARG1 - | addi CARG1, CARG1, 32 - | bdnz <1 - | sync - | mtctr CARG2 - |1: // Invalidate I-Cache. - | icbi r0, CARG3 - | addi CARG3, CARG3, 32 - | bdnz <1 - | isync - | blr - |.endif - | - |//----------------------------------------------------------------------- - |//-- FFI helper functions ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Handler for callback functions. Callback slot number in r11, g in r12. - |->vm_ffi_callback: - |.if FFI - |.type CTSTATE, CTState, PC - | saveregs - | lwz CTSTATE, GL:r12->ctype_state - | addi DISPATCH, r12, GG_G2DISP - | stw r11, CTSTATE->cb.slot - | stw r3, CTSTATE->cb.gpr[0] - | stfd f1, CTSTATE->cb.fpr[0] - | stw r4, CTSTATE->cb.gpr[1] - | stfd f2, CTSTATE->cb.fpr[1] - | stw r5, CTSTATE->cb.gpr[2] - | stfd f3, CTSTATE->cb.fpr[2] - | stw r6, CTSTATE->cb.gpr[3] - | stfd f4, CTSTATE->cb.fpr[3] - | stw r7, CTSTATE->cb.gpr[4] - | stfd f5, CTSTATE->cb.fpr[4] - | stw r8, CTSTATE->cb.gpr[5] - | stfd f6, CTSTATE->cb.fpr[5] - | stw r9, CTSTATE->cb.gpr[6] - | stfd f7, CTSTATE->cb.fpr[6] - | stw r10, CTSTATE->cb.gpr[7] - | stfd f8, CTSTATE->cb.fpr[7] - | addi TMP0, sp, CFRAME_SPACE+8 - | stw TMP0, CTSTATE->cb.stack - | mr CARG1, CTSTATE - | stw CTSTATE, SAVE_PC // Any value outside of bytecode is ok. - | mr CARG2, sp - | bl extern lj_ccallback_enter // (CTState *cts, void *cf) - | // Returns lua_State *. - | lp BASE, L:CRET1->base - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lp RC, L:CRET1->top - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | li ZERO, 0 - | mr L, CRET1 - | stw TMP3, TMPD - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). - | stw TMP0, TONUM_HI - | li TISNIL, LJ_TNIL - | li_vmstate INTERP - | lfs TOBIT, TMPD - | stw TMP3, TMPD - | sub RC, RC, BASE - | st_vmstate - | lfs TONUM, TMPD - | ins_callt - |.endif - | - |->cont_ffi_callback: // Return from FFI callback. - |.if FFI - | lwz CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH) - | stp BASE, L->base - | stp RB, L->top - | stp L, CTSTATE->L - | mr CARG1, CTSTATE - | mr CARG2, RA - | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) - | lwz CRET1, CTSTATE->cb.gpr[0] - | lfd FARG1, CTSTATE->cb.fpr[0] - | lwz CRET2, CTSTATE->cb.gpr[1] - | b ->vm_leave_unw - |.endif - | - |->vm_ffi_call: // Call C function via FFI. - | // Caveat: needs special frame unwinding, see below. - |.if FFI - | .type CCSTATE, CCallState, CARG1 - | lwz TMP1, CCSTATE->spadj - | mflr TMP0 - | lbz CARG2, CCSTATE->nsp - | lbz CARG3, CCSTATE->nfpr - | neg TMP1, TMP1 - | stw TMP0, 4(sp) - | cmpwi cr1, CARG3, 0 - | mr TMP2, sp - | addic. CARG2, CARG2, -1 - | stwux sp, sp, TMP1 - | crnot 4*cr1+eq, 4*cr1+eq // For vararg calls. - | stw r14, -4(TMP2) - | stw CCSTATE, -8(TMP2) - | mr r14, TMP2 - | la TMP1, CCSTATE->stack - | slwi CARG2, CARG2, 2 - | blty >2 - | la TMP2, 8(sp) - |1: - | lwzx TMP0, TMP1, CARG2 - | stwx TMP0, TMP2, CARG2 - | addic. CARG2, CARG2, -4 - | bge <1 - |2: - | bney cr1, >3 - | lfd f1, CCSTATE->fpr[0] - | lfd f2, CCSTATE->fpr[1] - | lfd f3, CCSTATE->fpr[2] - | lfd f4, CCSTATE->fpr[3] - | lfd f5, CCSTATE->fpr[4] - | lfd f6, CCSTATE->fpr[5] - | lfd f7, CCSTATE->fpr[6] - | lfd f8, CCSTATE->fpr[7] - |3: - | lp TMP0, CCSTATE->func - | lwz CARG2, CCSTATE->gpr[1] - | lwz CARG3, CCSTATE->gpr[2] - | lwz CARG4, CCSTATE->gpr[3] - | lwz CARG5, CCSTATE->gpr[4] - | mtctr TMP0 - | lwz r8, CCSTATE->gpr[5] - | lwz r9, CCSTATE->gpr[6] - | lwz r10, CCSTATE->gpr[7] - | lwz CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. - | bctrl - | lwz CCSTATE:TMP1, -8(r14) - | lwz TMP2, -4(r14) - | lwz TMP0, 4(r14) - | stw CARG1, CCSTATE:TMP1->gpr[0] - | stfd FARG1, CCSTATE:TMP1->fpr[0] - | stw CARG2, CCSTATE:TMP1->gpr[1] - | mtlr TMP0 - | stw CARG3, CCSTATE:TMP1->gpr[2] - | mr sp, r14 - | stw CARG4, CCSTATE:TMP1->gpr[3] - | mr r14, TMP2 - | blr - |.endif - |// Note: vm_ffi_call must be the last function in this object file! - | - |//----------------------------------------------------------------------- -} - -/* Generate the code for a single instruction. */ -static void build_ins(BuildCtx *ctx, BCOp op, int defop) -{ - int vk = 0; - |=>defop: - - switch (op) { - - /* -- Comparison ops ---------------------------------------------------- */ - - /* Remember: all ops branch for a true comparison, fall through otherwise. */ - - case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: - | // RA = src1*8, RD = src2*8, JMP with RD = target - |.if DUALNUM - | lwzux TMP0, RA, BASE - | addi PC, PC, 4 - | lwz CARG2, 4(RA) - | lwzux TMP1, RD, BASE - | lwz TMP2, -4(PC) - | checknum cr0, TMP0 - | lwz CARG3, 4(RD) - | decode_RD4 TMP2, TMP2 - | checknum cr1, TMP1 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - | bne cr0, >7 - | bne cr1, >8 - | cmpw CARG2, CARG3 - if (op == BC_ISLT) { - | bge >2 - } else if (op == BC_ISGE) { - | blt >2 - } else if (op == BC_ISLE) { - | bgt >2 - } else { - | ble >2 - } - |1: - | add PC, PC, TMP2 - |2: - | ins_next - | - |7: // RA is not an integer. - | bgt cr0, ->vmeta_comp - | // RA is a number. - | lfd f0, 0(RA) - | bgt cr1, ->vmeta_comp - | blt cr1, >4 - | // RA is a number, RD is an integer. - | tonum_i f1, CARG3 - | b >5 - | - |8: // RA is an integer, RD is not an integer. - | bgt cr1, ->vmeta_comp - | // RA is an integer, RD is a number. - | tonum_i f0, CARG2 - |4: - | lfd f1, 0(RD) - |5: - | fcmpu cr0, f0, f1 - if (op == BC_ISLT) { - | bge <2 - } else if (op == BC_ISGE) { - | blt <2 - } else if (op == BC_ISLE) { - | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq - | bge <2 - } else { - | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq - | blt <2 - } - | b <1 - |.else - | lwzx TMP0, BASE, RA - | addi PC, PC, 4 - | lfdx f0, BASE, RA - | lwzx TMP1, BASE, RD - | checknum cr0, TMP0 - | lwz TMP2, -4(PC) - | lfdx f1, BASE, RD - | checknum cr1, TMP1 - | decode_RD4 TMP2, TMP2 - | bge cr0, ->vmeta_comp - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - | bge cr1, ->vmeta_comp - | fcmpu cr0, f0, f1 - if (op == BC_ISLT) { - | bge >1 - } else if (op == BC_ISGE) { - | blt >1 - } else if (op == BC_ISLE) { - | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq - | bge >1 - } else { - | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq - | blt >1 - } - | add PC, PC, TMP2 - |1: - | ins_next - |.endif - break; - - case BC_ISEQV: case BC_ISNEV: - vk = op == BC_ISEQV; - | // RA = src1*8, RD = src2*8, JMP with RD = target - |.if DUALNUM - | lwzux TMP0, RA, BASE - | addi PC, PC, 4 - | lwz CARG2, 4(RA) - | lwzux TMP1, RD, BASE - | checknum cr0, TMP0 - | lwz TMP2, -4(PC) - | checknum cr1, TMP1 - | decode_RD4 TMP2, TMP2 - | lwz CARG3, 4(RD) - | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - if (vk) { - | ble cr7, ->BC_ISEQN_Z - } else { - | ble cr7, ->BC_ISNEN_Z - } - |.else - | lwzux TMP0, RA, BASE - | lwz TMP2, 0(PC) - | lfd f0, 0(RA) - | addi PC, PC, 4 - | lwzux TMP1, RD, BASE - | checknum cr0, TMP0 - | decode_RD4 TMP2, TMP2 - | lfd f1, 0(RD) - | checknum cr1, TMP1 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - | bge cr0, >5 - | bge cr1, >5 - | fcmpu cr0, f0, f1 - if (vk) { - | bne >1 - | add PC, PC, TMP2 - } else { - | beq >1 - | add PC, PC, TMP2 - } - |1: - | ins_next - |.endif - |5: // Either or both types are not numbers. - |.if not DUALNUM - | lwz CARG2, 4(RA) - | lwz CARG3, 4(RD) - |.endif - |.if FFI - | cmpwi cr7, TMP0, LJ_TCDATA - | cmpwi cr5, TMP1, LJ_TCDATA - |.endif - | not TMP3, TMP0 - | cmplw TMP0, TMP1 - | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? - |.if FFI - | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq - |.endif - | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? - |.if FFI - | beq cr7, ->vmeta_equal_cd - |.endif - | cmplw cr5, CARG2, CARG3 - | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. - | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. - | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. - | mr SAVE0, PC - | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. - | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. - if (vk) { - | bne cr0, >6 - | add PC, PC, TMP2 - |6: - } else { - | beq cr0, >6 - | add PC, PC, TMP2 - |6: - } - |.if DUALNUM - | bge cr0, >2 // Done if 1 or 2. - |1: - | ins_next - |2: - |.else - | blt cr0, <1 // Done if 1 or 2. - |.endif - | blt cr6, <1 // Done if not tab/ud. - | - | // Different tables or userdatas. Need to check __eq metamethod. - | // Field metatable must be at same offset for GCtab and GCudata! - | lwz TAB:TMP2, TAB:CARG2->metatable - | li CARG4, 1-vk // ne = 0 or 1. - | cmplwi TAB:TMP2, 0 - | beq <1 // No metatable? - | lbz TMP2, TAB:TMP2->nomm - | andix. TMP2, TMP2, 1<vmeta_equal // Handle __eq metamethod. - break; - - case BC_ISEQS: case BC_ISNES: - vk = op == BC_ISEQS; - | // RA = src*8, RD = str_const*8 (~), JMP with RD = target - | lwzux TMP0, RA, BASE - | srwi RD, RD, 1 - | lwz STR:TMP3, 4(RA) - | lwz TMP2, 0(PC) - | subfic RD, RD, -4 - | addi PC, PC, 4 - |.if FFI - | cmpwi TMP0, LJ_TCDATA - |.endif - | lwzx STR:TMP1, KBASE, RD // KBASE-4-str_const*4 - | .gpr64 extsw TMP0, TMP0 - | subfic TMP0, TMP0, LJ_TSTR - |.if FFI - | beq ->vmeta_equal_cd - |.endif - | sub TMP1, STR:TMP1, STR:TMP3 - | or TMP0, TMP0, TMP1 - | decode_RD4 TMP2, TMP2 - | subfic TMP0, TMP0, 0 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - | subfe TMP1, TMP1, TMP1 - if (vk) { - | andc TMP2, TMP2, TMP1 - } else { - | and TMP2, TMP2, TMP1 - } - | add PC, PC, TMP2 - | ins_next - break; - - case BC_ISEQN: case BC_ISNEN: - vk = op == BC_ISEQN; - | // RA = src*8, RD = num_const*8, JMP with RD = target - |.if DUALNUM - | lwzux TMP0, RA, BASE - | addi PC, PC, 4 - | lwz CARG2, 4(RA) - | lwzux TMP1, RD, KBASE - | checknum cr0, TMP0 - | lwz TMP2, -4(PC) - | checknum cr1, TMP1 - | decode_RD4 TMP2, TMP2 - | lwz CARG3, 4(RD) - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - if (vk) { - |->BC_ISEQN_Z: - } else { - |->BC_ISNEN_Z: - } - | bne cr0, >7 - | bne cr1, >8 - | cmpw CARG2, CARG3 - |4: - |.else - if (vk) { - |->BC_ISEQN_Z: // Dummy label. - } else { - |->BC_ISNEN_Z: // Dummy label. - } - | lwzx TMP0, BASE, RA - | addi PC, PC, 4 - | lfdx f0, BASE, RA - | lwz TMP2, -4(PC) - | lfdx f1, KBASE, RD - | decode_RD4 TMP2, TMP2 - | checknum TMP0 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - | bge >3 - | fcmpu cr0, f0, f1 - |.endif - if (vk) { - | bne >1 - | add PC, PC, TMP2 - |1: - |.if not FFI - |3: - |.endif - } else { - | beq >2 - |1: - |.if not FFI - |3: - |.endif - | add PC, PC, TMP2 - |2: - } - | ins_next - |.if FFI - |3: - | cmpwi TMP0, LJ_TCDATA - | beq ->vmeta_equal_cd - | b <1 - |.endif - |.if DUALNUM - |7: // RA is not an integer. - | bge cr0, <3 - | // RA is a number. - | lfd f0, 0(RA) - | blt cr1, >1 - | // RA is a number, RD is an integer. - | tonum_i f1, CARG3 - | b >2 - | - |8: // RA is an integer, RD is a number. - | tonum_i f0, CARG2 - |1: - | lfd f1, 0(RD) - |2: - | fcmpu cr0, f0, f1 - | b <4 - |.endif - break; - - case BC_ISEQP: case BC_ISNEP: - vk = op == BC_ISEQP; - | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target - | lwzx TMP0, BASE, RA - | srwi TMP1, RD, 3 - | lwz TMP2, 0(PC) - | not TMP1, TMP1 - | addi PC, PC, 4 - |.if FFI - | cmpwi TMP0, LJ_TCDATA - |.endif - | sub TMP0, TMP0, TMP1 - |.if FFI - | beq ->vmeta_equal_cd - |.endif - | decode_RD4 TMP2, TMP2 - | .gpr64 extsw TMP0, TMP0 - | addic TMP0, TMP0, -1 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - | subfe TMP1, TMP1, TMP1 - if (vk) { - | and TMP2, TMP2, TMP1 - } else { - | andc TMP2, TMP2, TMP1 - } - | add PC, PC, TMP2 - | ins_next - break; - - /* -- Unary test and copy ops ------------------------------------------- */ - - case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: - | // RA = dst*8 or unused, RD = src*8, JMP with RD = target - | lwzx TMP0, BASE, RD - | lwz INS, 0(PC) - | addi PC, PC, 4 - if (op == BC_IST || op == BC_ISF) { - | .gpr64 extsw TMP0, TMP0 - | subfic TMP0, TMP0, LJ_TTRUE - | decode_RD4 TMP2, INS - | subfe TMP1, TMP1, TMP1 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - if (op == BC_IST) { - | andc TMP2, TMP2, TMP1 - } else { - | and TMP2, TMP2, TMP1 - } - | add PC, PC, TMP2 - } else { - | li TMP1, LJ_TFALSE - | lfdx f0, BASE, RD - | cmplw TMP0, TMP1 - if (op == BC_ISTC) { - | bge >1 - } else { - | blt >1 - } - | addis PC, PC, -(BCBIAS_J*4 >> 16) - | decode_RD4 TMP2, INS - | stfdx f0, BASE, RA - | add PC, PC, TMP2 - |1: - } - | ins_next - break; - - case BC_ISTYPE: - | // RA = src*8, RD = -type*8 - | lwzx TMP0, BASE, RA - | srwi TMP1, RD, 3 - | ins_next1 - |.if not PPE and not GPR64 - | add. TMP0, TMP0, TMP1 - |.else - | neg TMP1, TMP1 - | cmpw TMP0, TMP1 - |.endif - | bne ->vmeta_istype - | ins_next2 - break; - case BC_ISNUM: - | // RA = src*8, RD = -(TISNUM-1)*8 - | lwzx TMP0, BASE, RA - | ins_next1 - | checknum TMP0 - | bge ->vmeta_istype - | ins_next2 - break; - - /* -- Unary ops --------------------------------------------------------- */ - - case BC_MOV: - | // RA = dst*8, RD = src*8 - | ins_next1 - | lfdx f0, BASE, RD - | stfdx f0, BASE, RA - | ins_next2 - break; - case BC_NOT: - | // RA = dst*8, RD = src*8 - | ins_next1 - | lwzx TMP0, BASE, RD - | .gpr64 extsw TMP0, TMP0 - | subfic TMP1, TMP0, LJ_TTRUE - | adde TMP0, TMP0, TMP1 - | stwx TMP0, BASE, RA - | ins_next2 - break; - case BC_UNM: - | // RA = dst*8, RD = src*8 - | lwzux TMP1, RD, BASE - | lwz TMP0, 4(RD) - | checknum TMP1 - |.if DUALNUM - | bne >5 - |.if GPR64 - | lus TMP2, 0x8000 - | neg TMP0, TMP0 - | cmplw TMP0, TMP2 - | beq >4 - |.else - | nego. TMP0, TMP0 - | bso >4 - |1: - |.endif - | ins_next1 - | stwux TISNUM, RA, BASE - | stw TMP0, 4(RA) - |3: - | ins_next2 - |4: - |.if not GPR64 - | // Potential overflow. - | checkov TMP1, <1 // Ignore unrelated overflow. - |.endif - | lus TMP1, 0x41e0 // 2^31. - | li TMP0, 0 - | b >7 - |.endif - |5: - | bge ->vmeta_unm - | xoris TMP1, TMP1, 0x8000 - |7: - | ins_next1 - | stwux TMP1, RA, BASE - | stw TMP0, 4(RA) - |.if DUALNUM - | b <3 - |.else - | ins_next2 - |.endif - break; - case BC_LEN: - | // RA = dst*8, RD = src*8 - | lwzux TMP0, RD, BASE - | lwz CARG1, 4(RD) - | checkstr TMP0; bne >2 - | lwz CRET1, STR:CARG1->len - |1: - |.if DUALNUM - | ins_next1 - | stwux TISNUM, RA, BASE - | stw CRET1, 4(RA) - |.else - | tonum_u f0, CRET1 // Result is a non-negative integer. - | ins_next1 - | stfdx f0, BASE, RA - |.endif - | ins_next2 - |2: - | checktab TMP0; bne ->vmeta_len -#if LJ_52 - | lwz TAB:TMP2, TAB:CARG1->metatable - | cmplwi TAB:TMP2, 0 - | bne >9 - |3: -#endif - |->BC_LEN_Z: - | bl extern lj_tab_len // (GCtab *t) - | // Returns uint32_t (but less than 2^31). - | b <1 -#if LJ_52 - |9: - | lbz TMP0, TAB:TMP2->nomm - | andix. TMP0, TMP0, 1<vmeta_len -#endif - break; - - /* -- Binary ops -------------------------------------------------------- */ - - |.macro ins_arithpre - | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||switch (vk) { - ||case 0: - | lwzx TMP1, BASE, RB - | .if DUALNUM - | lwzx TMP2, KBASE, RC - | .endif - | lfdx f14, BASE, RB - | lfdx f15, KBASE, RC - | .if DUALNUM - | checknum cr0, TMP1 - | checknum cr1, TMP2 - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | bge ->vmeta_arith_vn - | .else - | checknum TMP1; bge ->vmeta_arith_vn - | .endif - || break; - ||case 1: - | lwzx TMP1, BASE, RB - | .if DUALNUM - | lwzx TMP2, KBASE, RC - | .endif - | lfdx f15, BASE, RB - | lfdx f14, KBASE, RC - | .if DUALNUM - | checknum cr0, TMP1 - | checknum cr1, TMP2 - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | bge ->vmeta_arith_nv - | .else - | checknum TMP1; bge ->vmeta_arith_nv - | .endif - || break; - ||default: - | lwzx TMP1, BASE, RB - | lwzx TMP2, BASE, RC - | lfdx f14, BASE, RB - | lfdx f15, BASE, RC - | checknum cr0, TMP1 - | checknum cr1, TMP2 - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | bge ->vmeta_arith_vv - || break; - ||} - |.endmacro - | - |.macro ins_arithfallback, ins - ||switch (vk) { - ||case 0: - | ins ->vmeta_arith_vn2 - || break; - ||case 1: - | ins ->vmeta_arith_nv2 - || break; - ||default: - | ins ->vmeta_arith_vv2 - || break; - ||} - |.endmacro - | - |.macro intmod, a, b, c - | bl ->vm_modi - |.endmacro - | - |.macro fpmod, a, b, c - |->BC_MODVN_Z: - | fdiv FARG1, b, c - | // NYI: Use internal implementation of floor. - | blex floor // floor(b/c) - | fmul a, FARG1, c - | fsub a, b, a // b - floor(b/c)*c - |.endmacro - | - |.macro ins_arithfp, fpins - | ins_arithpre - |.if "fpins" == "fpmod_" - | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. - |.else - | fpins f0, f14, f15 - | ins_next1 - | stfdx f0, BASE, RA - | ins_next2 - |.endif - |.endmacro - | - |.macro ins_arithdn, intins, fpins - | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||switch (vk) { - ||case 0: - | lwzux TMP1, RB, BASE - | lwzux TMP2, RC, KBASE - | lwz CARG1, 4(RB) - | checknum cr0, TMP1 - | lwz CARG2, 4(RC) - || break; - ||case 1: - | lwzux TMP1, RB, BASE - | lwzux TMP2, RC, KBASE - | lwz CARG2, 4(RB) - | checknum cr0, TMP1 - | lwz CARG1, 4(RC) - || break; - ||default: - | lwzux TMP1, RB, BASE - | lwzux TMP2, RC, BASE - | lwz CARG1, 4(RB) - | checknum cr0, TMP1 - | lwz CARG2, 4(RC) - || break; - ||} - | checknum cr1, TMP2 - | bne >5 - | bne cr1, >5 - | intins CARG1, CARG1, CARG2 - | bso >4 - |1: - | ins_next1 - | stwux TISNUM, RA, BASE - | stw CARG1, 4(RA) - |2: - | ins_next2 - |4: // Overflow. - | checkov TMP0, <1 // Ignore unrelated overflow. - | ins_arithfallback b - |5: // FP variant. - ||if (vk == 1) { - | lfd f15, 0(RB) - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | lfd f14, 0(RC) - ||} else { - | lfd f14, 0(RB) - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | lfd f15, 0(RC) - ||} - | ins_arithfallback bge - |.if "fpins" == "fpmod_" - | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. - |.else - | fpins f0, f14, f15 - | ins_next1 - | stfdx f0, BASE, RA - | b <2 - |.endif - |.endmacro - | - |.macro ins_arith, intins, fpins - |.if DUALNUM - | ins_arithdn intins, fpins - |.else - | ins_arithfp fpins - |.endif - |.endmacro - - case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - |.if GPR64 - |.macro addo32., y, a, b - | // Need to check overflow for (a<<32) + (b<<32). - | rldicr TMP0, a, 32, 31 - | rldicr TMP3, b, 32, 31 - | addo. TMP0, TMP0, TMP3 - | add y, a, b - |.endmacro - | ins_arith addo32., fadd - |.else - | ins_arith addo., fadd - |.endif - break; - case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - |.if GPR64 - |.macro subo32., y, a, b - | // Need to check overflow for (a<<32) - (b<<32). - | rldicr TMP0, a, 32, 31 - | rldicr TMP3, b, 32, 31 - | subo. TMP0, TMP0, TMP3 - | sub y, a, b - |.endmacro - | ins_arith subo32., fsub - |.else - | ins_arith subo., fsub - |.endif - break; - case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arith mullwo., fmul - break; - case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: - | ins_arithfp fdiv - break; - case BC_MODVN: - | ins_arith intmod, fpmod - break; - case BC_MODNV: case BC_MODVV: - | ins_arith intmod, fpmod_ - break; - case BC_POW: - | // NYI: (partial) integer arithmetic. - | lwzx TMP1, BASE, RB - | lfdx FARG1, BASE, RB - | lwzx TMP2, BASE, RC - | lfdx FARG2, BASE, RC - | checknum cr0, TMP1 - | checknum cr1, TMP2 - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | bge ->vmeta_arith_vv - | blex pow - | ins_next1 - | stfdx FARG1, BASE, RA - | ins_next2 - break; - - case BC_CAT: - | // RA = dst*8, RB = src_start*8, RC = src_end*8 - | sub CARG3, RC, RB - | stp BASE, L->base - | add CARG2, BASE, RC - | mr SAVE0, RB - |->BC_CAT_Z: - | stw PC, SAVE_PC - | mr CARG1, L - | srwi CARG3, CARG3, 3 - | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) - | // Returns NULL (finished) or TValue * (metamethod). - | cmplwi CRET1, 0 - | lp BASE, L->base - | bne ->vmeta_binop - | ins_next1 - | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. - | stfdx f0, BASE, RA - | ins_next2 - break; - - /* -- Constant ops ------------------------------------------------------ */ - - case BC_KSTR: - | // RA = dst*8, RD = str_const*8 (~) - | srwi TMP1, RD, 1 - | subfic TMP1, TMP1, -4 - | ins_next1 - | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4 - | li TMP2, LJ_TSTR - | stwux TMP2, RA, BASE - | stw TMP0, 4(RA) - | ins_next2 - break; - case BC_KCDATA: - |.if FFI - | // RA = dst*8, RD = cdata_const*8 (~) - | srwi TMP1, RD, 1 - | subfic TMP1, TMP1, -4 - | ins_next1 - | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4 - | li TMP2, LJ_TCDATA - | stwux TMP2, RA, BASE - | stw TMP0, 4(RA) - | ins_next2 - |.endif - break; - case BC_KSHORT: - | // RA = dst*8, RD = int16_literal*8 - |.if DUALNUM - | slwi RD, RD, 13 - | srawi RD, RD, 16 - | ins_next1 - | stwux TISNUM, RA, BASE - | stw RD, 4(RA) - | ins_next2 - |.else - | // The soft-float approach is faster. - | slwi RD, RD, 13 - | srawi TMP1, RD, 31 - | xor TMP2, TMP1, RD - | sub TMP2, TMP2, TMP1 // TMP2 = abs(x) - | cntlzw TMP3, TMP2 - | subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1 - | slw TMP2, TMP2, TMP3 // TMP2 = left aligned mantissa - | subfic TMP3, RD, 0 - | slwi TMP1, TMP1, 20 - | rlwimi RD, TMP2, 21, 1, 31 // hi = sign(x) | (mantissa>>11) - | subfe TMP0, TMP0, TMP0 - | add RD, RD, TMP1 // hi = hi + exponent-1 - | and RD, RD, TMP0 // hi = x == 0 ? 0 : hi - | ins_next1 - | stwux RD, RA, BASE - | stw ZERO, 4(RA) - | ins_next2 - |.endif - break; - case BC_KNUM: - | // RA = dst*8, RD = num_const*8 - | ins_next1 - | lfdx f0, KBASE, RD - | stfdx f0, BASE, RA - | ins_next2 - break; - case BC_KPRI: - | // RA = dst*8, RD = primitive_type*8 (~) - | srwi TMP1, RD, 3 - | not TMP0, TMP1 - | ins_next1 - | stwx TMP0, BASE, RA - | ins_next2 - break; - case BC_KNIL: - | // RA = base*8, RD = end*8 - | stwx TISNIL, BASE, RA - | addi RA, RA, 8 - |1: - | stwx TISNIL, BASE, RA - | cmpw RA, RD - | addi RA, RA, 8 - | blt <1 - | ins_next_ - break; - - /* -- Upvalue and function ops ------------------------------------------ */ - - case BC_UGET: - | // RA = dst*8, RD = uvnum*8 - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi RD, RD, 1 - | addi RD, RD, offsetof(GCfuncL, uvptr) - | lwzx UPVAL:RB, LFUNC:RB, RD - | ins_next1 - | lwz TMP1, UPVAL:RB->v - | lfd f0, 0(TMP1) - | stfdx f0, BASE, RA - | ins_next2 - break; - case BC_USETV: - | // RA = uvnum*8, RD = src*8 - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi RA, RA, 1 - | addi RA, RA, offsetof(GCfuncL, uvptr) - | lfdux f0, RD, BASE - | lwzx UPVAL:RB, LFUNC:RB, RA - | lbz TMP3, UPVAL:RB->marked - | lwz CARG2, UPVAL:RB->v - | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) - | lbz TMP0, UPVAL:RB->closed - | lwz TMP2, 0(RD) - | stfd f0, 0(CARG2) - | cmplwi cr1, TMP0, 0 - | lwz TMP1, 4(RD) - | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq - | subi TMP2, TMP2, (LJ_TNUMX+1) - | bne >2 // Upvalue is closed and black? - |1: - | ins_next - | - |2: // Check if new value is collectable. - | cmplwi TMP2, LJ_TISGCV - (LJ_TNUMX+1) - | bge <1 // tvisgcv(v) - | lbz TMP3, GCOBJ:TMP1->gch.marked - | andix. TMP3, TMP3, LJ_GC_WHITES // iswhite(v) - | la CARG1, GG_DISP2G(DISPATCH) - | // Crossed a write barrier. Move the barrier forward. - | beq <1 - | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) - | b <1 - break; - case BC_USETS: - | // RA = uvnum*8, RD = str_const*8 (~) - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi TMP1, RD, 1 - | srwi RA, RA, 1 - | subfic TMP1, TMP1, -4 - | addi RA, RA, offsetof(GCfuncL, uvptr) - | lwzx STR:TMP1, KBASE, TMP1 // KBASE-4-str_const*4 - | lwzx UPVAL:RB, LFUNC:RB, RA - | lbz TMP3, UPVAL:RB->marked - | lwz CARG2, UPVAL:RB->v - | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) - | lbz TMP3, STR:TMP1->marked - | lbz TMP2, UPVAL:RB->closed - | li TMP0, LJ_TSTR - | stw STR:TMP1, 4(CARG2) - | stw TMP0, 0(CARG2) - | bne >2 - |1: - | ins_next - | - |2: // Check if string is white and ensure upvalue is closed. - | andix. TMP3, TMP3, LJ_GC_WHITES // iswhite(str) - | cmplwi cr1, TMP2, 0 - | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq - | la CARG1, GG_DISP2G(DISPATCH) - | // Crossed a write barrier. Move the barrier forward. - | beq <1 - | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) - | b <1 - break; - case BC_USETN: - | // RA = uvnum*8, RD = num_const*8 - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi RA, RA, 1 - | addi RA, RA, offsetof(GCfuncL, uvptr) - | lfdx f0, KBASE, RD - | lwzx UPVAL:RB, LFUNC:RB, RA - | ins_next1 - | lwz TMP1, UPVAL:RB->v - | stfd f0, 0(TMP1) - | ins_next2 - break; - case BC_USETP: - | // RA = uvnum*8, RD = primitive_type*8 (~) - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi RA, RA, 1 - | srwi TMP0, RD, 3 - | addi RA, RA, offsetof(GCfuncL, uvptr) - | not TMP0, TMP0 - | lwzx UPVAL:RB, LFUNC:RB, RA - | ins_next1 - | lwz TMP1, UPVAL:RB->v - | stw TMP0, 0(TMP1) - | ins_next2 - break; - - case BC_UCLO: - | // RA = level*8, RD = target - | lwz TMP1, L->openupval - | branch_RD // Do this first since RD is not saved. - | stp BASE, L->base - | cmplwi TMP1, 0 - | mr CARG1, L - | beq >1 - | add CARG2, BASE, RA - | bl extern lj_func_closeuv // (lua_State *L, TValue *level) - | lp BASE, L->base - |1: - | ins_next - break; - - case BC_FNEW: - | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) - | srwi TMP1, RD, 1 - | stp BASE, L->base - | subfic TMP1, TMP1, -4 - | stw PC, SAVE_PC - | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4 - | mr CARG1, L - | lwz CARG3, FRAME_FUNC(BASE) - | // (lua_State *L, GCproto *pt, GCfuncL *parent) - | bl extern lj_func_newL_gc - | // Returns GCfuncL *. - | lp BASE, L->base - | li TMP0, LJ_TFUNC - | stwux TMP0, RA, BASE - | stw LFUNC:CRET1, 4(RA) - | ins_next - break; - - /* -- Table ops --------------------------------------------------------- */ - - case BC_TNEW: - case BC_TDUP: - | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) - | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH) - | mr CARG1, L - | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) - | stp BASE, L->base - | cmplw TMP0, TMP1 - | stw PC, SAVE_PC - | bge >5 - |1: - if (op == BC_TNEW) { - | rlwinm CARG2, RD, 29, 21, 31 - | rlwinm CARG3, RD, 18, 27, 31 - | cmpwi CARG2, 0x7ff; beq >3 - |2: - | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) - | // Returns Table *. - } else { - | srwi TMP1, RD, 1 - | subfic TMP1, TMP1, -4 - | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4 - | bl extern lj_tab_dup // (lua_State *L, Table *kt) - | // Returns Table *. - } - | lp BASE, L->base - | li TMP0, LJ_TTAB - | stwux TMP0, RA, BASE - | stw TAB:CRET1, 4(RA) - | ins_next - if (op == BC_TNEW) { - |3: - | li CARG2, 0x801 - | b <2 - } - |5: - | mr SAVE0, RD - | bl extern lj_gc_step_fixtop // (lua_State *L) - | mr RD, SAVE0 - | mr CARG1, L - | b <1 - break; - - case BC_GGET: - | // RA = dst*8, RD = str_const*8 (~) - case BC_GSET: - | // RA = src*8, RD = str_const*8 (~) - | lwz LFUNC:TMP2, FRAME_FUNC(BASE) - | srwi TMP1, RD, 1 - | lwz TAB:RB, LFUNC:TMP2->env - | subfic TMP1, TMP1, -4 - | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 - if (op == BC_GGET) { - | b ->BC_TGETS_Z - } else { - | b ->BC_TSETS_Z - } - break; - - case BC_TGETV: - | // RA = dst*8, RB = table*8, RC = key*8 - | lwzux CARG1, RB, BASE - | lwzux CARG2, RC, BASE - | lwz TAB:RB, 4(RB) - |.if DUALNUM - | lwz RC, 4(RC) - |.else - | lfd f0, 0(RC) - |.endif - | checktab CARG1 - | checknum cr1, CARG2 - | bne ->vmeta_tgetv - |.if DUALNUM - | lwz TMP0, TAB:RB->asize - | bne cr1, >5 - | lwz TMP1, TAB:RB->array - | cmplw TMP0, RC - | slwi TMP2, RC, 3 - |.else - | bge cr1, >5 - | // Convert number key to integer, check for integerness and range. - | fctiwz f1, f0 - | fadd f2, f0, TOBIT - | stfd f1, TMPD - | lwz TMP0, TAB:RB->asize - | fsub f2, f2, TOBIT - | lwz TMP2, TMPD_LO - | lwz TMP1, TAB:RB->array - | fcmpu cr1, f0, f2 - | cmplw cr0, TMP0, TMP2 - | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq - | slwi TMP2, TMP2, 3 - |.endif - | ble ->vmeta_tgetv // Integer key and in array part? - | lwzx TMP0, TMP1, TMP2 - | lfdx f14, TMP1, TMP2 - | checknil TMP0; beq >2 - |1: - | ins_next1 - | stfdx f14, BASE, RA - | ins_next2 - | - |2: // Check for __index if table value is nil. - | lwz TAB:TMP2, TAB:RB->metatable - | cmplwi TAB:TMP2, 0 - | beq <1 // No metatable: done. - | lbz TMP0, TAB:TMP2->nomm - | andix. TMP0, TMP0, 1<vmeta_tgetv - | - |5: - | checkstr CARG2; bne ->vmeta_tgetv - |.if not DUALNUM - | lwz STR:RC, 4(RC) - |.endif - | b ->BC_TGETS_Z // String key? - break; - case BC_TGETS: - | // RA = dst*8, RB = table*8, RC = str_const*8 (~) - | lwzux CARG1, RB, BASE - | srwi TMP1, RC, 1 - | lwz TAB:RB, 4(RB) - | subfic TMP1, TMP1, -4 - | checktab CARG1 - | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 - | bne ->vmeta_tgets1 - |->BC_TGETS_Z: - | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 - | lwz TMP0, TAB:RB->hmask - | lwz TMP1, STR:RC->hash - | lwz NODE:TMP2, TAB:RB->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | slwi TMP0, TMP1, 5 - | slwi TMP1, TMP1, 3 - | sub TMP1, TMP0, TMP1 - | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - |1: - | lwz CARG1, NODE:TMP2->key - | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) - | lwz CARG2, NODE:TMP2->val - | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2) - | checkstr CARG1; bne >4 - | cmpw TMP0, STR:RC; bne >4 - | checknil CARG2; beq >5 // Key found, but nil value? - |3: - | stwux CARG2, RA, BASE - | stw TMP1, 4(RA) - | ins_next - | - |4: // Follow hash chain. - | lwz NODE:TMP2, NODE:TMP2->next - | cmplwi NODE:TMP2, 0 - | bne <1 - | // End of hash chain: key not found, nil result. - | li CARG2, LJ_TNIL - | - |5: // Check for __index if table value is nil. - | lwz TAB:TMP2, TAB:RB->metatable - | cmplwi TAB:TMP2, 0 - | beq <3 // No metatable: done. - | lbz TMP0, TAB:TMP2->nomm - | andix. TMP0, TMP0, 1<vmeta_tgets - break; - case BC_TGETB: - | // RA = dst*8, RB = table*8, RC = index*8 - | lwzux CARG1, RB, BASE - | srwi TMP0, RC, 3 - | lwz TAB:RB, 4(RB) - | checktab CARG1; bne ->vmeta_tgetb - | lwz TMP1, TAB:RB->asize - | lwz TMP2, TAB:RB->array - | cmplw TMP0, TMP1; bge ->vmeta_tgetb - | lwzx TMP1, TMP2, RC - | lfdx f0, TMP2, RC - | checknil TMP1; beq >5 - |1: - | ins_next1 - | stfdx f0, BASE, RA - | ins_next2 - | - |5: // Check for __index if table value is nil. - | lwz TAB:TMP2, TAB:RB->metatable - | cmplwi TAB:TMP2, 0 - | beq <1 // No metatable: done. - | lbz TMP2, TAB:TMP2->nomm - | andix. TMP2, TMP2, 1<vmeta_tgetb // Caveat: preserve TMP0! - break; - case BC_TGETR: - | // RA = dst*8, RB = table*8, RC = key*8 - | add RB, BASE, RB - | lwz TAB:CARG1, 4(RB) - |.if DUALNUM - | add RC, BASE, RC - | lwz TMP0, TAB:CARG1->asize - | lwz CARG2, 4(RC) - | lwz TMP1, TAB:CARG1->array - |.else - | lfdx f0, BASE, RC - | lwz TMP0, TAB:CARG1->asize - | toint CARG2, f0 - | lwz TMP1, TAB:CARG1->array - |.endif - | cmplw TMP0, CARG2 - | slwi TMP2, CARG2, 3 - | ble ->vmeta_tgetr // In array part? - | lfdx f14, TMP1, TMP2 - |->BC_TGETR_Z: - | ins_next1 - | stfdx f14, BASE, RA - | ins_next2 - break; - - case BC_TSETV: - | // RA = src*8, RB = table*8, RC = key*8 - | lwzux CARG1, RB, BASE - | lwzux CARG2, RC, BASE - | lwz TAB:RB, 4(RB) - |.if DUALNUM - | lwz RC, 4(RC) - |.else - | lfd f0, 0(RC) - |.endif - | checktab CARG1 - | checknum cr1, CARG2 - | bne ->vmeta_tsetv - |.if DUALNUM - | lwz TMP0, TAB:RB->asize - | bne cr1, >5 - | lwz TMP1, TAB:RB->array - | cmplw TMP0, RC - | slwi TMP0, RC, 3 - |.else - | bge cr1, >5 - | // Convert number key to integer, check for integerness and range. - | fctiwz f1, f0 - | fadd f2, f0, TOBIT - | stfd f1, TMPD - | lwz TMP0, TAB:RB->asize - | fsub f2, f2, TOBIT - | lwz TMP2, TMPD_LO - | lwz TMP1, TAB:RB->array - | fcmpu cr1, f0, f2 - | cmplw cr0, TMP0, TMP2 - | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq - | slwi TMP0, TMP2, 3 - |.endif - | ble ->vmeta_tsetv // Integer key and in array part? - | lwzx TMP2, TMP1, TMP0 - | lbz TMP3, TAB:RB->marked - | lfdx f14, BASE, RA - | checknil TMP2; beq >3 - |1: - | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) - | stfdx f14, TMP1, TMP0 - | bne >7 - |2: - | ins_next - | - |3: // Check for __newindex if previous value is nil. - | lwz TAB:TMP2, TAB:RB->metatable - | cmplwi TAB:TMP2, 0 - | beq <1 // No metatable: done. - | lbz TMP2, TAB:TMP2->nomm - | andix. TMP2, TMP2, 1<vmeta_tsetv - | - |5: - | checkstr CARG2; bne ->vmeta_tsetv - |.if not DUALNUM - | lwz STR:RC, 4(RC) - |.endif - | b ->BC_TSETS_Z // String key? - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0 - | b <2 - break; - case BC_TSETS: - | // RA = src*8, RB = table*8, RC = str_const*8 (~) - | lwzux CARG1, RB, BASE - | srwi TMP1, RC, 1 - | lwz TAB:RB, 4(RB) - | subfic TMP1, TMP1, -4 - | checktab CARG1 - | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 - | bne ->vmeta_tsets1 - |->BC_TSETS_Z: - | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8 - | lwz TMP0, TAB:RB->hmask - | lwz TMP1, STR:RC->hash - | lwz NODE:TMP2, TAB:RB->node - | stb ZERO, TAB:RB->nomm // Clear metamethod cache. - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | lfdx f14, BASE, RA - | slwi TMP0, TMP1, 5 - | slwi TMP1, TMP1, 3 - | sub TMP1, TMP0, TMP1 - | lbz TMP3, TAB:RB->marked - | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - |1: - | lwz CARG1, NODE:TMP2->key - | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) - | lwz CARG2, NODE:TMP2->val - | lwz NODE:TMP1, NODE:TMP2->next - | checkstr CARG1; bne >5 - | cmpw TMP0, STR:RC; bne >5 - | checknil CARG2; beq >4 // Key found, but nil value? - |2: - | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) - | stfd f14, NODE:TMP2->val - | bne >7 - |3: - | ins_next - | - |4: // Check for __newindex if previous value is nil. - | lwz TAB:TMP1, TAB:RB->metatable - | cmplwi TAB:TMP1, 0 - | beq <2 // No metatable: done. - | lbz TMP0, TAB:TMP1->nomm - | andix. TMP0, TMP0, 1<vmeta_tsets - | - |5: // Follow hash chain. - | cmplwi NODE:TMP1, 0 - | mr NODE:TMP2, NODE:TMP1 - | bne <1 - | // End of hash chain: key not found, add a new one. - | - | // But check for __newindex first. - | lwz TAB:TMP1, TAB:RB->metatable - | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - | stw PC, SAVE_PC - | mr CARG1, L - | cmplwi TAB:TMP1, 0 - | stp BASE, L->base - | beq >6 // No metatable: continue. - | lbz TMP0, TAB:TMP1->nomm - | andix. TMP0, TMP0, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. - |6: - | li TMP0, LJ_TSTR - | stw STR:RC, 4(CARG3) - | mr CARG2, TAB:RB - | stw TMP0, 0(CARG3) - | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) - | // Returns TValue *. - | lp BASE, L->base - | stfd f14, 0(CRET1) - | b <3 // No 2nd write barrier needed. - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0 - | b <3 - break; - case BC_TSETB: - | // RA = src*8, RB = table*8, RC = index*8 - | lwzux CARG1, RB, BASE - | srwi TMP0, RC, 3 - | lwz TAB:RB, 4(RB) - | checktab CARG1; bne ->vmeta_tsetb - | lwz TMP1, TAB:RB->asize - | lwz TMP2, TAB:RB->array - | lbz TMP3, TAB:RB->marked - | cmplw TMP0, TMP1 - | lfdx f14, BASE, RA - | bge ->vmeta_tsetb - | lwzx TMP1, TMP2, RC - | checknil TMP1; beq >5 - |1: - | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) - | stfdx f14, TMP2, RC - | bne >7 - |2: - | ins_next - | - |5: // Check for __newindex if previous value is nil. - | lwz TAB:TMP1, TAB:RB->metatable - | cmplwi TAB:TMP1, 0 - | beq <1 // No metatable: done. - | lbz TMP1, TAB:TMP1->nomm - | andix. TMP1, TMP1, 1<vmeta_tsetb // Caveat: preserve TMP0! - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0 - | b <2 - break; - case BC_TSETR: - | // RA = dst*8, RB = table*8, RC = key*8 - | add RB, BASE, RB - | lwz TAB:CARG2, 4(RB) - |.if DUALNUM - | add RC, BASE, RC - | lbz TMP3, TAB:CARG2->marked - | lwz TMP0, TAB:CARG2->asize - | lwz CARG3, 4(RC) - | lwz TMP1, TAB:CARG2->array - |.else - | lfdx f0, BASE, RC - | lbz TMP3, TAB:CARG2->marked - | lwz TMP0, TAB:CARG2->asize - | toint CARG3, f0 - | lwz TMP1, TAB:CARG2->array - |.endif - | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) - | bne >7 - |2: - | cmplw TMP0, CARG3 - | slwi TMP2, CARG3, 3 - | lfdx f14, BASE, RA - | ble ->vmeta_tsetr // In array part? - | ins_next1 - | stfdx f14, TMP1, TMP2 - | ins_next2 - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP3, TMP2 - | b <2 - break; - - - case BC_TSETM: - | // RA = base*8 (table at base-1), RD = num_const*8 (start index) - | add RA, BASE, RA - |1: - | add TMP3, KBASE, RD - | lwz TAB:CARG2, -4(RA) // Guaranteed to be a table. - | addic. TMP0, MULTRES, -8 - | lwz TMP3, 4(TMP3) // Integer constant is in lo-word. - | srwi CARG3, TMP0, 3 - | beq >4 // Nothing to copy? - | add CARG3, CARG3, TMP3 - | lwz TMP2, TAB:CARG2->asize - | slwi TMP1, TMP3, 3 - | lbz TMP3, TAB:CARG2->marked - | cmplw CARG3, TMP2 - | add TMP2, RA, TMP0 - | lwz TMP0, TAB:CARG2->array - | bgt >5 - | add TMP1, TMP1, TMP0 - | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) - |3: // Copy result slots to table. - | lfd f0, 0(RA) - | addi RA, RA, 8 - | cmpw cr1, RA, TMP2 - | stfd f0, 0(TMP1) - | addi TMP1, TMP1, 8 - | blt cr1, <3 - | bne >7 - |4: - | ins_next - | - |5: // Need to resize array part. - | stp BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | mr SAVE0, RD - | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) - | // Must not reallocate the stack. - | mr RD, SAVE0 - | b <1 - | - |7: // Possible table write barrier for any value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP3, TMP0 - | b <4 - break; - - /* -- Calls and vararg handling ----------------------------------------- */ - - case BC_CALLM: - | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 - | add NARGS8:RC, NARGS8:RC, MULTRES - | // Fall through. Assumes BC_CALL follows. - break; - case BC_CALL: - | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 - | mr TMP2, BASE - | lwzux TMP0, BASE, RA - | lwz LFUNC:RB, 4(BASE) - | subi NARGS8:RC, NARGS8:RC, 8 - | addi BASE, BASE, 8 - | checkfunc TMP0; bne ->vmeta_call - | ins_call - break; - - case BC_CALLMT: - | // RA = base*8, (RB = 0,) RC = extra_nargs*8 - | add NARGS8:RC, NARGS8:RC, MULTRES - | // Fall through. Assumes BC_CALLT follows. - break; - case BC_CALLT: - | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 - | lwzux TMP0, RA, BASE - | lwz LFUNC:RB, 4(RA) - | subi NARGS8:RC, NARGS8:RC, 8 - | lwz TMP1, FRAME_PC(BASE) - | checkfunc TMP0 - | addi RA, RA, 8 - | bne ->vmeta_callt - |->BC_CALLT_Z: - | andix. TMP0, TMP1, FRAME_TYPE // Caveat: preserve cr0 until the crand. - | lbz TMP3, LFUNC:RB->ffid - | xori TMP2, TMP1, FRAME_VARG - | cmplwi cr1, NARGS8:RC, 0 - | bne >7 - |1: - | stw LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. - | li TMP2, 0 - | cmplwi cr7, TMP3, 1 // (> FF_C) Calling a fast function? - | beq cr1, >3 - |2: - | addi TMP3, TMP2, 8 - | lfdx f0, RA, TMP2 - | cmplw cr1, TMP3, NARGS8:RC - | stfdx f0, BASE, TMP2 - | mr TMP2, TMP3 - | bne cr1, <2 - |3: - | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+gt - | beq >5 - |4: - | ins_callt - | - |5: // Tailcall to a fast function with a Lua frame below. - | lwz INS, -4(TMP1) - | decode_RA8 RA, INS - | sub TMP1, BASE, RA - | lwz LFUNC:TMP1, FRAME_FUNC-8(TMP1) - | lwz TMP1, LFUNC:TMP1->pc - | lwz KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. - | b <4 - | - |7: // Tailcall from a vararg function. - | andix. TMP0, TMP2, FRAME_TYPEP - | bne <1 // Vararg frame below? - | sub BASE, BASE, TMP2 // Relocate BASE down. - | lwz TMP1, FRAME_PC(BASE) - | andix. TMP0, TMP1, FRAME_TYPE - | b <1 - break; - - case BC_ITERC: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) - | mr TMP2, BASE - | add BASE, BASE, RA - | lwz TMP1, -24(BASE) - | lwz LFUNC:RB, -20(BASE) - | lfd f1, -8(BASE) - | lfd f0, -16(BASE) - | stw TMP1, 0(BASE) // Copy callable. - | stw LFUNC:RB, 4(BASE) - | checkfunc TMP1 - | stfd f1, 16(BASE) // Copy control var. - | li NARGS8:RC, 16 // Iterators get 2 arguments. - | stfdu f0, 8(BASE) // Copy state. - | bne ->vmeta_call - | ins_call - break; - - case BC_ITERN: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. - |.endif - | add RA, BASE, RA - | lwz TAB:RB, -12(RA) - | lwz RC, -4(RA) // Get index from control var. - | lwz TMP0, TAB:RB->asize - | lwz TMP1, TAB:RB->array - | addi PC, PC, 4 - |1: // Traverse array part. - | cmplw RC, TMP0 - | slwi TMP3, RC, 3 - | bge >5 // Index points after array part? - | lwzx TMP2, TMP1, TMP3 - | lfdx f0, TMP1, TMP3 - | checknil TMP2 - | lwz INS, -4(PC) - | beq >4 - |.if DUALNUM - | stw RC, 4(RA) - | stw TISNUM, 0(RA) - |.else - | tonum_u f1, RC - |.endif - | addi RC, RC, 1 - | addis TMP3, PC, -(BCBIAS_J*4 >> 16) - | stfd f0, 8(RA) - | decode_RD4 TMP1, INS - | stw RC, -4(RA) // Update control var. - | add PC, TMP1, TMP3 - |.if not DUALNUM - | stfd f1, 0(RA) - |.endif - |3: - | ins_next - | - |4: // Skip holes in array part. - | addi RC, RC, 1 - | b <1 - | - |5: // Traverse hash part. - | lwz TMP1, TAB:RB->hmask - | sub RC, RC, TMP0 - | lwz TMP2, TAB:RB->node - |6: - | cmplw RC, TMP1 // End of iteration? Branch to ITERL+1. - | slwi TMP3, RC, 5 - | bgty <3 - | slwi RB, RC, 3 - | sub TMP3, TMP3, RB - | lwzx RB, TMP2, TMP3 - | lfdx f0, TMP2, TMP3 - | add NODE:TMP3, TMP2, TMP3 - | checknil RB - | lwz INS, -4(PC) - | beq >7 - | lfd f1, NODE:TMP3->key - | addis TMP2, PC, -(BCBIAS_J*4 >> 16) - | stfd f0, 8(RA) - | add RC, RC, TMP0 - | decode_RD4 TMP1, INS - | stfd f1, 0(RA) - | addi RC, RC, 1 - | add PC, TMP1, TMP2 - | stw RC, -4(RA) // Update control var. - | b <3 - | - |7: // Skip holes in hash part. - | addi RC, RC, 1 - | b <6 - break; - - case BC_ISNEXT: - | // RA = base*8, RD = target (points to ITERN) - | add RA, BASE, RA - | lwz TMP0, -24(RA) - | lwz CFUNC:TMP1, -20(RA) - | lwz TMP2, -16(RA) - | lwz TMP3, -8(RA) - | cmpwi cr0, TMP2, LJ_TTAB - | cmpwi cr1, TMP0, LJ_TFUNC - | cmpwi cr6, TMP3, LJ_TNIL - | bne cr1, >5 - | lbz TMP1, CFUNC:TMP1->ffid - | crand 4*cr0+eq, 4*cr0+eq, 4*cr6+eq - | cmpwi cr7, TMP1, FF_next_N - | srwi TMP0, RD, 1 - | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq - | add TMP3, PC, TMP0 - | bne cr0, >5 - | lus TMP1, 0xfffe - | ori TMP1, TMP1, 0x7fff - | stw ZERO, -4(RA) // Initialize control var. - | stw TMP1, -8(RA) - | addis PC, TMP3, -(BCBIAS_J*4 >> 16) - |1: - | ins_next - |5: // Despecialize bytecode if any of the checks fail. - | li TMP0, BC_JMP - | li TMP1, BC_ITERC - | stb TMP0, -1(PC) - | addis PC, TMP3, -(BCBIAS_J*4 >> 16) - | stb TMP1, 3(PC) - | b <1 - break; - - case BC_VARG: - | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 - | lwz TMP0, FRAME_PC(BASE) - | add RC, BASE, RC - | add RA, BASE, RA - | addi RC, RC, FRAME_VARG - | add TMP2, RA, RB - | subi TMP3, BASE, 8 // TMP3 = vtop - | sub RC, RC, TMP0 // RC = vbase - | // Note: RC may now be even _above_ BASE if nargs was < numparams. - | cmplwi cr1, RB, 0 - |.if PPE - | sub TMP1, TMP3, RC - | cmpwi TMP1, 0 - |.else - | sub. TMP1, TMP3, RC - |.endif - | beq cr1, >5 // Copy all varargs? - | subi TMP2, TMP2, 16 - | ble >2 // No vararg slots? - |1: // Copy vararg slots to destination slots. - | lfd f0, 0(RC) - | addi RC, RC, 8 - | stfd f0, 0(RA) - | cmplw RA, TMP2 - | cmplw cr1, RC, TMP3 - | bge >3 // All destination slots filled? - | addi RA, RA, 8 - | blt cr1, <1 // More vararg slots? - |2: // Fill up remainder with nil. - | stw TISNIL, 0(RA) - | cmplw RA, TMP2 - | addi RA, RA, 8 - | blt <2 - |3: - | ins_next - | - |5: // Copy all varargs. - | lwz TMP0, L->maxstack - | li MULTRES, 8 // MULTRES = (0+1)*8 - | bley <3 // No vararg slots? - | add TMP2, RA, TMP1 - | cmplw TMP2, TMP0 - | addi MULTRES, TMP1, 8 - | bgt >7 - |6: - | lfd f0, 0(RC) - | addi RC, RC, 8 - | stfd f0, 0(RA) - | cmplw RC, TMP3 - | addi RA, RA, 8 - | blt <6 // More vararg slots? - | b <3 - | - |7: // Grow stack for varargs. - | mr CARG1, L - | stp RA, L->top - | sub SAVE0, RC, BASE // Need delta, because BASE may change. - | stp BASE, L->base - | sub RA, RA, BASE - | stw PC, SAVE_PC - | srwi CARG2, TMP1, 3 - | bl extern lj_state_growstack // (lua_State *L, int n) - | lp BASE, L->base - | add RA, BASE, RA - | add RC, BASE, SAVE0 - | subi TMP3, BASE, 8 - | b <6 - break; - - /* -- Returns ----------------------------------------------------------- */ - - case BC_RETM: - | // RA = results*8, RD = extra_nresults*8 - | add RD, RD, MULTRES // MULTRES >= 8, so RD >= 8. - | // Fall through. Assumes BC_RET follows. - break; - - case BC_RET: - | // RA = results*8, RD = (nresults+1)*8 - | lwz PC, FRAME_PC(BASE) - | add RA, BASE, RA - | mr MULTRES, RD - |1: - | andix. TMP0, PC, FRAME_TYPE - | xori TMP1, PC, FRAME_VARG - | bne ->BC_RETV_Z - | - |->BC_RET_Z: - | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return - | lwz INS, -4(PC) - | cmpwi RD, 8 - | subi TMP2, BASE, 8 - | subi RC, RD, 8 - | decode_RB8 RB, INS - | beq >3 - | li TMP1, 0 - |2: - | addi TMP3, TMP1, 8 - | lfdx f0, RA, TMP1 - | cmpw TMP3, RC - | stfdx f0, TMP2, TMP1 - | beq >3 - | addi TMP1, TMP3, 8 - | lfdx f1, RA, TMP3 - | cmpw TMP1, RC - | stfdx f1, TMP2, TMP3 - | bne <2 - |3: - |5: - | cmplw RB, RD - | decode_RA8 RA, INS - | bgt >6 - | sub BASE, TMP2, RA - | lwz LFUNC:TMP1, FRAME_FUNC(BASE) - | ins_next1 - | lwz TMP1, LFUNC:TMP1->pc - | lwz KBASE, PC2PROTO(k)(TMP1) - | ins_next2 - | - |6: // Fill up results with nil. - | subi TMP1, RD, 8 - | addi RD, RD, 8 - | stwx TISNIL, TMP2, TMP1 - | b <5 - | - |->BC_RETV_Z: // Non-standard return case. - | andix. TMP2, TMP1, FRAME_TYPEP - | bne ->vm_return - | // Return from vararg function: relocate BASE down. - | sub BASE, BASE, TMP1 - | lwz PC, FRAME_PC(BASE) - | b <1 - break; - - case BC_RET0: case BC_RET1: - | // RA = results*8, RD = (nresults+1)*8 - | lwz PC, FRAME_PC(BASE) - | add RA, BASE, RA - | mr MULTRES, RD - | andix. TMP0, PC, FRAME_TYPE - | xori TMP1, PC, FRAME_VARG - | bney ->BC_RETV_Z - | - | lwz INS, -4(PC) - | subi TMP2, BASE, 8 - | decode_RB8 RB, INS - if (op == BC_RET1) { - | lfd f0, 0(RA) - | stfd f0, 0(TMP2) - } - |5: - | cmplw RB, RD - | decode_RA8 RA, INS - | bgt >6 - | sub BASE, TMP2, RA - | lwz LFUNC:TMP1, FRAME_FUNC(BASE) - | ins_next1 - | lwz TMP1, LFUNC:TMP1->pc - | lwz KBASE, PC2PROTO(k)(TMP1) - | ins_next2 - | - |6: // Fill up results with nil. - | subi TMP1, RD, 8 - | addi RD, RD, 8 - | stwx TISNIL, TMP2, TMP1 - | b <5 - break; - - /* -- Loops and branches ------------------------------------------------ */ - - case BC_FORL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IFORL follows. - break; - - case BC_JFORI: - case BC_JFORL: -#if !LJ_HASJIT - break; -#endif - case BC_FORI: - case BC_IFORL: - | // RA = base*8, RD = target (after end of loop or start of loop) - vk = (op == BC_IFORL || op == BC_JFORL); - |.if DUALNUM - | // Integer loop. - | lwzux TMP1, RA, BASE - | lwz CARG1, FORL_IDX*8+4(RA) - | cmplw cr0, TMP1, TISNUM - if (vk) { - | lwz CARG3, FORL_STEP*8+4(RA) - | bne >9 - |.if GPR64 - | // Need to check overflow for (a<<32) + (b<<32). - | rldicr TMP0, CARG1, 32, 31 - | rldicr TMP2, CARG3, 32, 31 - | add CARG1, CARG1, CARG3 - | addo. TMP0, TMP0, TMP2 - |.else - | addo. CARG1, CARG1, CARG3 - |.endif - | cmpwi cr6, CARG3, 0 - | lwz CARG2, FORL_STOP*8+4(RA) - | bso >6 - |4: - | stw CARG1, FORL_IDX*8+4(RA) - } else { - | lwz TMP3, FORL_STEP*8(RA) - | lwz CARG3, FORL_STEP*8+4(RA) - | lwz TMP2, FORL_STOP*8(RA) - | lwz CARG2, FORL_STOP*8+4(RA) - | cmplw cr7, TMP3, TISNUM - | cmplw cr1, TMP2, TISNUM - | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq - | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq - | cmpwi cr6, CARG3, 0 - | bne >9 - } - | blt cr6, >5 - | cmpw CARG1, CARG2 - |1: - | stw TISNUM, FORL_EXT*8(RA) - if (op != BC_JFORL) { - | srwi RD, RD, 1 - } - | stw CARG1, FORL_EXT*8+4(RA) - if (op != BC_JFORL) { - | add RD, PC, RD - } - if (op == BC_FORI) { - | bgt >3 // See FP loop below. - } else if (op == BC_JFORI) { - | addis PC, RD, -(BCBIAS_J*4 >> 16) - | bley >7 - } else if (op == BC_IFORL) { - | bgt >2 - | addis PC, RD, -(BCBIAS_J*4 >> 16) - } else { - | bley =>BC_JLOOP - } - |2: - | ins_next - |5: // Invert check for negative step. - | cmpw CARG2, CARG1 - | b <1 - if (vk) { - |6: // Potential overflow. - | checkov TMP0, <4 // Ignore unrelated overflow. - | b <2 - } - |.endif - if (vk) { - |.if DUALNUM - |9: // FP loop. - | lfd f1, FORL_IDX*8(RA) - |.else - | lfdux f1, RA, BASE - |.endif - | lfd f3, FORL_STEP*8(RA) - | lfd f2, FORL_STOP*8(RA) - | lwz TMP3, FORL_STEP*8(RA) - | fadd f1, f1, f3 - | stfd f1, FORL_IDX*8(RA) - } else { - |.if DUALNUM - |9: // FP loop. - |.else - | lwzux TMP1, RA, BASE - | lwz TMP3, FORL_STEP*8(RA) - | lwz TMP2, FORL_STOP*8(RA) - | cmplw cr0, TMP1, TISNUM - | cmplw cr7, TMP3, TISNUM - | cmplw cr1, TMP2, TISNUM - |.endif - | lfd f1, FORL_IDX*8(RA) - | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | lfd f2, FORL_STOP*8(RA) - | bge ->vmeta_for - } - | cmpwi cr6, TMP3, 0 - if (op != BC_JFORL) { - | srwi RD, RD, 1 - } - | stfd f1, FORL_EXT*8(RA) - if (op != BC_JFORL) { - | add RD, PC, RD - } - | fcmpu cr0, f1, f2 - if (op == BC_JFORI) { - | addis PC, RD, -(BCBIAS_J*4 >> 16) - } - | blt cr6, >5 - if (op == BC_FORI) { - | bgt >3 - } else if (op == BC_IFORL) { - |.if DUALNUM - | bgty <2 - |.else - | bgt >2 - |.endif - |1: - | addis PC, RD, -(BCBIAS_J*4 >> 16) - } else if (op == BC_JFORI) { - | bley >7 - } else { - | bley =>BC_JLOOP - } - |.if DUALNUM - | b <2 - |.else - |2: - | ins_next - |.endif - |5: // Negative step. - if (op == BC_FORI) { - | bge <2 - |3: // Used by integer loop, too. - | addis PC, RD, -(BCBIAS_J*4 >> 16) - } else if (op == BC_IFORL) { - | bgey <1 - } else if (op == BC_JFORI) { - | bgey >7 - } else { - | bgey =>BC_JLOOP - } - | b <2 - if (op == BC_JFORI) { - |7: - | lwz INS, -4(PC) - | decode_RD8 RD, INS - | b =>BC_JLOOP - } - break; - - case BC_ITERL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IITERL follows. - break; - - case BC_JITERL: -#if !LJ_HASJIT - break; -#endif - case BC_IITERL: - | // RA = base*8, RD = target - | lwzux TMP1, RA, BASE - | lwz TMP2, 4(RA) - | checknil TMP1; beq >1 // Stop if iterator returned nil. - if (op == BC_JITERL) { - | stw TMP1, -8(RA) - | stw TMP2, -4(RA) - | b =>BC_JLOOP - } else { - | branch_RD // Otherwise save control var + branch. - | stw TMP1, -8(RA) - | stw TMP2, -4(RA) - } - |1: - | ins_next - break; - - case BC_LOOP: - | // RA = base*8, RD = target (loop extent) - | // Note: RA/RD is only used by trace recorder to determine scope/extent - | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_ILOOP follows. - break; - - case BC_ILOOP: - | // RA = base*8, RD = target (loop extent) - | ins_next - break; - - case BC_JLOOP: - |.if JIT - | // RA = base*8 (ignored), RD = traceno*8 - | lwz TMP1, DISPATCH_J(trace)(DISPATCH) - | srwi RD, RD, 1 - | // Traces on PPC don't store the trace number, so use 0. - | stw ZERO, DISPATCH_GL(vmstate)(DISPATCH) - | lwzx TRACE:TMP2, TMP1, RD - | clrso TMP1 - | lp TMP2, TRACE:TMP2->mcode - | stw BASE, DISPATCH_GL(jit_base)(DISPATCH) - | mtctr TMP2 - | addi JGL, DISPATCH, GG_DISP2G+32768 - | stw L, DISPATCH_GL(tmpbuf.L)(DISPATCH) - | bctr - |.endif - break; - - case BC_JMP: - | // RA = base*8 (only used by trace recorder), RD = target - | branch_RD - | ins_next - break; - - /* -- Function headers -------------------------------------------------- */ - - case BC_FUNCF: - |.if JIT - | hotcall - |.endif - case BC_FUNCV: /* NYI: compiled vararg functions. */ - | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. - break; - - case BC_JFUNCF: -#if !LJ_HASJIT - break; -#endif - case BC_IFUNCF: - | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 - | lwz TMP2, L->maxstack - | lbz TMP1, -4+PC2PROTO(numparams)(PC) - | lwz KBASE, -4+PC2PROTO(k)(PC) - | cmplw RA, TMP2 - | slwi TMP1, TMP1, 3 - | bgt ->vm_growstack_l - if (op != BC_JFUNCF) { - | ins_next1 - } - |2: - | cmplw NARGS8:RC, TMP1 // Check for missing parameters. - | blt >3 - if (op == BC_JFUNCF) { - | decode_RD8 RD, INS - | b =>BC_JLOOP - } else { - | ins_next2 - } - | - |3: // Clear missing parameters. - | stwx TISNIL, BASE, NARGS8:RC - | addi NARGS8:RC, NARGS8:RC, 8 - | b <2 - break; - - case BC_JFUNCV: -#if !LJ_HASJIT - break; -#endif - | NYI // NYI: compiled vararg functions - break; /* NYI: compiled vararg functions. */ - - case BC_IFUNCV: - | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 - | lwz TMP2, L->maxstack - | add TMP1, BASE, RC - | add TMP0, RA, RC - | stw LFUNC:RB, 4(TMP1) // Store copy of LFUNC. - | addi TMP3, RC, 8+FRAME_VARG - | lwz KBASE, -4+PC2PROTO(k)(PC) - | cmplw TMP0, TMP2 - | stw TMP3, 0(TMP1) // Store delta + FRAME_VARG. - | bge ->vm_growstack_l - | lbz TMP2, -4+PC2PROTO(numparams)(PC) - | mr RA, BASE - | mr RC, TMP1 - | ins_next1 - | cmpwi TMP2, 0 - | addi BASE, TMP1, 8 - | beq >3 - |1: - | cmplw RA, RC // Less args than parameters? - | lwz TMP0, 0(RA) - | lwz TMP3, 4(RA) - | bge >4 - | stw TISNIL, 0(RA) // Clear old fixarg slot (help the GC). - | addi RA, RA, 8 - |2: - | addic. TMP2, TMP2, -1 - | stw TMP0, 8(TMP1) - | stw TMP3, 12(TMP1) - | addi TMP1, TMP1, 8 - | bne <1 - |3: - | ins_next2 - | - |4: // Clear missing parameters. - | li TMP0, LJ_TNIL - | b <2 - break; - - case BC_FUNCC: - case BC_FUNCCW: - | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 - if (op == BC_FUNCC) { - | lp RD, CFUNC:RB->f - } else { - | lp RD, DISPATCH_GL(wrapf)(DISPATCH) - } - | add TMP1, RA, NARGS8:RC - | lwz TMP2, L->maxstack - | .toc lp TMP3, 0(RD) - | add RC, BASE, NARGS8:RC - | stp BASE, L->base - | cmplw TMP1, TMP2 - | stp RC, L->top - | li_vmstate C - |.if TOC - | mtctr TMP3 - |.else - | mtctr RD - |.endif - if (op == BC_FUNCCW) { - | lp CARG2, CFUNC:RB->f - } - | mr CARG1, L - | bgt ->vm_growstack_c // Need to grow stack. - | .toc lp TOCREG, TOC_OFS(RD) - | .tocenv lp ENVREG, ENV_OFS(RD) - | st_vmstate - | bctrl // (lua_State *L [, lua_CFunction f]) - | // Returns nresults. - | lp BASE, L->base - | .toc ld TOCREG, SAVE_TOC - | slwi RD, CRET1, 3 - | lp TMP1, L->top - | li_vmstate INTERP - | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. - | stw L, DISPATCH_GL(cur_L)(DISPATCH) - | sub RA, TMP1, RD // RA = L->top - nresults*8 - | st_vmstate - | b ->vm_returnc - break; - - /* ---------------------------------------------------------------------- */ - - default: - fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); - exit(2); - break; - } -} - -static int build_backend(BuildCtx *ctx) -{ - int op; - - dasm_growpc(Dst, BC__MAX); - - build_subroutines(ctx); - - |.code_op - for (op = 0; op < BC__MAX; op++) - build_ins(ctx, (BCOp)op, op); - - return BC__MAX; -} - -/* Emit pseudo frame-info for all assembler functions. */ -static void emit_asm_debug(BuildCtx *ctx) -{ - int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); - int i; - switch (ctx->mode) { - case BUILD_elfasm: - fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); - fprintf(ctx->fp, - ".Lframe0:\n" - "\t.long .LECIE0-.LSCIE0\n" - ".LSCIE0:\n" - "\t.long 0xffffffff\n" - "\t.byte 0x1\n" - "\t.string \"\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 65\n" - "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE0:\n\n"); - fprintf(ctx->fp, - ".LSFDE0:\n" - "\t.long .LEFDE0-.LASFDE0\n" - ".LASFDE0:\n" - "\t.long .Lframe0\n" - "\t.long .Lbegin\n" - "\t.long %d\n" - "\t.byte 0xe\n\t.uleb128 %d\n" - "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" - "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n", - fcofs, CFRAME_SIZE); - for (i = 14; i <= 31; i++) - fprintf(ctx->fp, - "\t.byte %d\n\t.uleb128 %d\n" - "\t.byte %d\n\t.uleb128 %d\n", - 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)); - fprintf(ctx->fp, - "\t.align 2\n" - ".LEFDE0:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".LSFDE1:\n" - "\t.long .LEFDE1-.LASFDE1\n" - ".LASFDE1:\n" - "\t.long .Lframe0\n" -#if LJ_TARGET_PS3 - "\t.long .lj_vm_ffi_call\n" -#else - "\t.long lj_vm_ffi_call\n" -#endif - "\t.long %d\n" - "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" - "\t.byte 0x8e\n\t.uleb128 2\n" - "\t.byte 0xd\n\t.uleb128 0xe\n" - "\t.align 2\n" - ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); -#endif -#if !LJ_NO_UNWIND - fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); - fprintf(ctx->fp, - ".Lframe1:\n" - "\t.long .LECIE1-.LSCIE1\n" - ".LSCIE1:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.string \"zPR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 65\n" - "\t.uleb128 6\n" /* augmentation length */ - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.long lj_err_unwind_dwarf-.\n" - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE1:\n\n"); - fprintf(ctx->fp, - ".LSFDE2:\n" - "\t.long .LEFDE2-.LASFDE2\n" - ".LASFDE2:\n" - "\t.long .LASFDE2-.Lframe1\n" - "\t.long .Lbegin-.\n" - "\t.long %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.uleb128 %d\n" - "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" - "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n", - fcofs, CFRAME_SIZE); - for (i = 14; i <= 31; i++) - fprintf(ctx->fp, - "\t.byte %d\n\t.uleb128 %d\n" - "\t.byte %d\n\t.uleb128 %d\n", - 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)); - fprintf(ctx->fp, - "\t.align 2\n" - ".LEFDE2:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".Lframe2:\n" - "\t.long .LECIE2-.LSCIE2\n" - ".LSCIE2:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.string \"zR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 65\n" - "\t.uleb128 1\n" /* augmentation length */ - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE2:\n\n"); - fprintf(ctx->fp, - ".LSFDE3:\n" - "\t.long .LEFDE3-.LASFDE3\n" - ".LASFDE3:\n" - "\t.long .LASFDE3-.Lframe2\n" - "\t.long lj_vm_ffi_call-.\n" - "\t.long %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" - "\t.byte 0x8e\n\t.uleb128 2\n" - "\t.byte 0xd\n\t.uleb128 0xe\n" - "\t.align 2\n" - ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); -#endif -#endif - break; - default: - break; - } -} - diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc deleted file mode 100644 index 211ae7b922..0000000000 --- a/src/vm_x86.dasc +++ /dev/null @@ -1,5780 +0,0 @@ -|// Low-level VM code for x86 CPUs. -|// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -| -|.if P64 -|.arch x64 -|.else -|.arch x86 -|.endif -|.section code_op, code_sub -| -|.actionlist build_actionlist -|.globals GLOB_ -|.globalnames globnames -|.externnames extnames -| -|//----------------------------------------------------------------------- -| -|.if P64 -|.define X64, 1 -|.if WIN -|.define X64WIN, 1 -|.endif -|.endif -| -|// Fixed register assignments for the interpreter. -|// This is very fragile and has many dependencies. Caveat emptor. -|.define BASE, edx // Not C callee-save, refetched anyway. -|.if not X64 -|.define KBASE, edi // Must be C callee-save. -|.define KBASEa, KBASE -|.define PC, esi // Must be C callee-save. -|.define PCa, PC -|.define DISPATCH, ebx // Must be C callee-save. -|.elif X64WIN -|.define KBASE, edi // Must be C callee-save. -|.define KBASEa, rdi -|.define PC, esi // Must be C callee-save. -|.define PCa, rsi -|.define DISPATCH, ebx // Must be C callee-save. -|.else -|.define KBASE, r15d // Must be C callee-save. -|.define KBASEa, r15 -|.define PC, ebx // Must be C callee-save. -|.define PCa, rbx -|.define DISPATCH, r14d // Must be C callee-save. -|.endif -| -|.define RA, ecx -|.define RAH, ch -|.define RAL, cl -|.define RB, ebp // Must be ebp (C callee-save). -|.define RC, eax // Must be eax. -|.define RCW, ax -|.define RCH, ah -|.define RCL, al -|.define OP, RB -|.define RD, RC -|.define RDW, RCW -|.define RDL, RCL -|.if X64 -|.define RAa, rcx -|.define RBa, rbp -|.define RCa, rax -|.define RDa, rax -|.else -|.define RAa, RA -|.define RBa, RB -|.define RCa, RC -|.define RDa, RD -|.endif -| -|.if not X64 -|.define FCARG1, ecx // x86 fastcall arguments. -|.define FCARG2, edx -|.elif X64WIN -|.define CARG1, rcx // x64/WIN64 C call arguments. -|.define CARG2, rdx -|.define CARG3, r8 -|.define CARG4, r9 -|.define CARG1d, ecx -|.define CARG2d, edx -|.define CARG3d, r8d -|.define CARG4d, r9d -|.define FCARG1, CARG1d // Upwards compatible to x86 fastcall. -|.define FCARG2, CARG2d -|.else -|.define CARG1, rdi // x64/POSIX C call arguments. -|.define CARG2, rsi -|.define CARG3, rdx -|.define CARG4, rcx -|.define CARG5, r8 -|.define CARG6, r9 -|.define CARG1d, edi -|.define CARG2d, esi -|.define CARG3d, edx -|.define CARG4d, ecx -|.define CARG5d, r8d -|.define CARG6d, r9d -|.define FCARG1, CARG1d // Simulate x86 fastcall. -|.define FCARG2, CARG2d -|.endif -| -|// Type definitions. Some of these are only used for documentation. -|.type L, lua_State -|.type GL, global_State -|.type TVALUE, TValue -|.type GCOBJ, GCobj -|.type STR, GCstr -|.type TAB, GCtab -|.type LFUNC, GCfuncL -|.type CFUNC, GCfuncC -|.type PROTO, GCproto -|.type UPVAL, GCupval -|.type NODE, Node -|.type NARGS, int -|.type TRACE, GCtrace -|.type SBUF, SBuf -| -|// Stack layout while in interpreter. Must match with lj_frame.h. -|//----------------------------------------------------------------------- -|.if not X64 // x86 stack layout. -| -|.if WIN -| -|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--). -|.macro saveregs_ -| push edi; push esi; push ebx -| push extern lj_err_unwind_win -| fs; push dword [0] -| fs; mov [0], esp -| sub esp, CFRAME_SPACE -|.endmacro -|.macro restoreregs -| add esp, CFRAME_SPACE -| fs; pop dword [0] -| pop edi // Short for esp += 4. -| pop ebx; pop esi; pop edi; pop ebp -|.endmacro -| -|.else -| -|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). -|.macro saveregs_ -| push edi; push esi; push ebx -| sub esp, CFRAME_SPACE -|.endmacro -|.macro restoreregs -| add esp, CFRAME_SPACE -| pop ebx; pop esi; pop edi; pop ebp -|.endmacro -| -|.endif -| -|.macro saveregs -| push ebp; saveregs_ -|.endmacro -| -|.if WIN -|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only. -|.define SAVE_NRES, aword [esp+aword*18] -|.define SAVE_CFRAME, aword [esp+aword*17] -|.define SAVE_L, aword [esp+aword*16] -|//----- 16 byte aligned, ^^^ arguments from C caller -|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter. -|.define SAVE_R4, aword [esp+aword*14] -|.define SAVE_R3, aword [esp+aword*13] -|.define SAVE_R2, aword [esp+aword*12] -|//----- 16 byte aligned -|.define SAVE_R1, aword [esp+aword*11] -|.define SEH_FUNC, aword [esp+aword*10] -|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves. -|.define UNUSED2, aword [esp+aword*8] -|//----- 16 byte aligned -|.define UNUSED1, aword [esp+aword*7] -|.define SAVE_PC, aword [esp+aword*6] -|.define TMP2, aword [esp+aword*5] -|.define TMP1, aword [esp+aword*4] -|//----- 16 byte aligned -|.define ARG4, aword [esp+aword*3] -|.define ARG3, aword [esp+aword*2] -|.define ARG2, aword [esp+aword*1] -|.define ARG1, aword [esp] //<-- esp while in interpreter. -|//----- 16 byte aligned, ^^^ arguments for C callee -|.else -|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. -|.define SAVE_NRES, aword [esp+aword*14] -|.define SAVE_CFRAME, aword [esp+aword*13] -|.define SAVE_L, aword [esp+aword*12] -|//----- 16 byte aligned, ^^^ arguments from C caller -|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter. -|.define SAVE_R4, aword [esp+aword*10] -|.define SAVE_R3, aword [esp+aword*9] -|.define SAVE_R2, aword [esp+aword*8] -|//----- 16 byte aligned -|.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves. -|.define SAVE_PC, aword [esp+aword*6] -|.define TMP2, aword [esp+aword*5] -|.define TMP1, aword [esp+aword*4] -|//----- 16 byte aligned -|.define ARG4, aword [esp+aword*3] -|.define ARG3, aword [esp+aword*2] -|.define ARG2, aword [esp+aword*1] -|.define ARG1, aword [esp] //<-- esp while in interpreter. -|//----- 16 byte aligned, ^^^ arguments for C callee -|.endif -| -|// FPARGx overlaps ARGx and ARG(x+1) on x86. -|.define FPARG3, qword [esp+qword*1] -|.define FPARG1, qword [esp] -|// TMPQ overlaps TMP1/TMP2. ARG5/MULTRES overlap TMP1/TMP2 (and TMPQ). -|.define TMPQ, qword [esp+aword*4] -|.define TMP3, ARG4 -|.define ARG5, TMP1 -|.define TMPa, TMP1 -|.define MULTRES, TMP2 -| -|// Arguments for vm_call and vm_pcall. -|.define INARG_BASE, SAVE_CFRAME // Overwritten by SAVE_CFRAME! -| -|// Arguments for vm_cpcall. -|.define INARG_CP_CALL, SAVE_ERRF -|.define INARG_CP_UD, SAVE_NRES -|.define INARG_CP_FUNC, SAVE_CFRAME -| -|//----------------------------------------------------------------------- -|.elif X64WIN // x64/Windows stack layout -| -|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). -|.macro saveregs_ -| push rdi; push rsi; push rbx -| sub rsp, CFRAME_SPACE -|.endmacro -|.macro saveregs -| push rbp; saveregs_ -|.endmacro -|.macro restoreregs -| add rsp, CFRAME_SPACE -| pop rbx; pop rsi; pop rdi; pop rbp -|.endmacro -| -|.define SAVE_CFRAME, aword [rsp+aword*13] -|.define SAVE_PC, dword [rsp+dword*25] -|.define SAVE_L, dword [rsp+dword*24] -|.define SAVE_ERRF, dword [rsp+dword*23] -|.define SAVE_NRES, dword [rsp+dword*22] -|.define TMP2, dword [rsp+dword*21] -|.define TMP1, dword [rsp+dword*20] -|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter -|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. -|.define SAVE_R4, aword [rsp+aword*8] -|.define SAVE_R3, aword [rsp+aword*7] -|.define SAVE_R2, aword [rsp+aword*6] -|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. -|.define ARG5, aword [rsp+aword*4] -|.define CSAVE_4, aword [rsp+aword*3] -|.define CSAVE_3, aword [rsp+aword*2] -|.define CSAVE_2, aword [rsp+aword*1] -|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter. -|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee -| -|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ). -|.define TMPQ, qword [rsp+aword*10] -|.define MULTRES, TMP2 -|.define TMPa, ARG5 -|.define ARG5d, dword [rsp+aword*4] -|.define TMP3, ARG5d -| -|//----------------------------------------------------------------------- -|.else // x64/POSIX stack layout -| -|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). -|.macro saveregs_ -| push rbx; push r15; push r14 -|.if NO_UNWIND -| push r13; push r12 -|.endif -| sub rsp, CFRAME_SPACE -|.endmacro -|.macro saveregs -| push rbp; saveregs_ -|.endmacro -|.macro restoreregs -| add rsp, CFRAME_SPACE -|.if NO_UNWIND -| pop r12; pop r13 -|.endif -| pop r14; pop r15; pop rbx; pop rbp -|.endmacro -| -|//----- 16 byte aligned, -|.if NO_UNWIND -|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter. -|.define SAVE_R4, aword [rsp+aword*10] -|.define SAVE_R3, aword [rsp+aword*9] -|.define SAVE_R2, aword [rsp+aword*8] -|.define SAVE_R1, aword [rsp+aword*7] -|.define SAVE_RU2, aword [rsp+aword*6] -|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves. -|.else -|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. -|.define SAVE_R4, aword [rsp+aword*8] -|.define SAVE_R3, aword [rsp+aword*7] -|.define SAVE_R2, aword [rsp+aword*6] -|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. -|.endif -|.define SAVE_CFRAME, aword [rsp+aword*4] -|.define SAVE_PC, dword [rsp+dword*7] -|.define SAVE_L, dword [rsp+dword*6] -|.define SAVE_ERRF, dword [rsp+dword*5] -|.define SAVE_NRES, dword [rsp+dword*4] -|.define TMPa, aword [rsp+aword*1] -|.define TMP2, dword [rsp+dword*1] -|.define TMP1, dword [rsp] //<-- rsp while in interpreter. -|//----- 16 byte aligned -| -|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ). -|.define TMPQ, qword [rsp] -|.define TMP3, dword [rsp+aword*1] -|.define MULTRES, TMP2 -| -|.endif -| -|//----------------------------------------------------------------------- -| -|// Instruction headers. -|.macro ins_A; .endmacro -|.macro ins_AD; .endmacro -|.macro ins_AJ; .endmacro -|.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro -|.macro ins_AB_; movzx RB, RCH; .endmacro -|.macro ins_A_C; movzx RC, RCL; .endmacro -|.macro ins_AND; not RDa; .endmacro -| -|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster). -|.macro ins_NEXT -| mov RC, [PC] -| movzx RA, RCH -| movzx OP, RCL -| add PC, 4 -| shr RC, 16 -|.if X64 -| jmp aword [DISPATCH+OP*8] -|.else -| jmp aword [DISPATCH+OP*4] -|.endif -|.endmacro -| -|// Instruction footer. -|.if 1 -| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. -| .define ins_next, ins_NEXT -| .define ins_next_, ins_NEXT -|.else -| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. -| // Affects only certain kinds of benchmarks (and only with -j off). -| // Around 10%-30% slower on Core2, a lot more slower on P4. -| .macro ins_next -| jmp ->ins_next -| .endmacro -| .macro ins_next_ -| ->ins_next: -| ins_NEXT -| .endmacro -|.endif -| -|// Call decode and dispatch. -|.macro ins_callt -| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-4] = PC -| mov PC, LFUNC:RB->pc -| mov RA, [PC] -| movzx OP, RAL -| movzx RA, RAH -| add PC, 4 -|.if X64 -| jmp aword [DISPATCH+OP*8] -|.else -| jmp aword [DISPATCH+OP*4] -|.endif -|.endmacro -| -|.macro ins_call -| // BASE = new base, RB = LFUNC, RD = nargs+1 -| mov [BASE-4], PC -| ins_callt -|.endmacro -| -|//----------------------------------------------------------------------- -| -|// Macros to test operand types. -|.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro -|.macro checknum, reg, target; checktp reg, LJ_TISNUM; jae target; .endmacro -|.macro checkint, reg, target; checktp reg, LJ_TISNUM; jne target; .endmacro -|.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro -|.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro -| -|// These operands must be used with movzx. -|.define PC_OP, byte [PC-4] -|.define PC_RA, byte [PC-3] -|.define PC_RB, byte [PC-1] -|.define PC_RC, byte [PC-2] -|.define PC_RD, word [PC-2] -| -|.macro branchPC, reg -| lea PC, [PC+reg*4-BCBIAS_J*4] -|.endmacro -| -|// Assumes DISPATCH is relative to GL. -#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) -#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) -| -#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) -| -|// Decrement hashed hotcount and trigger trace recorder if zero. -|.macro hotloop, reg -| mov reg, PC -| shr reg, 1 -| and reg, HOTCOUNT_PCMASK -| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP -| jb ->vm_hotloop -|.endmacro -| -|.macro hotcall, reg -| mov reg, PC -| shr reg, 1 -| and reg, HOTCOUNT_PCMASK -| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL -| jb ->vm_hotcall -|.endmacro -| -|// Set current VM state. -|.macro set_vmstate, st -| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st -|.endmacro -| -|// x87 compares. -|.macro fcomparepp // Compare and pop st0 >< st1. -| fucomip st1 -| fpop -|.endmacro -| -|.macro fpop1; fstp st1; .endmacro -| -|// Synthesize SSE FP constants. -|.macro sseconst_abs, reg, tmp // Synthesize abs mask. -|.if X64 -| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp -|.else -| pxor reg, reg; pcmpeqd reg, reg; psrlq reg, 1 -|.endif -|.endmacro -| -|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const. -|.if X64 -| mov64 tmp, U64x(val,00000000); movd reg, tmp -|.else -| mov tmp, 0x .. val; movd reg, tmp; pshufd reg, reg, 0x51 -|.endif -|.endmacro -| -|.macro sseconst_sign, reg, tmp // Synthesize sign mask. -| sseconst_hi reg, tmp, 80000000 -|.endmacro -|.macro sseconst_1, reg, tmp // Synthesize 1.0. -| sseconst_hi reg, tmp, 3ff00000 -|.endmacro -|.macro sseconst_m1, reg, tmp // Synthesize -1.0. -| sseconst_hi reg, tmp, bff00000 -|.endmacro -|.macro sseconst_2p52, reg, tmp // Synthesize 2^52. -| sseconst_hi reg, tmp, 43300000 -|.endmacro -|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51. -| sseconst_hi reg, tmp, 43380000 -|.endmacro -| -|// Move table write barrier back. Overwrites reg. -|.macro barrierback, tab, reg -| and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab) -| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] -| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab -| mov tab->gclist, reg -|.endmacro -| -|//----------------------------------------------------------------------- - -/* Generate subroutines used by opcodes and other parts of the VM. */ -/* The .code_sub section should be last to help static branch prediction. */ -static void build_subroutines(BuildCtx *ctx) -{ - |.code_sub - | - |//----------------------------------------------------------------------- - |//-- Return handling ---------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_returnp: - | test PC, FRAME_P - | jz ->cont_dispatch - | - | // Return from pcall or xpcall fast func. - | and PC, -8 - | sub BASE, PC // Restore caller base. - | lea RAa, [RA+PC-8] // Rebase RA and prepend one result. - | mov PC, [BASE-4] // Fetch PC of previous frame. - | // Prepending may overwrite the pcall frame, so do it at the end. - | mov dword [BASE+RA+4], LJ_TTRUE // Prepend true to results. - | - |->vm_returnc: - | add RD, 1 // RD = nresults+1 - | jz ->vm_unwind_yield - | mov MULTRES, RD - | test PC, FRAME_TYPE - | jz ->BC_RET_Z // Handle regular return to Lua. - | - |->vm_return: - | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return - | xor PC, FRAME_C - | test PC, FRAME_TYPE - | jnz ->vm_returnp - | - | // Return to C. - | set_vmstate C - | and PC, -8 - | sub PC, BASE - | neg PC // Previous base = BASE - delta. - | - | sub RD, 1 - | jz >2 - |1: // Move results down. - |.if X64 - | mov RBa, [BASE+RA] - | mov [BASE-8], RBa - |.else - | mov RB, [BASE+RA] - | mov [BASE-8], RB - | mov RB, [BASE+RA+4] - | mov [BASE-4], RB - |.endif - | add BASE, 8 - | sub RD, 1 - | jnz <1 - |2: - | mov L:RB, SAVE_L - | mov L:RB->base, PC - |3: - | mov RD, MULTRES - | mov RA, SAVE_NRES // RA = wanted nresults+1 - |4: - | cmp RA, RD - | jne >6 // More/less results wanted? - |5: - | sub BASE, 8 - | mov L:RB->top, BASE - | - |->vm_leave_cp: - | mov RAa, SAVE_CFRAME // Restore previous C frame. - | mov L:RB->cframe, RAa - | xor eax, eax // Ok return status for vm_pcall. - | - |->vm_leave_unw: - | restoreregs - | ret - | - |6: - | jb >7 // Less results wanted? - | // More results wanted. Check stack size and fill up results with nil. - | cmp BASE, L:RB->maxstack - | ja >8 - | mov dword [BASE-4], LJ_TNIL - | add BASE, 8 - | add RD, 1 - | jmp <4 - | - |7: // Less results wanted. - | test RA, RA - | jz <5 // But check for LUA_MULTRET+1. - | sub RA, RD // Negative result! - | lea BASE, [BASE+RA*8] // Correct top. - | jmp <5 - | - |8: // Corner case: need to grow stack for filling up results. - | // This can happen if: - | // - A C function grows the stack (a lot). - | // - The GC shrinks the stack in between. - | // - A return back from a lua_call() with (high) nresults adjustment. - | mov L:RB->top, BASE // Save current top held in BASE (yes). - | mov MULTRES, RD // Need to fill only remainder with nil. - | mov FCARG2, RA - | mov FCARG1, L:RB - | call extern lj_state_growstack@8 // (lua_State *L, int n) - | mov BASE, L:RB->top // Need the (realloced) L->top in BASE. - | jmp <3 - | - |->vm_unwind_yield: - | mov al, LUA_YIELD - | jmp ->vm_unwind_c_eh - | - |->vm_unwind_c@8: // Unwind C stack, return from vm_pcall. - | // (void *cframe, int errcode) - |.if X64 - | mov eax, CARG2d // Error return status for vm_pcall. - | mov rsp, CARG1 - |.else - | mov eax, FCARG2 // Error return status for vm_pcall. - | mov esp, FCARG1 - |.if WIN - | lea FCARG1, SEH_NEXT - | fs; mov [0], FCARG1 - |.endif - |.endif - |->vm_unwind_c_eh: // Landing pad for external unwinder. - | mov L:RB, SAVE_L - | mov GL:RB, L:RB->glref - | mov dword GL:RB->vmstate, ~LJ_VMST_C - | jmp ->vm_leave_unw - | - |->vm_unwind_rethrow: - |.if X64 and not X64WIN - | mov FCARG1, SAVE_L - | mov FCARG2, eax - | restoreregs - | jmp extern lj_err_throw@8 // (lua_State *L, int errcode) - |.endif - | - |->vm_unwind_ff@4: // Unwind C stack, return from ff pcall. - | // (void *cframe) - |.if X64 - | and CARG1, CFRAME_RAWMASK - | mov rsp, CARG1 - |.else - | and FCARG1, CFRAME_RAWMASK - | mov esp, FCARG1 - |.if WIN - | lea FCARG1, SEH_NEXT - | fs; mov [0], FCARG1 - |.endif - |.endif - |->vm_unwind_ff_eh: // Landing pad for external unwinder. - | mov L:RB, SAVE_L - | mov RAa, -8 // Results start at BASE+RA = BASE-8. - | mov RD, 1+1 // Really 1+2 results, incr. later. - | mov BASE, L:RB->base - | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. - | add DISPATCH, GG_G2DISP - | mov PC, [BASE-4] // Fetch PC of previous frame. - | mov dword [BASE-4], LJ_TFALSE // Prepend false to error message. - | set_vmstate INTERP - | jmp ->vm_returnc // Increments RD/MULTRES and returns. - | - |.if WIN and not X64 - |->vm_rtlunwind@16: // Thin layer around RtlUnwind. - | // (void *cframe, void *excptrec, void *unwinder, int errcode) - | mov [esp], FCARG1 // Return value for RtlUnwind. - | push FCARG2 // Exception record for RtlUnwind. - | push 0 // Ignored by RtlUnwind. - | push dword [FCARG1+CFRAME_OFS_SEH] - | call extern RtlUnwind@16 // Violates ABI (clobbers too much). - | mov FCARG1, eax - | mov FCARG2, [esp+4] // errcode (for vm_unwind_c). - | ret // Jump to unwinder. - |.endif - | - |//----------------------------------------------------------------------- - |//-- Grow stack for calls ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_growstack_c: // Grow stack for C function. - | mov FCARG2, LUA_MINSTACK - | jmp >2 - | - |->vm_growstack_v: // Grow stack for vararg Lua function. - | sub RD, 8 - | jmp >1 - | - |->vm_growstack_f: // Grow stack for fixarg Lua function. - | // BASE = new base, RD = nargs+1, RB = L, PC = first PC - | lea RD, [BASE+NARGS:RD*8-8] - |1: - | movzx RA, byte [PC-4+PC2PROTO(framesize)] - | add PC, 4 // Must point after first instruction. - | mov L:RB->base, BASE - | mov L:RB->top, RD - | mov SAVE_PC, PC - | mov FCARG2, RA - |2: - | // RB = L, L->base = new base, L->top = top - | mov FCARG1, L:RB - | call extern lj_state_growstack@8 // (lua_State *L, int n) - | mov BASE, L:RB->base - | mov RD, L:RB->top - | mov LFUNC:RB, [BASE-8] - | sub RD, BASE - | shr RD, 3 - | add NARGS:RD, 1 - | // BASE = new base, RB = LFUNC, RD = nargs+1 - | ins_callt // Just retry the call. - | - |//----------------------------------------------------------------------- - |//-- Entry points into the assembler VM --------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_resume: // Setup C frame and resume thread. - | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) - | saveregs - |.if X64 - | mov L:RB, CARG1d // Caveat: CARG1d may be RA. - | mov SAVE_L, CARG1d - | mov RA, CARG2d - |.else - | mov L:RB, SAVE_L - | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! - |.endif - | mov PC, FRAME_CP - | xor RD, RD - | lea KBASEa, [esp+CFRAME_RESUME] - | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. - | add DISPATCH, GG_G2DISP - | mov SAVE_PC, RD // Any value outside of bytecode is ok. - | mov SAVE_CFRAME, RDa - |.if X64 - | mov SAVE_NRES, RD - | mov SAVE_ERRF, RD - |.endif - | mov L:RB->cframe, KBASEa - | cmp byte L:RB->status, RDL - | je >2 // Initial resume (like a call). - | - | // Resume after yield (like a return). - | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | set_vmstate INTERP - | mov byte L:RB->status, RDL - | mov BASE, L:RB->base - | mov RD, L:RB->top - | sub RD, RA - | shr RD, 3 - | add RD, 1 // RD = nresults+1 - | sub RA, BASE // RA = resultofs - | mov PC, [BASE-4] - | mov MULTRES, RD - | test PC, FRAME_TYPE - | jz ->BC_RET_Z - | jmp ->vm_return - | - |->vm_pcall: // Setup protected C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) - | saveregs - | mov PC, FRAME_CP - |.if X64 - | mov SAVE_ERRF, CARG4d - |.endif - | jmp >1 - | - |->vm_call: // Setup C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1) - | saveregs - | mov PC, FRAME_C - | - |1: // Entry point for vm_pcall above (PC = ftype). - |.if X64 - | mov SAVE_NRES, CARG3d - | mov L:RB, CARG1d // Caveat: CARG1d may be RA. - | mov SAVE_L, CARG1d - | mov RA, CARG2d - |.else - | mov L:RB, SAVE_L - | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! - |.endif - | - | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. - | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. - | mov SAVE_CFRAME, KBASEa - | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. - | add DISPATCH, GG_G2DISP - |.if X64 - | mov L:RB->cframe, rsp - |.else - | mov L:RB->cframe, esp - |.endif - | - |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype). - | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | set_vmstate INTERP - | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). - | add PC, RA - | sub PC, BASE // PC = frame delta + frame type - | - | mov RD, L:RB->top - | sub RD, RA - | shr NARGS:RD, 3 - | add NARGS:RD, 1 // RD = nargs+1 - | - |->vm_call_dispatch: - | mov LFUNC:RB, [RA-8] - | cmp dword [RA-4], LJ_TFUNC - | jne ->vmeta_call // Ensure KBASE defined and != BASE. - | - |->vm_call_dispatch_f: - | mov BASE, RA - | ins_call - | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC - | - |->vm_cpcall: // Setup protected C frame, call C. - | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) - | saveregs - |.if X64 - | mov L:RB, CARG1d // Caveat: CARG1d may be RA. - | mov SAVE_L, CARG1d - |.else - | mov L:RB, SAVE_L - | // Caveat: INARG_CP_* and SAVE_CFRAME/SAVE_NRES/SAVE_ERRF overlap! - | mov RC, INARG_CP_UD // Get args before they are overwritten. - | mov RA, INARG_CP_FUNC - | mov BASE, INARG_CP_CALL - |.endif - | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. - | - | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). - | sub KBASE, L:RB->top - | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. - | mov SAVE_ERRF, 0 // No error function. - | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. - | add DISPATCH, GG_G2DISP - | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). - | - |.if X64 - | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. - | mov SAVE_CFRAME, KBASEa - | mov L:RB->cframe, rsp - | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | - | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) - |.else - | mov ARG3, RC // Have to copy args downwards. - | mov ARG2, RA - | mov ARG1, L:RB - | - | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. - | mov SAVE_CFRAME, KBASE - | mov L:RB->cframe, esp - | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | - | call BASE // (lua_State *L, lua_CFunction func, void *ud) - |.endif - | // TValue * (new base) or NULL returned in eax (RC). - | test RC, RC - | jz ->vm_leave_cp // No base? Just remove C frame. - | mov RA, RC - | mov PC, FRAME_CP - | jmp <2 // Else continue with the call. - | - |//----------------------------------------------------------------------- - |//-- Metamethod handling ------------------------------------------------ - |//----------------------------------------------------------------------- - | - |//-- Continuation dispatch ---------------------------------------------- - | - |->cont_dispatch: - | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES) - | add RA, BASE - | and PC, -8 - | mov RB, BASE - | sub BASE, PC // Restore caller BASE. - | mov dword [RA+RD*8-4], LJ_TNIL // Ensure one valid arg. - | mov RC, RA // ... in [RC] - | mov PC, [RB-12] // Restore PC from [cont|PC]. - |.if X64 - | movsxd RAa, dword [RB-16] // May be negative on WIN64 with debug. - |.if FFI - | cmp RA, 1 - | jbe >1 - |.endif - | lea KBASEa, qword [=>0] - | add RAa, KBASEa - |.else - | mov RA, dword [RB-16] - |.if FFI - | cmp RA, 1 - | jbe >1 - |.endif - |.endif - | mov LFUNC:KBASE, [BASE-8] - | mov KBASE, LFUNC:KBASE->pc - | mov KBASE, [KBASE+PC2PROTO(k)] - | // BASE = base, RC = result, RB = meta base - | jmp RAa // Jump to continuation. - | - |.if FFI - |1: - | je ->cont_ffi_callback // cont = 1: return from FFI callback. - | // cont = 0: Tail call from C function. - | sub RB, BASE - | shr RB, 3 - | lea RD, [RB-1] - | jmp ->vm_call_tail - |.endif - | - |->cont_cat: // BASE = base, RC = result, RB = mbase - | movzx RA, PC_RB - | sub RB, 16 - | lea RA, [BASE+RA*8] - | sub RA, RB - | je ->cont_ra - | neg RA - | shr RA, 3 - |.if X64WIN - | mov CARG3d, RA - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE - | mov RCa, [RC] - | mov [RB], RCa - | mov CARG2d, RB - |.elif X64 - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE - | mov CARG3d, RA - | mov RAa, [RC] - | mov [RB], RAa - | mov CARG2d, RB - |.else - | mov ARG3, RA - | mov RA, [RC+4] - | mov RC, [RC] - | mov [RB+4], RA - | mov [RB], RC - | mov ARG2, RB - |.endif - | jmp ->BC_CAT_Z - | - |//-- Table indexing metamethods ----------------------------------------- - | - |->vmeta_tgets: - | mov TMP1, RC // RC = GCstr * - | mov TMP2, LJ_TSTR - | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2. - | cmp PC_OP, BC_GGET - | jne >1 - | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. - | mov [RA], TAB:RB // RB = GCtab * - | mov dword [RA+4], LJ_TTAB - | mov RB, RA - | jmp >2 - | - |->vmeta_tgetb: - | movzx RC, PC_RC - |.if DUALNUM - | mov TMP2, LJ_TISNUM - | mov TMP1, RC - |.else - | cvtsi2sd xmm0, RC - | movsd TMPQ, xmm0 - |.endif - | lea RCa, TMPQ // Store temp. TValue in TMPQ. - | jmp >1 - | - |->vmeta_tgetv: - | movzx RC, PC_RC // Reload TValue *k from RC. - | lea RC, [BASE+RC*8] - |1: - | movzx RB, PC_RB // Reload TValue *t from RB. - | lea RB, [BASE+RB*8] - |2: - |.if X64 - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE. - | mov CARG2d, RB - | mov CARG3, RCa // May be 64 bit ptr to stack. - | mov L:RB, L:CARG1d - |.else - | mov ARG2, RB - | mov L:RB, SAVE_L - | mov ARG3, RC - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) - | // TValue * (finished) or NULL (metamethod) returned in eax (RC). - | mov BASE, L:RB->base - | test RC, RC - | jz >3 - |->cont_ra: // BASE = base, RC = result - | movzx RA, PC_RA - |.if X64 - | mov RBa, [RC] - | mov [BASE+RA*8], RBa - |.else - | mov RB, [RC+4] - | mov RC, [RC] - | mov [BASE+RA*8+4], RB - | mov [BASE+RA*8], RC - |.endif - | ins_next - | - |3: // Call __index metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k - | mov RA, L:RB->top - | mov [RA-12], PC // [cont|PC] - | lea PC, [RA+FRAME_CONT] - | sub PC, BASE - | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here. - | mov NARGS:RD, 2+1 // 2 args for func(t, k). - | jmp ->vm_call_dispatch_f - | - |->vmeta_tgetr: - | mov FCARG1, TAB:RB - | mov RB, BASE // Save BASE. - | mov FCARG2, RC // Caveat: FCARG2 == BASE - | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key) - | // cTValue * or NULL returned in eax (RC). - | movzx RA, PC_RA - | mov BASE, RB // Restore BASE. - | test RC, RC - | jnz ->BC_TGETR_Z - | mov dword [BASE+RA*8+4], LJ_TNIL - | jmp ->BC_TGETR2_Z - | - |//----------------------------------------------------------------------- - | - |->vmeta_tsets: - | mov TMP1, RC // RC = GCstr * - | mov TMP2, LJ_TSTR - | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2. - | cmp PC_OP, BC_GSET - | jne >1 - | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. - | mov [RA], TAB:RB // RB = GCtab * - | mov dword [RA+4], LJ_TTAB - | mov RB, RA - | jmp >2 - | - |->vmeta_tsetb: - | movzx RC, PC_RC - |.if DUALNUM - | mov TMP2, LJ_TISNUM - | mov TMP1, RC - |.else - | cvtsi2sd xmm0, RC - | movsd TMPQ, xmm0 - |.endif - | lea RCa, TMPQ // Store temp. TValue in TMPQ. - | jmp >1 - | - |->vmeta_tsetv: - | movzx RC, PC_RC // Reload TValue *k from RC. - | lea RC, [BASE+RC*8] - |1: - | movzx RB, PC_RB // Reload TValue *t from RB. - | lea RB, [BASE+RB*8] - |2: - |.if X64 - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE. - | mov CARG2d, RB - | mov CARG3, RCa // May be 64 bit ptr to stack. - | mov L:RB, L:CARG1d - |.else - | mov ARG2, RB - | mov L:RB, SAVE_L - | mov ARG3, RC - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) - | // TValue * (finished) or NULL (metamethod) returned in eax (RC). - | mov BASE, L:RB->base - | test RC, RC - | jz >3 - | // NOBARRIER: lj_meta_tset ensures the table is not black. - | movzx RA, PC_RA - |.if X64 - | mov RBa, [BASE+RA*8] - | mov [RC], RBa - |.else - | mov RB, [BASE+RA*8+4] - | mov RA, [BASE+RA*8] - | mov [RC+4], RB - | mov [RC], RA - |.endif - |->cont_nop: // BASE = base, (RC = result) - | ins_next - | - |3: // Call __newindex metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) - | mov RA, L:RB->top - | mov [RA-12], PC // [cont|PC] - | movzx RC, PC_RA - | // Copy value to third argument. - |.if X64 - | mov RBa, [BASE+RC*8] - | mov [RA+16], RBa - |.else - | mov RB, [BASE+RC*8+4] - | mov RC, [BASE+RC*8] - | mov [RA+20], RB - | mov [RA+16], RC - |.endif - | lea PC, [RA+FRAME_CONT] - | sub PC, BASE - | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here. - | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). - | jmp ->vm_call_dispatch_f - | - |->vmeta_tsetr: - |.if X64WIN - | mov L:CARG1d, SAVE_L - | mov CARG3d, RC - | mov L:CARG1d->base, BASE - | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE. - |.elif X64 - | mov L:CARG1d, SAVE_L - | mov CARG2d, TAB:RB - | mov L:CARG1d->base, BASE - | mov RB, BASE // Save BASE. - | mov CARG3d, RC // Caveat: CARG3d == BASE. - |.else - | mov L:RA, SAVE_L - | mov ARG2, TAB:RB - | mov RB, BASE // Save BASE. - | mov ARG3, RC - | mov ARG1, L:RA - | mov L:RA->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) - | // TValue * returned in eax (RC). - | movzx RA, PC_RA - | mov BASE, RB // Restore BASE. - | jmp ->BC_TSETR_Z - | - |//-- Comparison metamethods --------------------------------------------- - | - |->vmeta_comp: - |.if X64 - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d == BASE. - |.if X64WIN - | lea CARG3d, [BASE+RD*8] - | lea CARG2d, [BASE+RA*8] - |.else - | lea CARG2d, [BASE+RA*8] - | lea CARG3d, [BASE+RD*8] - |.endif - | mov CARG1d, L:RB // Caveat: CARG1d/CARG4d == RA. - | movzx CARG4d, PC_OP - |.else - | movzx RB, PC_OP - | lea RD, [BASE+RD*8] - | lea RA, [BASE+RA*8] - | mov ARG4, RB - | mov L:RB, SAVE_L - | mov ARG3, RD - | mov ARG2, RA - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) - | // 0/1 or TValue * (metamethod) returned in eax (RC). - |3: - | mov BASE, L:RB->base - | cmp RC, 1 - | ja ->vmeta_binop - |4: - | lea PC, [PC+4] - | jb >6 - |5: - | movzx RD, PC_RD - | branchPC RD - |6: - | ins_next - | - |->cont_condt: // BASE = base, RC = result - | add PC, 4 - | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is true. - | jb <5 - | jmp <6 - | - |->cont_condf: // BASE = base, RC = result - | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is false. - | jmp <4 - | - |->vmeta_equal: - | sub PC, 4 - |.if X64WIN - | mov CARG3d, RD - | mov CARG4d, RB - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG2d == BASE. - | mov CARG2d, RA - | mov CARG1d, L:RB // Caveat: CARG1d == RA. - |.elif X64 - | mov CARG2d, RA - | mov CARG4d, RB // Caveat: CARG4d == RA. - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG3d == BASE. - | mov CARG3d, RD - | mov CARG1d, L:RB - |.else - | mov ARG4, RB - | mov L:RB, SAVE_L - | mov ARG3, RD - | mov ARG2, RA - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) - | // 0/1 or TValue * (metamethod) returned in eax (RC). - | jmp <3 - | - |->vmeta_equal_cd: - |.if FFI - | sub PC, 4 - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov FCARG1, L:RB - | mov FCARG2, dword [PC-4] - | mov SAVE_PC, PC - | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns ins) - | // 0/1 or TValue * (metamethod) returned in eax (RC). - | jmp <3 - |.endif - | - |->vmeta_istype: - |.if X64 - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. - | mov CARG2d, RA - | movzx CARG3d, PC_RD - | mov L:CARG1d, L:RB - |.else - | movzx RD, PC_RD - | mov ARG2, RA - | mov L:RB, SAVE_L - | mov ARG3, RD - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) - | mov BASE, L:RB->base - | jmp <6 - | - |//-- Arithmetic metamethods --------------------------------------------- - | - |->vmeta_arith_vno: - |.if DUALNUM - | movzx RB, PC_RB - |.endif - |->vmeta_arith_vn: - | lea RC, [KBASE+RC*8] - | jmp >1 - | - |->vmeta_arith_nvo: - |.if DUALNUM - | movzx RC, PC_RC - |.endif - |->vmeta_arith_nv: - | lea RC, [KBASE+RC*8] - | lea RB, [BASE+RB*8] - | xchg RB, RC - | jmp >2 - | - |->vmeta_unm: - | lea RC, [BASE+RD*8] - | mov RB, RC - | jmp >2 - | - |->vmeta_arith_vvo: - |.if DUALNUM - | movzx RB, PC_RB - |.endif - |->vmeta_arith_vv: - | lea RC, [BASE+RC*8] - |1: - | lea RB, [BASE+RB*8] - |2: - | lea RA, [BASE+RA*8] - |.if X64WIN - | mov CARG3d, RB - | mov CARG4d, RC - | movzx RC, PC_OP - | mov ARG5d, RC - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG2d == BASE. - | mov CARG2d, RA - | mov CARG1d, L:RB // Caveat: CARG1d == RA. - |.elif X64 - | movzx CARG5d, PC_OP - | mov CARG2d, RA - | mov CARG4d, RC // Caveat: CARG4d == RA. - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE // Caveat: CARG3d == BASE. - | mov CARG3d, RB - | mov L:RB, L:CARG1d - |.else - | mov ARG3, RB - | mov L:RB, SAVE_L - | mov ARG4, RC - | movzx RC, PC_OP - | mov ARG2, RA - | mov ARG5, RC - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) - | // NULL (finished) or TValue * (metamethod) returned in eax (RC). - | mov BASE, L:RB->base - | test RC, RC - | jz ->cont_nop - | - | // Call metamethod for binary op. - |->vmeta_binop: - | // BASE = base, RC = new base, stack = cont/func/o1/o2 - | mov RA, RC - | sub RC, BASE - | mov [RA-12], PC // [cont|PC] - | lea PC, [RC+FRAME_CONT] - | mov NARGS:RD, 2+1 // 2 args for func(o1, o2). - | jmp ->vm_call_dispatch - | - |->vmeta_len: - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | lea FCARG2, [BASE+RD*8] // Caveat: FCARG2 == BASE - | mov L:FCARG1, L:RB - | mov SAVE_PC, PC - | call extern lj_meta_len@8 // (lua_State *L, TValue *o) - | // NULL (retry) or TValue * (metamethod) returned in eax (RC). - | mov BASE, L:RB->base -#if LJ_52 - | test RC, RC - | jne ->vmeta_binop // Binop call for compatibility. - | movzx RD, PC_RD - | mov TAB:FCARG1, [BASE+RD*8] - | jmp ->BC_LEN_Z -#else - | jmp ->vmeta_binop // Binop call for compatibility. -#endif - | - |//-- Call metamethod ---------------------------------------------------- - | - |->vmeta_call_ra: - | lea RA, [BASE+RA*8+8] - |->vmeta_call: // Resolve and call __call metamethod. - | // BASE = old base, RA = new base, RC = nargs+1, PC = return - | mov TMP2, RA // Save RA, RC for us. - | mov TMP1, NARGS:RD - | sub RA, 8 - |.if X64 - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. - | mov CARG2d, RA - | lea CARG3d, [RA+NARGS:RD*8] - | mov CARG1d, L:RB // Caveat: CARG1d may be RA. - |.else - | lea RC, [RA+NARGS:RD*8] - | mov L:RB, SAVE_L - | mov ARG2, RA - | mov ARG3, RC - | mov ARG1, L:RB - | mov L:RB->base, BASE // This is the callers base! - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | mov BASE, L:RB->base - | mov RA, TMP2 - | mov NARGS:RD, TMP1 - | mov LFUNC:RB, [RA-8] - | add NARGS:RD, 1 - | // This is fragile. L->base must not move, KBASE must always be defined. - | cmp KBASE, BASE // Continue with CALLT if flag set. - | je ->BC_CALLT_Z - | mov BASE, RA - | ins_call // Otherwise call resolved metamethod. - | - |//-- Argument coercion for 'for' statement ------------------------------ - | - |->vmeta_for: - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov FCARG2, RA // Caveat: FCARG2 == BASE - | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA - | mov SAVE_PC, PC - | call extern lj_meta_for@8 // (lua_State *L, TValue *base) - | mov BASE, L:RB->base - | mov RC, [PC-4] - | movzx RA, RCH - | movzx OP, RCL - | shr RC, 16 - |.if X64 - | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI. - |.else - | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Retry FORI or JFORI. - |.endif - | - |//----------------------------------------------------------------------- - |//-- Fast functions ----------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro .ffunc, name - |->ff_ .. name: - |.endmacro - | - |.macro .ffunc_1, name - |->ff_ .. name: - | cmp NARGS:RD, 1+1; jb ->fff_fallback - |.endmacro - | - |.macro .ffunc_2, name - |->ff_ .. name: - | cmp NARGS:RD, 2+1; jb ->fff_fallback - |.endmacro - | - |.macro .ffunc_nsse, name, op - | .ffunc_1 name - | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback - | op xmm0, qword [BASE] - |.endmacro - | - |.macro .ffunc_nsse, name - | .ffunc_nsse name, movsd - |.endmacro - | - |.macro .ffunc_nnsse, name - | .ffunc_2 name - | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback - | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback - | movsd xmm0, qword [BASE] - | movsd xmm1, qword [BASE+8] - |.endmacro - | - |.macro .ffunc_nnr, name - | .ffunc_2 name - | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback - | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback - | fld qword [BASE+8] - | fld qword [BASE] - |.endmacro - | - |// Inlined GC threshold check. Caveat: uses label 1. - |.macro ffgccheck - | mov RB, [DISPATCH+DISPATCH_GL(gc.total)] - | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)] - | jb >1 - | call ->fff_gcstep - |1: - |.endmacro - | - |//-- Base library: checks ----------------------------------------------- - | - |.ffunc_1 assert - | mov RB, [BASE+4] - | cmp RB, LJ_TISTRUECOND; jae ->fff_fallback - | mov PC, [BASE-4] - | mov MULTRES, RD - | mov [BASE-4], RB - | mov RB, [BASE] - | mov [BASE-8], RB - | sub RD, 2 - | jz >2 - | mov RA, BASE - |1: - | add RA, 8 - |.if X64 - | mov RBa, [RA] - | mov [RA-8], RBa - |.else - | mov RB, [RA+4] - | mov [RA-4], RB - | mov RB, [RA] - | mov [RA-8], RB - |.endif - | sub RD, 1 - | jnz <1 - |2: - | mov RD, MULTRES - | jmp ->fff_res_ - | - |.ffunc_1 type - | mov RB, [BASE+4] - |.if X64 - | mov RA, RB - | sar RA, 15 - | cmp RA, -2 - | je >3 - |.endif - | mov RC, ~LJ_TNUMX - | not RB - | cmp RC, RB - | cmova RC, RB - |2: - | mov CFUNC:RB, [BASE-8] - | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] - | mov PC, [BASE-4] - | mov dword [BASE-4], LJ_TSTR - | mov [BASE-8], STR:RC - | jmp ->fff_res1 - |.if X64 - |3: - | mov RC, ~LJ_TLIGHTUD - | jmp <2 - |.endif - | - |//-- Base library: getters and setters --------------------------------- - | - |.ffunc_1 getmetatable - | mov RB, [BASE+4] - | mov PC, [BASE-4] - | cmp RB, LJ_TTAB; jne >6 - |1: // Field metatable must be at same offset for GCtab and GCudata! - | mov TAB:RB, [BASE] - | mov TAB:RB, TAB:RB->metatable - |2: - | test TAB:RB, TAB:RB - | mov dword [BASE-4], LJ_TNIL - | jz ->fff_res1 - | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+4*(GCROOT_MMNAME+MM_metatable)] - | mov dword [BASE-4], LJ_TTAB // Store metatable as default result. - | mov [BASE-8], TAB:RB - | mov RA, TAB:RB->hmask - | and RA, STR:RC->hash - | imul RA, #NODE - | add NODE:RA, TAB:RB->node - |3: // Rearranged logic, because we expect _not_ to find the key. - | cmp dword NODE:RA->key.it, LJ_TSTR - | jne >4 - | cmp dword NODE:RA->key.gcr, STR:RC - | je >5 - |4: - | mov NODE:RA, NODE:RA->next - | test NODE:RA, NODE:RA - | jnz <3 - | jmp ->fff_res1 // Not found, keep default result. - |5: - | mov RB, [RA+4] - | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value. - | mov RC, [RA] - | mov [BASE-4], RB // Return value of mt.__metatable. - | mov [BASE-8], RC - | jmp ->fff_res1 - | - |6: - | cmp RB, LJ_TUDATA; je <1 - |.if X64 - | cmp RB, LJ_TNUMX; ja >8 - | cmp RB, LJ_TISNUM; jbe >7 - | mov RB, LJ_TLIGHTUD - | jmp >8 - |7: - |.else - | cmp RB, LJ_TISNUM; ja >8 - |.endif - | mov RB, LJ_TNUMX - |8: - | not RB - | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])] - | jmp <2 - | - |.ffunc_2 setmetatable - | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback - | // Fast path: no mt for table yet and not clearing the mt. - | mov TAB:RB, [BASE] - | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback - | cmp dword [BASE+12], LJ_TTAB; jne ->fff_fallback - | mov TAB:RC, [BASE+8] - | mov TAB:RB->metatable, TAB:RC - | mov PC, [BASE-4] - | mov dword [BASE-4], LJ_TTAB // Return original table. - | mov [BASE-8], TAB:RB - | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) - | jz >1 - | // Possible write barrier. Table is black, but skip iswhite(mt) check. - | barrierback TAB:RB, RC - |1: - | jmp ->fff_res1 - | - |.ffunc_2 rawget - | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback - |.if X64WIN - | mov RB, BASE // Save BASE. - | lea CARG3d, [BASE+8] - | mov CARG2d, [BASE] // Caveat: CARG2d == BASE. - | mov CARG1d, SAVE_L - |.elif X64 - | mov RB, BASE // Save BASE. - | mov CARG2d, [BASE] - | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE. - | mov CARG1d, SAVE_L - |.else - | mov TAB:RD, [BASE] - | mov L:RB, SAVE_L - | mov ARG2, TAB:RD - | mov ARG1, L:RB - | mov RB, BASE // Save BASE. - | add BASE, 8 - | mov ARG3, BASE - |.endif - | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) - | // cTValue * returned in eax (RD). - | mov BASE, RB // Restore BASE. - | // Copy table slot. - |.if X64 - | mov RBa, [RD] - | mov PC, [BASE-4] - | mov [BASE-8], RBa - |.else - | mov RB, [RD] - | mov RD, [RD+4] - | mov PC, [BASE-4] - | mov [BASE-8], RB - | mov [BASE-4], RD - |.endif - | jmp ->fff_res1 - | - |//-- Base library: conversions ------------------------------------------ - | - |.ffunc tonumber - | // Only handles the number case inline (without a base argument). - | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. - | cmp dword [BASE+4], LJ_TISNUM - |.if DUALNUM - | jne >1 - | mov RB, dword [BASE]; jmp ->fff_resi - |1: - | ja ->fff_fallback - |.else - | jae ->fff_fallback - |.endif - | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 - | - |.ffunc_1 tostring - | // Only handles the string or number case inline. - | mov PC, [BASE-4] - | cmp dword [BASE+4], LJ_TSTR; jne >3 - | // A __tostring method in the string base metatable is ignored. - | mov STR:RD, [BASE] - |2: - | mov dword [BASE-4], LJ_TSTR - | mov [BASE-8], STR:RD - | jmp ->fff_res1 - |3: // Handle numbers inline, unless a number base metatable is present. - | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback - | cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0 - | jne ->fff_fallback - | ffgccheck // Caveat: uses label 1. - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Add frame since C call can throw. - | mov SAVE_PC, PC // Redundant (but a defined value). - |.if X64 and not X64WIN - | mov FCARG2, BASE // Otherwise: FCARG2 == BASE - |.endif - | mov L:FCARG1, L:RB - |.if DUALNUM - | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o) - |.else - | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np) - |.endif - | // GCstr returned in eax (RD). - | mov BASE, L:RB->base - | jmp <2 - | - |//-- Base library: iterators ------------------------------------------- - | - |.ffunc_1 next - | je >2 // Missing 2nd arg? - |1: - | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Add frame since C call can throw. - | mov L:RB->top, BASE // Dummy frame length is ok. - | mov PC, [BASE-4] - |.if X64WIN - | lea CARG3d, [BASE+8] - | mov CARG2d, [BASE] // Caveat: CARG2d == BASE. - | mov CARG1d, L:RB - |.elif X64 - | mov CARG2d, [BASE] - | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE. - | mov CARG1d, L:RB - |.else - | mov TAB:RD, [BASE] - | mov ARG2, TAB:RD - | mov ARG1, L:RB - | add BASE, 8 - | mov ARG3, BASE - |.endif - | mov SAVE_PC, PC // Needed for ITERN fallback. - | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - | // Flag returned in eax (RD). - | mov BASE, L:RB->base - | test RD, RD; jz >3 // End of traversal? - | // Copy key and value to results. - |.if X64 - | mov RBa, [BASE+8] - | mov RDa, [BASE+16] - | mov [BASE-8], RBa - | mov [BASE], RDa - |.else - | mov RB, [BASE+8] - | mov RD, [BASE+12] - | mov [BASE-8], RB - | mov [BASE-4], RD - | mov RB, [BASE+16] - | mov RD, [BASE+20] - | mov [BASE], RB - | mov [BASE+4], RD - |.endif - |->fff_res2: - | mov RD, 1+2 - | jmp ->fff_res - |2: // Set missing 2nd arg to nil. - | mov dword [BASE+12], LJ_TNIL - | jmp <1 - |3: // End of traversal: return nil. - | mov dword [BASE-4], LJ_TNIL - | jmp ->fff_res1 - | - |.ffunc_1 pairs - | mov TAB:RB, [BASE] - | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback -#if LJ_52 - | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback -#endif - | mov CFUNC:RB, [BASE-8] - | mov CFUNC:RD, CFUNC:RB->upvalue[0] - | mov PC, [BASE-4] - | mov dword [BASE-4], LJ_TFUNC - | mov [BASE-8], CFUNC:RD - | mov dword [BASE+12], LJ_TNIL - | mov RD, 1+3 - | jmp ->fff_res - | - |.ffunc_2 ipairs_aux - | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback - | cmp dword [BASE+12], LJ_TISNUM - |.if DUALNUM - | jne ->fff_fallback - |.else - | jae ->fff_fallback - |.endif - | mov PC, [BASE-4] - |.if DUALNUM - | mov RD, dword [BASE+8] - | add RD, 1 - | mov dword [BASE-4], LJ_TISNUM - | mov dword [BASE-8], RD - |.else - | movsd xmm0, qword [BASE+8] - | sseconst_1 xmm1, RBa - | addsd xmm0, xmm1 - | cvttsd2si RD, xmm0 - | movsd qword [BASE-8], xmm0 - |.endif - | mov TAB:RB, [BASE] - | cmp RD, TAB:RB->asize; jae >2 // Not in array part? - | shl RD, 3 - | add RD, TAB:RB->array - |1: - | cmp dword [RD+4], LJ_TNIL; je ->fff_res0 - | // Copy array slot. - |.if X64 - | mov RBa, [RD] - | mov [BASE], RBa - |.else - | mov RB, [RD] - | mov RD, [RD+4] - | mov [BASE], RB - | mov [BASE+4], RD - |.endif - | jmp ->fff_res2 - |2: // Check for empty hash part first. Otherwise call C function. - | cmp dword TAB:RB->hmask, 0; je ->fff_res0 - | mov FCARG1, TAB:RB - | mov RB, BASE // Save BASE. - | mov FCARG2, RD // Caveat: FCARG2 == BASE - | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key) - | // cTValue * or NULL returned in eax (RD). - | mov BASE, RB - | test RD, RD - | jnz <1 - |->fff_res0: - | mov RD, 1+0 - | jmp ->fff_res - | - |.ffunc_1 ipairs - | mov TAB:RB, [BASE] - | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback -#if LJ_52 - | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback -#endif - | mov CFUNC:RB, [BASE-8] - | mov CFUNC:RD, CFUNC:RB->upvalue[0] - | mov PC, [BASE-4] - | mov dword [BASE-4], LJ_TFUNC - | mov [BASE-8], CFUNC:RD - |.if DUALNUM - | mov dword [BASE+12], LJ_TISNUM - | mov dword [BASE+8], 0 - |.else - | xorps xmm0, xmm0 - | movsd qword [BASE+8], xmm0 - |.endif - | mov RD, 1+3 - | jmp ->fff_res - | - |//-- Base library: catch errors ---------------------------------------- - | - |.ffunc_1 pcall - | lea RA, [BASE+8] - | sub NARGS:RD, 1 - | mov PC, 8+FRAME_PCALL - |1: - | movzx RB, byte [DISPATCH+DISPATCH_GL(hookmask)] - | shr RB, HOOK_ACTIVE_SHIFT - | and RB, 1 - | add PC, RB // Remember active hook before pcall. - | jmp ->vm_call_dispatch - | - |.ffunc_2 xpcall - | cmp dword [BASE+12], LJ_TFUNC; jne ->fff_fallback - | mov RB, [BASE+4] // Swap function and traceback. - | mov [BASE+12], RB - | mov dword [BASE+4], LJ_TFUNC - | mov LFUNC:RB, [BASE] - | mov PC, [BASE+8] - | mov [BASE+8], LFUNC:RB - | mov [BASE], PC - | lea RA, [BASE+16] - | sub NARGS:RD, 2 - | mov PC, 16+FRAME_PCALL - | jmp <1 - | - |//-- Coroutine library -------------------------------------------------- - | - |.macro coroutine_resume_wrap, resume - |.if resume - |.ffunc_1 coroutine_resume - | mov L:RB, [BASE] - |.else - |.ffunc coroutine_wrap_aux - | mov CFUNC:RB, [BASE-8] - | mov L:RB, CFUNC:RB->upvalue[0].gcr - |.endif - | mov PC, [BASE-4] - | mov SAVE_PC, PC - |.if X64 - | mov TMP1, L:RB - |.else - | mov ARG1, L:RB - |.endif - |.if resume - | cmp dword [BASE+4], LJ_TTHREAD; jne ->fff_fallback - |.endif - | cmp aword L:RB->cframe, 0; jne ->fff_fallback - | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback - | mov RA, L:RB->top - | je >1 // Status != LUA_YIELD (i.e. 0)? - | cmp RA, L:RB->base // Check for presence of initial func. - | je ->fff_fallback - |1: - |.if resume - | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread). - |.else - | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1). - |.endif - | cmp PC, L:RB->maxstack; ja ->fff_fallback - | mov L:RB->top, PC - | - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - |.if resume - | add BASE, 8 // Keep resumed thread in stack for GC. - |.endif - | mov L:RB->top, BASE - |.if resume - | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move. - |.else - | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move. - |.endif - | sub RBa, PCa // Relative to PC. - | - | cmp PC, RA - | je >3 - |2: // Move args to coroutine. - |.if X64 - | mov RCa, [PC+RB] - | mov [PC-8], RCa - |.else - | mov RC, [PC+RB+4] - | mov [PC-4], RC - | mov RC, [PC+RB] - | mov [PC-8], RC - |.endif - | sub PC, 8 - | cmp PC, RA - | jne <2 - |3: - |.if X64 - | mov CARG2d, RA - | mov CARG1d, TMP1 - |.else - | mov ARG2, RA - | xor RA, RA - | mov ARG4, RA - | mov ARG3, RA - |.endif - | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) - | - | mov L:RB, SAVE_L - |.if X64 - | mov L:PC, TMP1 - |.else - | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. - |.endif - | mov BASE, L:RB->base - | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | set_vmstate INTERP - | - | cmp eax, LUA_YIELD - | ja >8 - |4: - | mov RA, L:PC->base - | mov KBASE, L:PC->top - | mov L:PC->top, RA // Clear coroutine stack. - | mov PC, KBASE - | sub PC, RA - | je >6 // No results? - | lea RD, [BASE+PC] - | shr PC, 3 - | cmp RD, L:RB->maxstack - | ja >9 // Need to grow stack? - | - | mov RB, BASE - | sub RBa, RAa - |5: // Move results from coroutine. - |.if X64 - | mov RDa, [RA] - | mov [RA+RB], RDa - |.else - | mov RD, [RA] - | mov [RA+RB], RD - | mov RD, [RA+4] - | mov [RA+RB+4], RD - |.endif - | add RA, 8 - | cmp RA, KBASE - | jne <5 - |6: - |.if resume - | lea RD, [PC+2] // nresults+1 = 1 + true + results. - | mov dword [BASE-4], LJ_TTRUE // Prepend true to results. - |.else - | lea RD, [PC+1] // nresults+1 = 1 + results. - |.endif - |7: - | mov PC, SAVE_PC - | mov MULTRES, RD - |.if resume - | mov RAa, -8 - |.else - | xor RA, RA - |.endif - | test PC, FRAME_TYPE - | jz ->BC_RET_Z - | jmp ->vm_return - | - |8: // Coroutine returned with error (at co->top-1). - |.if resume - | mov dword [BASE-4], LJ_TFALSE // Prepend false to results. - | mov RA, L:PC->top - | sub RA, 8 - | mov L:PC->top, RA // Clear error from coroutine stack. - | // Copy error message. - |.if X64 - | mov RDa, [RA] - | mov [BASE], RDa - |.else - | mov RD, [RA] - | mov [BASE], RD - | mov RD, [RA+4] - | mov [BASE+4], RD - |.endif - | mov RD, 1+2 // nresults+1 = 1 + false + error. - | jmp <7 - |.else - | mov FCARG2, L:PC - | mov FCARG1, L:RB - | call extern lj_ffh_coroutine_wrap_err@8 // (lua_State *L, lua_State *co) - | // Error function does not return. - |.endif - | - |9: // Handle stack expansion on return from yield. - |.if X64 - | mov L:RA, TMP1 - |.else - | mov L:RA, ARG1 // The callee doesn't modify SAVE_L. - |.endif - | mov L:RA->top, KBASE // Undo coroutine stack clearing. - | mov FCARG2, PC - | mov FCARG1, L:RB - | call extern lj_state_growstack@8 // (lua_State *L, int n) - |.if X64 - | mov L:PC, TMP1 - |.else - | mov L:PC, ARG1 - |.endif - | mov BASE, L:RB->base - | jmp <4 // Retry the stack move. - |.endmacro - | - | coroutine_resume_wrap 1 // coroutine.resume - | coroutine_resume_wrap 0 // coroutine.wrap - | - |.ffunc coroutine_yield - | mov L:RB, SAVE_L - | test aword L:RB->cframe, CFRAME_RESUME - | jz ->fff_fallback - | mov L:RB->base, BASE - | lea RD, [BASE+NARGS:RD*8-8] - | mov L:RB->top, RD - | xor RD, RD - | mov aword L:RB->cframe, RDa - | mov al, LUA_YIELD - | mov byte L:RB->status, al - | jmp ->vm_leave_unw - | - |//-- Math library ------------------------------------------------------- - | - |.if not DUALNUM - |->fff_resi: // Dummy. - |.endif - | - |->fff_resn: - | mov PC, [BASE-4] - | fstp qword [BASE-8] - | jmp ->fff_res1 - | - | .ffunc_1 math_abs - |.if DUALNUM - | cmp dword [BASE+4], LJ_TISNUM; jne >2 - | mov RB, dword [BASE] - | cmp RB, 0; jns ->fff_resi - | neg RB; js >1 - |->fff_resbit: - |->fff_resi: - | mov PC, [BASE-4] - | mov dword [BASE-4], LJ_TISNUM - | mov dword [BASE-8], RB - | jmp ->fff_res1 - |1: - | mov PC, [BASE-4] - | mov dword [BASE-4], 0x41e00000 // 2^31. - | mov dword [BASE-8], 0 - | jmp ->fff_res1 - |2: - | ja ->fff_fallback - |.else - | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback - |.endif - | movsd xmm0, qword [BASE] - | sseconst_abs xmm1, RDa - | andps xmm0, xmm1 - |->fff_resxmm0: - | mov PC, [BASE-4] - | movsd qword [BASE-8], xmm0 - | // fallthrough - | - |->fff_res1: - | mov RD, 1+1 - |->fff_res: - | mov MULTRES, RD - |->fff_res_: - | test PC, FRAME_TYPE - | jnz >7 - |5: - | cmp PC_RB, RDL // More results expected? - | ja >6 - | // Adjust BASE. KBASE is assumed to be set for the calling frame. - | movzx RA, PC_RA - | not RAa // Note: ~RA = -(RA+1) - | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8 - | ins_next - | - |6: // Fill up results with nil. - | mov dword [BASE+RD*8-12], LJ_TNIL - | add RD, 1 - | jmp <5 - | - |7: // Non-standard return case. - | mov RAa, -8 // Results start at BASE+RA = BASE-8. - | jmp ->vm_return - | - |.if X64 - |.define fff_resfp, fff_resxmm0 - |.else - |.define fff_resfp, fff_resn - |.endif - | - |.macro math_round, func - | .ffunc math_ .. func - |.if DUALNUM - | cmp dword [BASE+4], LJ_TISNUM; jne >1 - | mov RB, dword [BASE]; jmp ->fff_resi - |1: - | ja ->fff_fallback - |.else - | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback - |.endif - | movsd xmm0, qword [BASE] - | call ->vm_ .. func .. _sse - |.if DUALNUM - | cvttsd2si RB, xmm0 - | cmp RB, 0x80000000 - | jne ->fff_resi - | cvtsi2sd xmm1, RB - | ucomisd xmm0, xmm1 - | jp ->fff_resxmm0 - | je ->fff_resi - |.endif - | jmp ->fff_resxmm0 - |.endmacro - | - | math_round floor - | math_round ceil - | - |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 - | - |.ffunc math_log - | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. - | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback - | movsd xmm0, qword [BASE] - |.if not X64 - | movsd FPARG1, xmm0 - |.endif - | mov RB, BASE - | call extern log - | mov BASE, RB - | jmp ->fff_resfp - | - |.macro math_extern, func - | .ffunc_nsse math_ .. func - |.if not X64 - | movsd FPARG1, xmm0 - |.endif - | mov RB, BASE - | call extern func - | mov BASE, RB - | jmp ->fff_resfp - |.endmacro - | - |.macro math_extern2, func - | .ffunc_nnsse math_ .. func - |.if not X64 - | movsd FPARG1, xmm0 - | movsd FPARG3, xmm1 - |.endif - | mov RB, BASE - | call extern func - | mov BASE, RB - | jmp ->fff_resfp - |.endmacro - | - | math_extern log10 - | math_extern exp - | math_extern sin - | math_extern cos - | math_extern tan - | math_extern asin - | math_extern acos - | math_extern atan - | math_extern sinh - | math_extern cosh - | math_extern tanh - | math_extern2 pow - | math_extern2 atan2 - | math_extern2 fmod - | - |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn - | - |.ffunc_1 math_frexp - | mov RB, [BASE+4] - | cmp RB, LJ_TISNUM; jae ->fff_fallback - | mov PC, [BASE-4] - | mov RC, [BASE] - | mov [BASE-4], RB; mov [BASE-8], RC - | shl RB, 1; cmp RB, 0xffe00000; jae >3 - | or RC, RB; jz >3 - | mov RC, 1022 - | cmp RB, 0x00200000; jb >4 - |1: - | shr RB, 21; sub RB, RC // Extract and unbias exponent. - | cvtsi2sd xmm0, RB - | mov RB, [BASE-4] - | and RB, 0x800fffff // Mask off exponent. - | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. - | mov [BASE-4], RB - |2: - | movsd qword [BASE], xmm0 - | mov RD, 1+2 - | jmp ->fff_res - |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. - | xorps xmm0, xmm0; jmp <2 - |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. - | movsd xmm0, qword [BASE] - | sseconst_hi xmm1, RBa, 43500000 // 2^54. - | mulsd xmm0, xmm1 - | movsd qword [BASE-8], xmm0 - | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 - | - |.ffunc_nsse math_modf - | mov RB, [BASE+4] - | mov PC, [BASE-4] - | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? - | movaps xmm4, xmm0 - | call ->vm_trunc_sse - | subsd xmm4, xmm0 - |1: - | movsd qword [BASE-8], xmm0 - | movsd qword [BASE], xmm4 - | mov RC, [BASE-4]; mov RB, [BASE+4] - | xor RC, RB; js >3 // Need to adjust sign? - |2: - | mov RD, 1+2 - | jmp ->fff_res - |3: - | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. - | jmp <2 - |4: - | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. - | - |.macro math_minmax, name, cmovop, sseop - | .ffunc name - | mov RA, 2 - | cmp dword [BASE+4], LJ_TISNUM - |.if DUALNUM - | jne >4 - | mov RB, dword [BASE] - |1: // Handle integers. - | cmp RA, RD; jae ->fff_resi - | cmp dword [BASE+RA*8-4], LJ_TISNUM; jne >3 - | cmp RB, dword [BASE+RA*8-8] - | cmovop RB, dword [BASE+RA*8-8] - | add RA, 1 - | jmp <1 - |3: - | ja ->fff_fallback - | // Convert intermediate result to number and continue below. - | cvtsi2sd xmm0, RB - | jmp >6 - |4: - | ja ->fff_fallback - |.else - | jae ->fff_fallback - |.endif - | - | movsd xmm0, qword [BASE] - |5: // Handle numbers or integers. - | cmp RA, RD; jae ->fff_resxmm0 - | cmp dword [BASE+RA*8-4], LJ_TISNUM - |.if DUALNUM - | jb >6 - | ja ->fff_fallback - | cvtsi2sd xmm1, dword [BASE+RA*8-8] - | jmp >7 - |.else - | jae ->fff_fallback - |.endif - |6: - | movsd xmm1, qword [BASE+RA*8-8] - |7: - | sseop xmm0, xmm1 - | add RA, 1 - | jmp <5 - |.endmacro - | - | math_minmax math_min, cmovg, minsd - | math_minmax math_max, cmovl, maxsd - | - |//-- String library ----------------------------------------------------- - | - |.ffunc string_byte // Only handle the 1-arg case here. - | cmp NARGS:RD, 1+1; jne ->fff_fallback - | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback - | mov STR:RB, [BASE] - | mov PC, [BASE-4] - | cmp dword STR:RB->len, 1 - | jb ->fff_res0 // Return no results for empty string. - | movzx RB, byte STR:RB[1] - |.if DUALNUM - | jmp ->fff_resi - |.else - | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 - |.endif - | - |.ffunc string_char // Only handle the 1-arg case here. - | ffgccheck - | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg. - | cmp dword [BASE+4], LJ_TISNUM - |.if DUALNUM - | jne ->fff_fallback - | mov RB, dword [BASE] - | cmp RB, 255; ja ->fff_fallback - | mov TMP2, RB - |.else - | jae ->fff_fallback - | cvttsd2si RB, qword [BASE] - | cmp RB, 255; ja ->fff_fallback - | mov TMP2, RB - |.endif - |.if X64 - | mov TMP3, 1 - |.else - | mov ARG3, 1 - |.endif - | lea RDa, TMP2 // Points to stack. Little-endian. - |->fff_newstr: - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - |.if X64 - | mov CARG3d, TMP3 // Zero-extended to size_t. - | mov CARG2, RDa // May be 64 bit ptr to stack. - | mov CARG1d, L:RB - |.else - | mov ARG2, RD - | mov ARG1, L:RB - |.endif - | mov SAVE_PC, PC - | call extern lj_str_new // (lua_State *L, char *str, size_t l) - |->fff_resstr: - | // GCstr * returned in eax (RD). - | mov BASE, L:RB->base - | mov PC, [BASE-4] - | mov dword [BASE-4], LJ_TSTR - | mov [BASE-8], STR:RD - | jmp ->fff_res1 - | - |.ffunc string_sub - | ffgccheck - | mov TMP2, -1 - | cmp NARGS:RD, 1+2; jb ->fff_fallback - | jna >1 - | cmp dword [BASE+20], LJ_TISNUM - |.if DUALNUM - | jne ->fff_fallback - | mov RB, dword [BASE+16] - | mov TMP2, RB - |.else - | jae ->fff_fallback - | cvttsd2si RB, qword [BASE+16] - | mov TMP2, RB - |.endif - |1: - | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback - | cmp dword [BASE+12], LJ_TISNUM - |.if DUALNUM - | jne ->fff_fallback - |.else - | jae ->fff_fallback - |.endif - | mov STR:RB, [BASE] - | mov TMP3, STR:RB - | mov RB, STR:RB->len - |.if DUALNUM - | mov RA, dword [BASE+8] - |.else - | cvttsd2si RA, qword [BASE+8] - |.endif - | mov RC, TMP2 - | cmp RB, RC // len < end? (unsigned compare) - | jb >5 - |2: - | test RA, RA // start <= 0? - | jle >7 - |3: - | mov STR:RB, TMP3 - | sub RC, RA // start > end? - | jl ->fff_emptystr - | lea RB, [STR:RB+RA+#STR-1] - | add RC, 1 - |4: - |.if X64 - | mov TMP3, RC - |.else - | mov ARG3, RC - |.endif - | mov RD, RB - | jmp ->fff_newstr - | - |5: // Negative end or overflow. - | jl >6 - | lea RC, [RC+RB+1] // end = end+(len+1) - | jmp <2 - |6: // Overflow. - | mov RC, RB // end = len - | jmp <2 - | - |7: // Negative start or underflow. - | je >8 - | add RA, RB // start = start+(len+1) - | add RA, 1 - | jg <3 // start > 0? - |8: // Underflow. - | mov RA, 1 // start = 1 - | jmp <3 - | - |->fff_emptystr: // Range underflow. - | xor RC, RC // Zero length. Any ptr in RB is ok. - | jmp <4 - | - |.macro ffstring_op, name - | .ffunc_1 string_ .. name - | ffgccheck - | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback - | mov L:RB, SAVE_L - | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] - | mov L:RB->base, BASE - | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE - | mov RC, SBUF:FCARG1->b - | mov SBUF:FCARG1->L, L:RB - | mov SBUF:FCARG1->p, RC - | mov SAVE_PC, PC - | call extern lj_buf_putstr_ .. name .. @8 - | mov FCARG1, eax - | call extern lj_buf_tostr@4 - | jmp ->fff_resstr - |.endmacro - | - |ffstring_op reverse - |ffstring_op lower - |ffstring_op upper - | - |//-- Bit library -------------------------------------------------------- - | - |.macro .ffunc_bit, name, kind, fdef - | fdef name - |.if kind == 2 - | sseconst_tobit xmm1, RBa - |.endif - | cmp dword [BASE+4], LJ_TISNUM - |.if DUALNUM - | jne >1 - | mov RB, dword [BASE] - |.if kind > 0 - | jmp >2 - |.else - | jmp ->fff_resbit - |.endif - |1: - | ja ->fff_fallback - |.else - | jae ->fff_fallback - |.endif - | movsd xmm0, qword [BASE] - |.if kind < 2 - | sseconst_tobit xmm1, RBa - |.endif - | addsd xmm0, xmm1 - | movd RB, xmm0 - |2: - |.endmacro - | - |.macro .ffunc_bit, name, kind - | .ffunc_bit name, kind, .ffunc_1 - |.endmacro - | - |.ffunc_bit bit_tobit, 0 - | jmp ->fff_resbit - | - |.macro .ffunc_bit_op, name, ins - | .ffunc_bit name, 2 - | mov TMP2, NARGS:RD // Save for fallback. - | lea RD, [BASE+NARGS:RD*8-16] - |1: - | cmp RD, BASE - | jbe ->fff_resbit - | cmp dword [RD+4], LJ_TISNUM - |.if DUALNUM - | jne >2 - | ins RB, dword [RD] - | sub RD, 8 - | jmp <1 - |2: - | ja ->fff_fallback_bit_op - |.else - | jae ->fff_fallback_bit_op - |.endif - | movsd xmm0, qword [RD] - | addsd xmm0, xmm1 - | movd RA, xmm0 - | ins RB, RA - | sub RD, 8 - | jmp <1 - |.endmacro - | - |.ffunc_bit_op bit_band, and - |.ffunc_bit_op bit_bor, or - |.ffunc_bit_op bit_bxor, xor - | - |.ffunc_bit bit_bswap, 1 - | bswap RB - | jmp ->fff_resbit - | - |.ffunc_bit bit_bnot, 1 - | not RB - |.if DUALNUM - | jmp ->fff_resbit - |.else - |->fff_resbit: - | cvtsi2sd xmm0, RB - | jmp ->fff_resxmm0 - |.endif - | - |->fff_fallback_bit_op: - | mov NARGS:RD, TMP2 // Restore for fallback - | jmp ->fff_fallback - | - |.macro .ffunc_bit_sh, name, ins - |.if DUALNUM - | .ffunc_bit name, 1, .ffunc_2 - | // Note: no inline conversion from number for 2nd argument! - | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback - | mov RA, dword [BASE+8] - |.else - | .ffunc_nnsse name - | sseconst_tobit xmm2, RBa - | addsd xmm0, xmm2 - | addsd xmm1, xmm2 - | movd RB, xmm0 - | movd RA, xmm1 - |.endif - | ins RB, cl // Assumes RA is ecx. - | jmp ->fff_resbit - |.endmacro - | - |.ffunc_bit_sh bit_lshift, shl - |.ffunc_bit_sh bit_rshift, shr - |.ffunc_bit_sh bit_arshift, sar - |.ffunc_bit_sh bit_rol, rol - |.ffunc_bit_sh bit_ror, ror - | - |//----------------------------------------------------------------------- - | - |->fff_fallback_2: - | mov NARGS:RD, 1+2 // Other args are ignored, anyway. - | jmp ->fff_fallback - |->fff_fallback_1: - | mov NARGS:RD, 1+1 // Other args are ignored, anyway. - |->fff_fallback: // Call fast function fallback handler. - | // BASE = new base, RD = nargs+1 - | mov L:RB, SAVE_L - | mov PC, [BASE-4] // Fallback may overwrite PC. - | mov SAVE_PC, PC // Redundant (but a defined value). - | mov L:RB->base, BASE - | lea RD, [BASE+NARGS:RD*8-8] - | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler. - | mov L:RB->top, RD - | mov CFUNC:RD, [BASE-8] - | cmp RA, L:RB->maxstack - | ja >5 // Need to grow stack. - |.if X64 - | mov CARG1d, L:RB - |.else - | mov ARG1, L:RB - |.endif - | call aword CFUNC:RD->f // (lua_State *L) - | mov BASE, L:RB->base - | // Either throws an error, or recovers and returns -1, 0 or nresults+1. - | test RD, RD; jg ->fff_res // Returned nresults+1? - |1: - | mov RA, L:RB->top - | sub RA, BASE - | shr RA, 3 - | test RD, RD - | lea NARGS:RD, [RA+1] - | mov LFUNC:RB, [BASE-8] - | jne ->vm_call_tail // Returned -1? - | ins_callt // Returned 0: retry fast path. - | - |// Reconstruct previous base for vmeta_call during tailcall. - |->vm_call_tail: - | mov RA, BASE - | test PC, FRAME_TYPE - | jnz >3 - | movzx RB, PC_RA - | not RBa // Note: ~RB = -(RB+1) - | lea BASE, [BASE+RB*8] // base = base - (RB+1)*8 - | jmp ->vm_call_dispatch // Resolve again for tailcall. - |3: - | mov RB, PC - | and RB, -8 - | sub BASE, RB - | jmp ->vm_call_dispatch // Resolve again for tailcall. - | - |5: // Grow stack for fallback handler. - | mov FCARG2, LUA_MINSTACK - | mov FCARG1, L:RB - | call extern lj_state_growstack@8 // (lua_State *L, int n) - | mov BASE, L:RB->base - | xor RD, RD // Simulate a return 0. - | jmp <1 // Dumb retry (goes through ff first). - | - |->fff_gcstep: // Call GC step function. - | // BASE = new base, RD = nargs+1 - | pop RBa // Must keep stack at same level. - | mov TMPa, RBa // Save return address - | mov L:RB, SAVE_L - | mov SAVE_PC, PC // Redundant (but a defined value). - | mov L:RB->base, BASE - | lea RD, [BASE+NARGS:RD*8-8] - | mov FCARG1, L:RB - | mov L:RB->top, RD - | call extern lj_gc_step@4 // (lua_State *L) - | mov BASE, L:RB->base - | mov RD, L:RB->top - | sub RD, BASE - | shr RD, 3 - | add NARGS:RD, 1 - | mov RBa, TMPa - | push RBa // Restore return address. - | ret - | - |//----------------------------------------------------------------------- - |//-- Special dispatch targets ------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_record: // Dispatch target for recording phase. - |.if JIT - | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] - | test RDL, HOOK_VMEVENT // No recording while in vmevent. - | jnz >5 - | // Decrement the hookcount for consistency, but always do the call. - | test RDL, HOOK_ACTIVE - | jnz >1 - | test RDL, LUA_MASKLINE|LUA_MASKCOUNT - | jz >1 - | dec dword [DISPATCH+DISPATCH_GL(hookcount)] - | jmp >1 - |.endif - | - |->vm_rethook: // Dispatch target for return hooks. - | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] - | test RDL, HOOK_ACTIVE // Hook already active? - | jnz >5 - | jmp >1 - | - |->vm_inshook: // Dispatch target for instr/line hooks. - | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] - | test RDL, HOOK_ACTIVE // Hook already active? - | jnz >5 - | - | test RDL, LUA_MASKLINE|LUA_MASKCOUNT - | jz >5 - | dec dword [DISPATCH+DISPATCH_GL(hookcount)] - | jz >1 - | test RDL, LUA_MASKLINE - | jz >5 - |1: - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov FCARG2, PC // Caveat: FCARG2 == BASE - | mov FCARG1, L:RB - | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. - | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc) - |3: - | mov BASE, L:RB->base - |4: - | movzx RA, PC_RA - |5: - | movzx OP, PC_OP - | movzx RD, PC_RD - |.if X64 - | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins. - |.else - | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Re-dispatch to static ins. - |.endif - | - |->cont_hook: // Continue from hook yield. - | add PC, 4 - | mov RA, [RB-24] - | mov MULTRES, RA // Restore MULTRES for *M ins. - | jmp <4 - | - |->vm_hotloop: // Hot loop counter underflow. - |.if JIT - | mov LFUNC:RB, [BASE-8] // Same as curr_topL(L). - | mov RB, LFUNC:RB->pc - | movzx RD, byte [RB+PC2PROTO(framesize)] - | lea RD, [BASE+RD*8] - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov L:RB->top, RD - | mov FCARG2, PC - | lea FCARG1, [DISPATCH+GG_DISP2J] - | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa - | mov SAVE_PC, PC - | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc) - | jmp <3 - |.endif - | - |->vm_callhook: // Dispatch target for call hooks. - | mov SAVE_PC, PC - |.if JIT - | jmp >1 - |.endif - | - |->vm_hotcall: // Hot call counter underflow. - |.if JIT - | mov SAVE_PC, PC - | or PC, 1 // Marker for hot call. - |1: - |.endif - | lea RD, [BASE+NARGS:RD*8-8] - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov L:RB->top, RD - | mov FCARG2, PC - | mov FCARG1, L:RB - | call extern lj_dispatch_call@8 // (lua_State *L, const BCIns *pc) - | // ASMFunction returned in eax/rax (RDa). - | mov SAVE_PC, 0 // Invalidate for subsequent line hook. - |.if JIT - | and PC, -2 - |.endif - | mov BASE, L:RB->base - | mov RAa, RDa - | mov RD, L:RB->top - | sub RD, BASE - | mov RBa, RAa - | movzx RA, PC_RA - | shr RD, 3 - | add NARGS:RD, 1 - | jmp RBa - | - |->cont_stitch: // Trace stitching. - |.if JIT - | // BASE = base, RC = result, RB = mbase - | mov TRACE:RA, [RB-24] // Save previous trace. - | mov TMP1, TRACE:RA - | mov TMP3, DISPATCH // Need one more register. - | mov DISPATCH, MULTRES - | movzx RA, PC_RA - | lea RA, [BASE+RA*8] // Call base. - | sub DISPATCH, 1 - | jz >2 - |1: // Move results down. - |.if X64 - | mov RBa, [RC] - | mov [RA], RBa - |.else - | mov RB, [RC] - | mov [RA], RB - | mov RB, [RC+4] - | mov [RA+4], RB - |.endif - | add RC, 8 - | add RA, 8 - | sub DISPATCH, 1 - | jnz <1 - |2: - | movzx RC, PC_RA - | movzx RB, PC_RB - | add RC, RB - | lea RC, [BASE+RC*8-8] - |3: - | cmp RC, RA - | ja >9 // More results wanted? - | - | mov DISPATCH, TMP3 - | mov TRACE:RD, TMP1 // Get previous trace. - | movzx RB, word TRACE:RD->traceno - | movzx RD, word TRACE:RD->link - | cmp RD, RB - | je ->cont_nop // Blacklisted. - | test RD, RD - | jne =>BC_JLOOP // Jump to stitched trace. - | - | // Stitch a new trace to the previous trace. - | mov [DISPATCH+DISPATCH_J(exitno)], RB - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov FCARG2, PC - | lea FCARG1, [DISPATCH+GG_DISP2J] - | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa - | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc) - | mov BASE, L:RB->base - | jmp ->cont_nop - | - |9: // Fill up results with nil. - | mov dword [RA+4], LJ_TNIL - | add RA, 8 - | jmp <3 - |.endif - | - |->vm_profhook: // Dispatch target for profiler hook. -#if LJ_HASPROFILE - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov FCARG2, PC // Caveat: FCARG2 == BASE - | mov FCARG1, L:RB - | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc) - | mov BASE, L:RB->base - | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. - | sub PC, 4 - | jmp ->cont_nop -#endif - | - |//----------------------------------------------------------------------- - |//-- Trace exit handler ------------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Called from an exit stub with the exit number on the stack. - |// The 16 bit exit number is stored with two (sign-extended) push imm8. - |->vm_exit_handler: - |.if JIT - |.if X64 - | push r13; push r12 - | push r11; push r10; push r9; push r8 - | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp - | push rbx; push rdx; push rcx; push rax - | movzx RC, byte [rbp-8] // Reconstruct exit number. - | mov RCH, byte [rbp-16] - | mov [rbp-8], r15; mov [rbp-16], r14 - |.else - | push ebp; lea ebp, [esp+12]; push ebp - | push ebx; push edx; push ecx; push eax - | movzx RC, byte [ebp-4] // Reconstruct exit number. - | mov RCH, byte [ebp-8] - | mov [ebp-4], edi; mov [ebp-8], esi - |.endif - | // Caveat: DISPATCH is ebx. - | mov DISPATCH, [ebp] - | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number. - | set_vmstate EXIT - | mov [DISPATCH+DISPATCH_J(exitno)], RC - | mov [DISPATCH+DISPATCH_J(parent)], RA - |.if X64 - |.if X64WIN - | sub rsp, 16*8+4*8 // Room for SSE regs + save area. - |.else - | sub rsp, 16*8 // Room for SSE regs. - |.endif - | add rbp, -128 - | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14 - | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12 - | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10 - | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8 - | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6 - | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4 - | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2 - | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0 - |.else - | sub esp, 8*8+16 // Room for SSE regs + args. - | movsd qword [ebp-40], xmm7; movsd qword [ebp-48], xmm6 - | movsd qword [ebp-56], xmm5; movsd qword [ebp-64], xmm4 - | movsd qword [ebp-72], xmm3; movsd qword [ebp-80], xmm2 - | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 - |.endif - | // Caveat: RB is ebp. - | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)] - | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] - | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa - | mov L:RB->base, BASE - |.if X64WIN - | lea CARG2, [rsp+4*8] - |.elif X64 - | mov CARG2, rsp - |.else - | lea FCARG2, [esp+16] - |.endif - | lea FCARG1, [DISPATCH+GG_DISP2J] - | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 - | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) - | // MULTRES or negated error code returned in eax (RD). - | mov RAa, L:RB->cframe - | and RAa, CFRAME_RAWMASK - |.if X64WIN - | // Reposition stack later. - |.elif X64 - | mov rsp, RAa // Reposition stack to C frame. - |.else - | mov esp, RAa // Reposition stack to C frame. - |.endif - | mov [RAa+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield). - | mov BASE, L:RB->base - | mov PC, [RAa+CFRAME_OFS_PC] // Get SAVE_PC. - |.if X64 - | jmp >1 - |.endif - |.endif - |->vm_exit_interp: - | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. - |.if JIT - |.if X64 - | // Restore additional callee-save registers only used in compiled code. - |.if X64WIN - | lea RAa, [rsp+9*16+4*8] - |1: - | movdqa xmm15, [RAa-9*16] - | movdqa xmm14, [RAa-8*16] - | movdqa xmm13, [RAa-7*16] - | movdqa xmm12, [RAa-6*16] - | movdqa xmm11, [RAa-5*16] - | movdqa xmm10, [RAa-4*16] - | movdqa xmm9, [RAa-3*16] - | movdqa xmm8, [RAa-2*16] - | movdqa xmm7, [RAa-1*16] - | mov rsp, RAa // Reposition stack to C frame. - | movdqa xmm6, [RAa] - | mov r15, CSAVE_3 - | mov r14, CSAVE_4 - |.else - | add rsp, 16 // Reposition stack to C frame. - |1: - |.endif - | mov r13, TMPa - | mov r12, TMPQ - |.endif - | test RD, RD; js >9 // Check for error from exit. - | mov L:RB, SAVE_L - | mov MULTRES, RD - | mov LFUNC:KBASE, [BASE-8] - | mov KBASE, LFUNC:KBASE->pc - | mov KBASE, [KBASE+PC2PROTO(k)] - | mov L:RB->base, BASE - | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 - | set_vmstate INTERP - | // Modified copy of ins_next which handles function header dispatch, too. - | mov RC, [PC] - | movzx RA, RCH - | movzx OP, RCL - | add PC, 4 - | shr RC, 16 - | cmp OP, BC_FUNCF // Function header? - | jb >3 - | cmp OP, BC_FUNCC+2 // Fast function? - | jae >4 - |2: - | mov RC, MULTRES // RC/RD holds nres+1. - |3: - |.if X64 - | jmp aword [DISPATCH+OP*8] - |.else - | jmp aword [DISPATCH+OP*4] - |.endif - | - |4: // Check frame below fast function. - | mov RC, [BASE-4] - | test RC, FRAME_TYPE - | jnz <2 // Trace stitching continuation? - | // Otherwise set KBASE for Lua function below fast function. - | movzx RC, byte [RC-3] - | not RCa - | mov LFUNC:KBASE, [BASE+RC*8-8] - | mov KBASE, LFUNC:KBASE->pc - | mov KBASE, [KBASE+PC2PROTO(k)] - | jmp <2 - | - |9: // Rethrow error from the right C frame. - | neg RD - | mov FCARG1, L:RB - | mov FCARG2, RD - | call extern lj_err_throw@8 // (lua_State *L, int errcode) - |.endif - | - |//----------------------------------------------------------------------- - |//-- Math helper functions ---------------------------------------------- - |//----------------------------------------------------------------------- - | - |// FP value rounding. Called by math.floor/math.ceil fast functions - |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. - |.macro vm_round, name, mode, cond - |->name: - |.if not X64 and cond - | movsd xmm0, qword [esp+4] - | call ->name .. _sse - | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg. - | fld qword [esp+4] - | ret - |.endif - | - |->name .. _sse: - | sseconst_abs xmm2, RDa - | sseconst_2p52 xmm3, RDa - | movaps xmm1, xmm0 - | andpd xmm1, xmm2 // |x| - | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|. - | jbe >1 - | andnpd xmm2, xmm0 // Isolate sign bit. - |.if mode == 2 // trunc(x)? - | movaps xmm0, xmm1 - | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 - | subsd xmm1, xmm3 - | sseconst_1 xmm3, RDa - | cmpsd xmm0, xmm1, 1 // |x| < result? - | andpd xmm0, xmm3 - | subsd xmm1, xmm0 // If yes, subtract -1. - | orpd xmm1, xmm2 // Merge sign bit back in. - |.else - | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 - | subsd xmm1, xmm3 - | orpd xmm1, xmm2 // Merge sign bit back in. - | .if mode == 1 // ceil(x)? - | sseconst_m1 xmm2, RDa // Must subtract -1 to preserve -0. - | cmpsd xmm0, xmm1, 6 // x > result? - | .else // floor(x)? - | sseconst_1 xmm2, RDa - | cmpsd xmm0, xmm1, 1 // x < result? - | .endif - | andpd xmm0, xmm2 - | subsd xmm1, xmm0 // If yes, subtract +-1. - |.endif - | movaps xmm0, xmm1 - |1: - | ret - |.endmacro - | - | vm_round vm_floor, 0, 1 - | vm_round vm_ceil, 1, JIT - | vm_round vm_trunc, 2, JIT - | - |// FP modulo x%y. Called by BC_MOD* and vm_arith. - |->vm_mod: - |// Args in xmm0/xmm1, return value in xmm0. - |// Caveat: xmm0-xmm5 and RC (eax) modified! - | movaps xmm5, xmm0 - | divsd xmm0, xmm1 - | sseconst_abs xmm2, RDa - | sseconst_2p52 xmm3, RDa - | movaps xmm4, xmm0 - | andpd xmm4, xmm2 // |x/y| - | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|. - | jbe >1 - | andnpd xmm2, xmm0 // Isolate sign bit. - | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52 - | subsd xmm4, xmm3 - | orpd xmm4, xmm2 // Merge sign bit back in. - | sseconst_1 xmm2, RDa - | cmpsd xmm0, xmm4, 1 // x/y < result? - | andpd xmm0, xmm2 - | subsd xmm4, xmm0 // If yes, subtract 1.0. - | movaps xmm0, xmm5 - | mulsd xmm1, xmm4 - | subsd xmm0, xmm1 - | ret - |1: - | mulsd xmm1, xmm0 - | movaps xmm0, xmm5 - | subsd xmm0, xmm1 - | ret - | - |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. - |->vm_powi_sse: - | cmp eax, 1; jle >6 // i<=1? - | // Now 1 < (unsigned)i <= 0x80000000. - |1: // Handle leading zeros. - | test eax, 1; jnz >2 - | mulsd xmm0, xmm0 - | shr eax, 1 - | jmp <1 - |2: - | shr eax, 1; jz >5 - | movaps xmm1, xmm0 - |3: // Handle trailing bits. - | mulsd xmm0, xmm0 - | shr eax, 1; jz >4 - | jnc <3 - | mulsd xmm1, xmm0 - | jmp <3 - |4: - | mulsd xmm0, xmm1 - |5: - | ret - |6: - | je <5 // x^1 ==> x - | jb >7 // x^0 ==> 1 - | neg eax - | call <1 - | sseconst_1 xmm1, RDa - | divsd xmm1, xmm0 - | movaps xmm0, xmm1 - | ret - |7: - | sseconst_1 xmm0, RDa - | ret - | - |//----------------------------------------------------------------------- - |//-- Miscellaneous functions -------------------------------------------- - |//----------------------------------------------------------------------- - | - |// int lj_vm_cpuid(uint32_t f, uint32_t res[4]) - |->vm_cpuid: - |.if X64 - | mov eax, CARG1d - | .if X64WIN; push rsi; mov rsi, CARG2; .endif - | push rbx - | xor ecx, ecx - | cpuid - | mov [rsi], eax - | mov [rsi+4], ebx - | mov [rsi+8], ecx - | mov [rsi+12], edx - | pop rbx - | .if X64WIN; pop rsi; .endif - | ret - |.else - | pushfd - | pop edx - | mov ecx, edx - | xor edx, 0x00200000 // Toggle ID bit in flags. - | push edx - | popfd - | pushfd - | pop edx - | xor eax, eax // Zero means no features supported. - | cmp ecx, edx - | jz >1 // No ID toggle means no CPUID support. - | mov eax, [esp+4] // Argument 1 is function number. - | push edi - | push ebx - | xor ecx, ecx - | cpuid - | mov edi, [esp+16] // Argument 2 is result area. - | mov [edi], eax - | mov [edi+4], ebx - | mov [edi+8], ecx - | mov [edi+12], edx - | pop ebx - | pop edi - |1: - | ret - |.endif - | - |//----------------------------------------------------------------------- - |//-- Assertions --------------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->assert_bad_for_arg_type: -#ifdef LUA_USE_ASSERT - | int3 -#endif - | int3 - | - |//----------------------------------------------------------------------- - |//-- FFI helper functions ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Handler for callback functions. Callback slot number in ah/al. - |->vm_ffi_callback: - |.if FFI - |.type CTSTATE, CTState, PC - |.if not X64 - | sub esp, 16 // Leave room for SAVE_ERRF etc. - |.endif - | saveregs_ // ebp/rbp already saved. ebp now holds global_State *. - | lea DISPATCH, [ebp+GG_G2DISP] - | mov CTSTATE, GL:ebp->ctype_state - | movzx eax, ax - | mov CTSTATE->cb.slot, eax - |.if X64 - | mov CTSTATE->cb.gpr[0], CARG1 - | mov CTSTATE->cb.gpr[1], CARG2 - | mov CTSTATE->cb.gpr[2], CARG3 - | mov CTSTATE->cb.gpr[3], CARG4 - | movsd qword CTSTATE->cb.fpr[0], xmm0 - | movsd qword CTSTATE->cb.fpr[1], xmm1 - | movsd qword CTSTATE->cb.fpr[2], xmm2 - | movsd qword CTSTATE->cb.fpr[3], xmm3 - |.if X64WIN - | lea rax, [rsp+CFRAME_SIZE+4*8] - |.else - | lea rax, [rsp+CFRAME_SIZE] - | mov CTSTATE->cb.gpr[4], CARG5 - | mov CTSTATE->cb.gpr[5], CARG6 - | movsd qword CTSTATE->cb.fpr[4], xmm4 - | movsd qword CTSTATE->cb.fpr[5], xmm5 - | movsd qword CTSTATE->cb.fpr[6], xmm6 - | movsd qword CTSTATE->cb.fpr[7], xmm7 - |.endif - | mov CTSTATE->cb.stack, rax - | mov CARG2, rsp - |.else - | lea eax, [esp+CFRAME_SIZE+16] - | mov CTSTATE->cb.gpr[0], FCARG1 - | mov CTSTATE->cb.gpr[1], FCARG2 - | mov CTSTATE->cb.stack, eax - | mov FCARG1, [esp+CFRAME_SIZE+12] // Move around misplaced retaddr/ebp. - | mov FCARG2, [esp+CFRAME_SIZE+8] - | mov SAVE_RET, FCARG1 - | mov SAVE_R4, FCARG2 - | mov FCARG2, esp - |.endif - | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok. - | mov FCARG1, CTSTATE - | call extern lj_ccallback_enter@8 // (CTState *cts, void *cf) - | // lua_State * returned in eax (RD). - | set_vmstate INTERP - | mov BASE, L:RD->base - | mov RD, L:RD->top - | sub RD, BASE - | mov LFUNC:RB, [BASE-8] - | shr RD, 3 - | add RD, 1 - | ins_callt - |.endif - | - |->cont_ffi_callback: // Return from FFI callback. - |.if FFI - | mov L:RA, SAVE_L - | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)] - | mov aword CTSTATE->L, L:RAa - | mov L:RA->base, BASE - | mov L:RA->top, RB - | mov FCARG1, CTSTATE - | mov FCARG2, RC - | call extern lj_ccallback_leave@8 // (CTState *cts, TValue *o) - |.if X64 - | mov rax, CTSTATE->cb.gpr[0] - | movsd xmm0, qword CTSTATE->cb.fpr[0] - | jmp ->vm_leave_unw - |.else - | mov L:RB, SAVE_L - | mov eax, CTSTATE->cb.gpr[0] - | mov edx, CTSTATE->cb.gpr[1] - | cmp dword CTSTATE->cb.gpr[2], 1 - | jb >7 - | je >6 - | fld qword CTSTATE->cb.fpr[0].d - | jmp >7 - |6: - | fld dword CTSTATE->cb.fpr[0].f - |7: - | mov ecx, L:RB->top - | movzx ecx, word [ecx+6] // Get stack adjustment and copy up. - | mov SAVE_L, ecx // Must be one slot above SAVE_RET - | restoreregs - | pop ecx // Move return addr from SAVE_RET. - | add esp, [esp] // Adjust stack. - | add esp, 16 - | push ecx - | ret - |.endif - |.endif - | - |->vm_ffi_call@4: // Call C function via FFI. - | // Caveat: needs special frame unwinding, see below. - |.if FFI - |.if X64 - | .type CCSTATE, CCallState, rbx - | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 - |.else - | .type CCSTATE, CCallState, ebx - | push ebp; mov ebp, esp; push ebx; mov CCSTATE, FCARG1 - |.endif - | - | // Readjust stack. - |.if X64 - | mov eax, CCSTATE->spadj - | sub rsp, rax - |.else - | sub esp, CCSTATE->spadj - |.if WIN - | mov CCSTATE->spadj, esp - |.endif - |.endif - | - | // Copy stack slots. - | movzx ecx, byte CCSTATE->nsp - | sub ecx, 1 - | js >2 - |1: - |.if X64 - | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)] - | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax - |.else - | mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)] - | mov [esp+ecx*4], eax - |.endif - | sub ecx, 1 - | jns <1 - |2: - | - |.if X64 - | movzx eax, byte CCSTATE->nfpr - | mov CARG1, CCSTATE->gpr[0] - | mov CARG2, CCSTATE->gpr[1] - | mov CARG3, CCSTATE->gpr[2] - | mov CARG4, CCSTATE->gpr[3] - |.if not X64WIN - | mov CARG5, CCSTATE->gpr[4] - | mov CARG6, CCSTATE->gpr[5] - |.endif - | test eax, eax; jz >5 - | movaps xmm0, CCSTATE->fpr[0] - | movaps xmm1, CCSTATE->fpr[1] - | movaps xmm2, CCSTATE->fpr[2] - | movaps xmm3, CCSTATE->fpr[3] - |.if not X64WIN - | cmp eax, 4; jbe >5 - | movaps xmm4, CCSTATE->fpr[4] - | movaps xmm5, CCSTATE->fpr[5] - | movaps xmm6, CCSTATE->fpr[6] - | movaps xmm7, CCSTATE->fpr[7] - |.endif - |5: - |.else - | mov FCARG1, CCSTATE->gpr[0] - | mov FCARG2, CCSTATE->gpr[1] - |.endif - | - | call aword CCSTATE->func - | - |.if X64 - | mov CCSTATE->gpr[0], rax - | movaps CCSTATE->fpr[0], xmm0 - |.if not X64WIN - | mov CCSTATE->gpr[1], rdx - | movaps CCSTATE->fpr[1], xmm1 - |.endif - |.else - | mov CCSTATE->gpr[0], eax - | mov CCSTATE->gpr[1], edx - | cmp byte CCSTATE->resx87, 1 - | jb >7 - | je >6 - | fstp qword CCSTATE->fpr[0].d[0] - | jmp >7 - |6: - | fstp dword CCSTATE->fpr[0].f[0] - |7: - |.if WIN - | sub CCSTATE->spadj, esp - |.endif - |.endif - | - |.if X64 - | mov rbx, [rbp-8]; leave; ret - |.else - | mov ebx, [ebp-4]; leave; ret - |.endif - |.endif - |// Note: vm_ffi_call must be the last function in this object file! - | - |//----------------------------------------------------------------------- -} - -/* Generate the code for a single instruction. */ -static void build_ins(BuildCtx *ctx, BCOp op, int defop) -{ - int vk = 0; - |// Note: aligning all instructions does not pay off. - |=>defop: - - switch (op) { - - /* -- Comparison ops ---------------------------------------------------- */ - - /* Remember: all ops branch for a true comparison, fall through otherwise. */ - - |.macro jmp_comp, lt, ge, le, gt, target - ||switch (op) { - ||case BC_ISLT: - | lt target - ||break; - ||case BC_ISGE: - | ge target - ||break; - ||case BC_ISLE: - | le target - ||break; - ||case BC_ISGT: - | gt target - ||break; - ||default: break; /* Shut up GCC. */ - ||} - |.endmacro - - case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: - | // RA = src1, RD = src2, JMP with RD = target - | ins_AD - |.if DUALNUM - | checkint RA, >7 - | checkint RD, >8 - | mov RB, dword [BASE+RA*8] - | add PC, 4 - | cmp RB, dword [BASE+RD*8] - | jmp_comp jge, jl, jg, jle, >9 - |6: - | movzx RD, PC_RD - | branchPC RD - |9: - | ins_next - | - |7: // RA is not an integer. - | ja ->vmeta_comp - | // RA is a number. - | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp - | // RA is a number, RD is an integer. - | cvtsi2sd xmm0, dword [BASE+RD*8] - | jmp >2 - | - |8: // RA is an integer, RD is not an integer. - | ja ->vmeta_comp - | // RA is an integer, RD is a number. - | cvtsi2sd xmm1, dword [BASE+RA*8] - | movsd xmm0, qword [BASE+RD*8] - | add PC, 4 - | ucomisd xmm0, xmm1 - | jmp_comp jbe, ja, jb, jae, <9 - | jmp <6 - |.else - | checknum RA, ->vmeta_comp - | checknum RD, ->vmeta_comp - |.endif - |1: - | movsd xmm0, qword [BASE+RD*8] - |2: - | add PC, 4 - | ucomisd xmm0, qword [BASE+RA*8] - |3: - | // Unordered: all of ZF CF PF set, ordered: PF clear. - | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. - |.if DUALNUM - | jmp_comp jbe, ja, jb, jae, <9 - | jmp <6 - |.else - | jmp_comp jbe, ja, jb, jae, >1 - | movzx RD, PC_RD - | branchPC RD - |1: - | ins_next - |.endif - break; - - case BC_ISEQV: case BC_ISNEV: - vk = op == BC_ISEQV; - | ins_AD // RA = src1, RD = src2, JMP with RD = target - | mov RB, [BASE+RD*8+4] - | add PC, 4 - |.if DUALNUM - | cmp RB, LJ_TISNUM; jne >7 - | checkint RA, >8 - | mov RB, dword [BASE+RD*8] - | cmp RB, dword [BASE+RA*8] - if (vk) { - | jne >9 - } else { - | je >9 - } - | movzx RD, PC_RD - | branchPC RD - |9: - | ins_next - | - |7: // RD is not an integer. - | ja >5 - | // RD is a number. - | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 - | // RD is a number, RA is an integer. - | cvtsi2sd xmm0, dword [BASE+RA*8] - | jmp >2 - | - |8: // RD is an integer, RA is not an integer. - | ja >5 - | // RD is an integer, RA is a number. - | cvtsi2sd xmm0, dword [BASE+RD*8] - | ucomisd xmm0, qword [BASE+RA*8] - | jmp >4 - | - |.else - | cmp RB, LJ_TISNUM; jae >5 - | checknum RA, >5 - |.endif - |1: - | movsd xmm0, qword [BASE+RA*8] - |2: - | ucomisd xmm0, qword [BASE+RD*8] - |4: - iseqne_fp: - if (vk) { - | jp >2 // Unordered means not equal. - | jne >2 - } else { - | jp >2 // Unordered means not equal. - | je >1 - } - iseqne_end: - if (vk) { - |1: // EQ: Branch to the target. - | movzx RD, PC_RD - | branchPC RD - |2: // NE: Fallthrough to next instruction. - |.if not FFI - |3: - |.endif - } else { - |.if not FFI - |3: - |.endif - |2: // NE: Branch to the target. - | movzx RD, PC_RD - | branchPC RD - |1: // EQ: Fallthrough to next instruction. - } - if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV || - op == BC_ISEQN || op == BC_ISNEN)) { - | jmp <9 - } else { - | ins_next - } - | - if (op == BC_ISEQV || op == BC_ISNEV) { - |5: // Either or both types are not numbers. - |.if FFI - | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd - | checktp RA, LJ_TCDATA; je ->vmeta_equal_cd - |.endif - | checktp RA, RB // Compare types. - | jne <2 // Not the same type? - | cmp RB, LJ_TISPRI - | jae <1 // Same type and primitive type? - | - | // Same types and not a primitive type. Compare GCobj or pvalue. - | mov RA, [BASE+RA*8] - | mov RD, [BASE+RD*8] - | cmp RA, RD - | je <1 // Same GCobjs or pvalues? - | cmp RB, LJ_TISTABUD - | ja <2 // Different objects and not table/ud? - |.if X64 - | cmp RB, LJ_TUDATA // And not 64 bit lightuserdata. - | jb <2 - |.endif - | - | // Different tables or userdatas. Need to check __eq metamethod. - | // Field metatable must be at same offset for GCtab and GCudata! - | mov TAB:RB, TAB:RA->metatable - | test TAB:RB, TAB:RB - | jz <2 // No metatable? - | test byte TAB:RB->nomm, 1<vmeta_equal // Handle __eq metamethod. - } else { - |.if FFI - |3: - | cmp RB, LJ_TCDATA - if (LJ_DUALNUM && vk) { - | jne <9 - } else { - | jne <2 - } - | jmp ->vmeta_equal_cd - |.endif - } - break; - case BC_ISEQS: case BC_ISNES: - vk = op == BC_ISEQS; - | ins_AND // RA = src, RD = str const, JMP with RD = target - | mov RB, [BASE+RA*8+4] - | add PC, 4 - | cmp RB, LJ_TSTR; jne >3 - | mov RA, [BASE+RA*8] - | cmp RA, [KBASE+RD*4] - iseqne_test: - if (vk) { - | jne >2 - } else { - | je >1 - } - goto iseqne_end; - case BC_ISEQN: case BC_ISNEN: - vk = op == BC_ISEQN; - | ins_AD // RA = src, RD = num const, JMP with RD = target - | mov RB, [BASE+RA*8+4] - | add PC, 4 - |.if DUALNUM - | cmp RB, LJ_TISNUM; jne >7 - | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8 - | mov RB, dword [KBASE+RD*8] - | cmp RB, dword [BASE+RA*8] - if (vk) { - | jne >9 - } else { - | je >9 - } - | movzx RD, PC_RD - | branchPC RD - |9: - | ins_next - | - |7: // RA is not an integer. - | ja >3 - | // RA is a number. - | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 - | // RA is a number, RD is an integer. - | cvtsi2sd xmm0, dword [KBASE+RD*8] - | jmp >2 - | - |8: // RA is an integer, RD is a number. - | cvtsi2sd xmm0, dword [BASE+RA*8] - | ucomisd xmm0, qword [KBASE+RD*8] - | jmp >4 - |.else - | cmp RB, LJ_TISNUM; jae >3 - |.endif - |1: - | movsd xmm0, qword [KBASE+RD*8] - |2: - | ucomisd xmm0, qword [BASE+RA*8] - |4: - goto iseqne_fp; - case BC_ISEQP: case BC_ISNEP: - vk = op == BC_ISEQP; - | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target - | mov RB, [BASE+RA*8+4] - | add PC, 4 - | cmp RB, RD - if (!LJ_HASFFI) goto iseqne_test; - if (vk) { - | jne >3 - | movzx RD, PC_RD - | branchPC RD - |2: - | ins_next - |3: - | cmp RB, LJ_TCDATA; jne <2 - | jmp ->vmeta_equal_cd - } else { - | je >2 - | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd - | movzx RD, PC_RD - | branchPC RD - |2: - | ins_next - } - break; - - /* -- Unary test and copy ops ------------------------------------------- */ - - case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: - | ins_AD // RA = dst or unused, RD = src, JMP with RD = target - | mov RB, [BASE+RD*8+4] - | add PC, 4 - | cmp RB, LJ_TISTRUECOND - if (op == BC_IST || op == BC_ISTC) { - | jae >1 - } else { - | jb >1 - } - if (op == BC_ISTC || op == BC_ISFC) { - | mov [BASE+RA*8+4], RB - | mov RB, [BASE+RD*8] - | mov [BASE+RA*8], RB - } - | movzx RD, PC_RD - | branchPC RD - |1: // Fallthrough to the next instruction. - | ins_next - break; - - case BC_ISTYPE: - | ins_AD // RA = src, RD = -type - | add RD, [BASE+RA*8+4] - | jne ->vmeta_istype - | ins_next - break; - case BC_ISNUM: - | ins_AD // RA = src, RD = -(TISNUM-1) - | checknum RA, ->vmeta_istype - | ins_next - break; - - /* -- Unary ops --------------------------------------------------------- */ - - case BC_MOV: - | ins_AD // RA = dst, RD = src - |.if X64 - | mov RBa, [BASE+RD*8] - | mov [BASE+RA*8], RBa - |.else - | mov RB, [BASE+RD*8+4] - | mov RD, [BASE+RD*8] - | mov [BASE+RA*8+4], RB - | mov [BASE+RA*8], RD - |.endif - | ins_next_ - break; - case BC_NOT: - | ins_AD // RA = dst, RD = src - | xor RB, RB - | checktp RD, LJ_TISTRUECOND - | adc RB, LJ_TTRUE - | mov [BASE+RA*8+4], RB - | ins_next - break; - case BC_UNM: - | ins_AD // RA = dst, RD = src - |.if DUALNUM - | checkint RD, >5 - | mov RB, [BASE+RD*8] - | neg RB - | jo >4 - | mov dword [BASE+RA*8+4], LJ_TISNUM - | mov dword [BASE+RA*8], RB - |9: - | ins_next - |4: - | mov dword [BASE+RA*8+4], 0x41e00000 // 2^31. - | mov dword [BASE+RA*8], 0 - | jmp <9 - |5: - | ja ->vmeta_unm - |.else - | checknum RD, ->vmeta_unm - |.endif - | movsd xmm0, qword [BASE+RD*8] - | sseconst_sign xmm1, RDa - | xorps xmm0, xmm1 - | movsd qword [BASE+RA*8], xmm0 - |.if DUALNUM - | jmp <9 - |.else - | ins_next - |.endif - break; - case BC_LEN: - | ins_AD // RA = dst, RD = src - | checkstr RD, >2 - | mov STR:RD, [BASE+RD*8] - |.if DUALNUM - | mov RD, dword STR:RD->len - |1: - | mov dword [BASE+RA*8+4], LJ_TISNUM - | mov dword [BASE+RA*8], RD - |.else - | xorps xmm0, xmm0 - | cvtsi2sd xmm0, dword STR:RD->len - |1: - | movsd qword [BASE+RA*8], xmm0 - |.endif - | ins_next - |2: - | checktab RD, ->vmeta_len - | mov TAB:FCARG1, [BASE+RD*8] -#if LJ_52 - | mov TAB:RB, TAB:FCARG1->metatable - | cmp TAB:RB, 0 - | jnz >9 - |3: -#endif - |->BC_LEN_Z: - | mov RB, BASE // Save BASE. - | call extern lj_tab_len@4 // (GCtab *t) - | // Length of table returned in eax (RD). - |.if DUALNUM - | // Nothing to do. - |.else - | cvtsi2sd xmm0, RD - |.endif - | mov BASE, RB // Restore BASE. - | movzx RA, PC_RA - | jmp <1 -#if LJ_52 - |9: // Check for __len. - | test byte TAB:RB->nomm, 1<vmeta_len // 'no __len' flag NOT set: check. -#endif - break; - - /* -- Binary ops -------------------------------------------------------- */ - - |.macro ins_arithpre, sseins, ssereg - | ins_ABC - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||switch (vk) { - ||case 0: - | checknum RB, ->vmeta_arith_vn - | .if DUALNUM - | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn - | .endif - | movsd xmm0, qword [BASE+RB*8] - | sseins ssereg, qword [KBASE+RC*8] - || break; - ||case 1: - | checknum RB, ->vmeta_arith_nv - | .if DUALNUM - | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv - | .endif - | movsd xmm0, qword [KBASE+RC*8] - | sseins ssereg, qword [BASE+RB*8] - || break; - ||default: - | checknum RB, ->vmeta_arith_vv - | checknum RC, ->vmeta_arith_vv - | movsd xmm0, qword [BASE+RB*8] - | sseins ssereg, qword [BASE+RC*8] - || break; - ||} - |.endmacro - | - |.macro ins_arithdn, intins - | ins_ABC - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||switch (vk) { - ||case 0: - | checkint RB, ->vmeta_arith_vn - | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_vn - | mov RB, [BASE+RB*8] - | intins RB, [KBASE+RC*8]; jo ->vmeta_arith_vno - || break; - ||case 1: - | checkint RB, ->vmeta_arith_nv - | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_nv - | mov RC, [KBASE+RC*8] - | intins RC, [BASE+RB*8]; jo ->vmeta_arith_nvo - || break; - ||default: - | checkint RB, ->vmeta_arith_vv - | checkint RC, ->vmeta_arith_vv - | mov RB, [BASE+RB*8] - | intins RB, [BASE+RC*8]; jo ->vmeta_arith_vvo - || break; - ||} - | mov dword [BASE+RA*8+4], LJ_TISNUM - ||if (vk == 1) { - | mov dword [BASE+RA*8], RC - ||} else { - | mov dword [BASE+RA*8], RB - ||} - | ins_next - |.endmacro - | - |.macro ins_arithpost - | movsd qword [BASE+RA*8], xmm0 - |.endmacro - | - |.macro ins_arith, sseins - | ins_arithpre sseins, xmm0 - | ins_arithpost - | ins_next - |.endmacro - | - |.macro ins_arith, intins, sseins - |.if DUALNUM - | ins_arithdn intins - |.else - | ins_arith, sseins - |.endif - |.endmacro - - | // RA = dst, RB = src1 or num const, RC = src2 or num const - case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - | ins_arith add, addsd - break; - case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - | ins_arith sub, subsd - break; - case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arith imul, mulsd - break; - case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: - | ins_arith divsd - break; - case BC_MODVN: - | ins_arithpre movsd, xmm1 - |->BC_MODVN_Z: - | call ->vm_mod - | ins_arithpost - | ins_next - break; - case BC_MODNV: case BC_MODVV: - | ins_arithpre movsd, xmm1 - | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. - break; - case BC_POW: - | ins_arithpre movsd, xmm1 - | mov RB, BASE - |.if not X64 - | movsd FPARG1, xmm0 - | movsd FPARG3, xmm1 - |.endif - | call extern pow - | movzx RA, PC_RA - | mov BASE, RB - |.if X64 - | ins_arithpost - |.else - | fstp qword [BASE+RA*8] - |.endif - | ins_next - break; - - case BC_CAT: - | ins_ABC // RA = dst, RB = src_start, RC = src_end - |.if X64 - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE - | lea CARG2d, [BASE+RC*8] - | mov CARG3d, RC - | sub CARG3d, RB - |->BC_CAT_Z: - | mov L:RB, L:CARG1d - |.else - | lea RA, [BASE+RC*8] - | sub RC, RB - | mov ARG2, RA - | mov ARG3, RC - |->BC_CAT_Z: - | mov L:RB, SAVE_L - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_cat // (lua_State *L, TValue *top, int left) - | // NULL (finished) or TValue * (metamethod) returned in eax (RC). - | mov BASE, L:RB->base - | test RC, RC - | jnz ->vmeta_binop - | movzx RB, PC_RB // Copy result to Stk[RA] from Stk[RB]. - | movzx RA, PC_RA - |.if X64 - | mov RCa, [BASE+RB*8] - | mov [BASE+RA*8], RCa - |.else - | mov RC, [BASE+RB*8+4] - | mov RB, [BASE+RB*8] - | mov [BASE+RA*8+4], RC - | mov [BASE+RA*8], RB - |.endif - | ins_next - break; - - /* -- Constant ops ------------------------------------------------------ */ - - case BC_KSTR: - | ins_AND // RA = dst, RD = str const (~) - | mov RD, [KBASE+RD*4] - | mov dword [BASE+RA*8+4], LJ_TSTR - | mov [BASE+RA*8], RD - | ins_next - break; - case BC_KCDATA: - |.if FFI - | ins_AND // RA = dst, RD = cdata const (~) - | mov RD, [KBASE+RD*4] - | mov dword [BASE+RA*8+4], LJ_TCDATA - | mov [BASE+RA*8], RD - | ins_next - |.endif - break; - case BC_KSHORT: - | ins_AD // RA = dst, RD = signed int16 literal - |.if DUALNUM - | movsx RD, RDW - | mov dword [BASE+RA*8+4], LJ_TISNUM - | mov dword [BASE+RA*8], RD - |.else - | movsx RD, RDW // Sign-extend literal. - | cvtsi2sd xmm0, RD - | movsd qword [BASE+RA*8], xmm0 - |.endif - | ins_next - break; - case BC_KNUM: - | ins_AD // RA = dst, RD = num const - | movsd xmm0, qword [KBASE+RD*8] - | movsd qword [BASE+RA*8], xmm0 - | ins_next - break; - case BC_KPRI: - | ins_AND // RA = dst, RD = primitive type (~) - | mov [BASE+RA*8+4], RD - | ins_next - break; - case BC_KNIL: - | ins_AD // RA = dst_start, RD = dst_end - | lea RA, [BASE+RA*8+12] - | lea RD, [BASE+RD*8+4] - | mov RB, LJ_TNIL - | mov [RA-8], RB // Sets minimum 2 slots. - |1: - | mov [RA], RB - | add RA, 8 - | cmp RA, RD - | jbe <1 - | ins_next - break; - - /* -- Upvalue and function ops ------------------------------------------ */ - - case BC_UGET: - | ins_AD // RA = dst, RD = upvalue # - | mov LFUNC:RB, [BASE-8] - | mov UPVAL:RB, [LFUNC:RB+RD*4+offsetof(GCfuncL, uvptr)] - | mov RB, UPVAL:RB->v - |.if X64 - | mov RDa, [RB] - | mov [BASE+RA*8], RDa - |.else - | mov RD, [RB+4] - | mov RB, [RB] - | mov [BASE+RA*8+4], RD - | mov [BASE+RA*8], RB - |.endif - | ins_next - break; - case BC_USETV: -#define TV2MARKOFS \ - ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)) - | ins_AD // RA = upvalue #, RD = src - | mov LFUNC:RB, [BASE-8] - | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] - | cmp byte UPVAL:RB->closed, 0 - | mov RB, UPVAL:RB->v - | mov RA, [BASE+RD*8] - | mov RD, [BASE+RD*8+4] - | mov [RB], RA - | mov [RB+4], RD - | jz >1 - | // Check barrier for closed upvalue. - | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv) - | jnz >2 - |1: - | ins_next - | - |2: // Upvalue is black. Check if new value is collectable and white. - | sub RD, LJ_TISGCV - | cmp RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v) - | jbe <1 - | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) - | jz <1 - | // Crossed a write barrier. Move the barrier forward. - |.if X64 and not X64WIN - | mov FCARG2, RB - | mov RB, BASE // Save BASE. - |.else - | xchg FCARG2, RB // Save BASE (FCARG2 == BASE). - |.endif - | lea GL:FCARG1, [DISPATCH+GG_DISP2G] - | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv) - | mov BASE, RB // Restore BASE. - | jmp <1 - break; -#undef TV2MARKOFS - case BC_USETS: - | ins_AND // RA = upvalue #, RD = str const (~) - | mov LFUNC:RB, [BASE-8] - | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] - | mov GCOBJ:RA, [KBASE+RD*4] - | mov RD, UPVAL:RB->v - | mov [RD], GCOBJ:RA - | mov dword [RD+4], LJ_TSTR - | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) - | jnz >2 - |1: - | ins_next - | - |2: // Check if string is white and ensure upvalue is closed. - | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str) - | jz <1 - | cmp byte UPVAL:RB->closed, 0 - | jz <1 - | // Crossed a write barrier. Move the barrier forward. - | mov RB, BASE // Save BASE (FCARG2 == BASE). - | mov FCARG2, RD - | lea GL:FCARG1, [DISPATCH+GG_DISP2G] - | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv) - | mov BASE, RB // Restore BASE. - | jmp <1 - break; - case BC_USETN: - | ins_AD // RA = upvalue #, RD = num const - | mov LFUNC:RB, [BASE-8] - | movsd xmm0, qword [KBASE+RD*8] - | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] - | mov RA, UPVAL:RB->v - | movsd qword [RA], xmm0 - | ins_next - break; - case BC_USETP: - | ins_AND // RA = upvalue #, RD = primitive type (~) - | mov LFUNC:RB, [BASE-8] - | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] - | mov RA, UPVAL:RB->v - | mov [RA+4], RD - | ins_next - break; - case BC_UCLO: - | ins_AD // RA = level, RD = target - | branchPC RD // Do this first to free RD. - | mov L:RB, SAVE_L - | cmp dword L:RB->openupval, 0 - | je >1 - | mov L:RB->base, BASE - | lea FCARG2, [BASE+RA*8] // Caveat: FCARG2 == BASE - | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA - | call extern lj_func_closeuv@8 // (lua_State *L, TValue *level) - | mov BASE, L:RB->base - |1: - | ins_next - break; - - case BC_FNEW: - | ins_AND // RA = dst, RD = proto const (~) (holding function prototype) - |.if X64 - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. - | mov CARG3d, [BASE-8] - | mov CARG2d, [KBASE+RD*4] // Fetch GCproto *. - | mov CARG1d, L:RB - |.else - | mov LFUNC:RA, [BASE-8] - | mov PROTO:RD, [KBASE+RD*4] // Fetch GCproto *. - | mov L:RB, SAVE_L - | mov ARG3, LFUNC:RA - | mov ARG2, PROTO:RD - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | // (lua_State *L, GCproto *pt, GCfuncL *parent) - | call extern lj_func_newL_gc - | // GCfuncL * returned in eax (RC). - | mov BASE, L:RB->base - | movzx RA, PC_RA - | mov [BASE+RA*8], LFUNC:RC - | mov dword [BASE+RA*8+4], LJ_TFUNC - | ins_next - break; - - /* -- Table ops --------------------------------------------------------- */ - - case BC_TNEW: - | ins_AD // RA = dst, RD = hbits|asize - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] - | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] - | mov SAVE_PC, PC - | jae >5 - |1: - |.if X64 - | mov CARG3d, RD - | and RD, 0x7ff - | shr CARG3d, 11 - |.else - | mov RA, RD - | and RD, 0x7ff - | shr RA, 11 - | mov ARG3, RA - |.endif - | cmp RD, 0x7ff - | je >3 - |2: - |.if X64 - | mov L:CARG1d, L:RB - | mov CARG2d, RD - |.else - | mov ARG1, L:RB - | mov ARG2, RD - |.endif - | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) - | // Table * returned in eax (RC). - | mov BASE, L:RB->base - | movzx RA, PC_RA - | mov [BASE+RA*8], TAB:RC - | mov dword [BASE+RA*8+4], LJ_TTAB - | ins_next - |3: // Turn 0x7ff into 0x801. - | mov RD, 0x801 - | jmp <2 - |5: - | mov L:FCARG1, L:RB - | call extern lj_gc_step_fixtop@4 // (lua_State *L) - | movzx RD, PC_RD - | jmp <1 - break; - case BC_TDUP: - | ins_AND // RA = dst, RD = table const (~) (holding template table) - | mov L:RB, SAVE_L - | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] - | mov SAVE_PC, PC - | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] - | mov L:RB->base, BASE - | jae >3 - |2: - | mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE - | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA - | call extern lj_tab_dup@8 // (lua_State *L, Table *kt) - | // Table * returned in eax (RC). - | mov BASE, L:RB->base - | movzx RA, PC_RA - | mov [BASE+RA*8], TAB:RC - | mov dword [BASE+RA*8+4], LJ_TTAB - | ins_next - |3: - | mov L:FCARG1, L:RB - | call extern lj_gc_step_fixtop@4 // (lua_State *L) - | movzx RD, PC_RD // Need to reload RD. - | not RDa - | jmp <2 - break; - - case BC_GGET: - | ins_AND // RA = dst, RD = str const (~) - | mov LFUNC:RB, [BASE-8] - | mov TAB:RB, LFUNC:RB->env - | mov STR:RC, [KBASE+RD*4] - | jmp ->BC_TGETS_Z - break; - case BC_GSET: - | ins_AND // RA = src, RD = str const (~) - | mov LFUNC:RB, [BASE-8] - | mov TAB:RB, LFUNC:RB->env - | mov STR:RC, [KBASE+RD*4] - | jmp ->BC_TSETS_Z - break; - - case BC_TGETV: - | ins_ABC // RA = dst, RB = table, RC = key - | checktab RB, ->vmeta_tgetv - | mov TAB:RB, [BASE+RB*8] - | - | // Integer key? - |.if DUALNUM - | checkint RC, >5 - | mov RC, dword [BASE+RC*8] - |.else - | // Convert number to int and back and compare. - | checknum RC, >5 - | movsd xmm0, qword [BASE+RC*8] - | cvttsd2si RC, xmm0 - | cvtsi2sd xmm1, RC - | ucomisd xmm0, xmm1 - | jne ->vmeta_tgetv // Generic numeric key? Use fallback. - |.endif - | cmp RC, TAB:RB->asize // Takes care of unordered, too. - | jae ->vmeta_tgetv // Not in array part? Use fallback. - | shl RC, 3 - | add RC, TAB:RB->array - | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath. - | je >2 - | // Get array slot. - |.if X64 - | mov RBa, [RC] - | mov [BASE+RA*8], RBa - |.else - | mov RB, [RC] - | mov RC, [RC+4] - | mov [BASE+RA*8], RB - | mov [BASE+RA*8+4], RC - |.endif - |1: - | ins_next - | - |2: // Check for __index if table value is nil. - | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. - | jz >3 - | mov TAB:RA, TAB:RB->metatable - | test byte TAB:RA->nomm, 1<vmeta_tgetv // 'no __index' flag NOT set: check. - | movzx RA, PC_RA // Restore RA. - |3: - | mov dword [BASE+RA*8+4], LJ_TNIL - | jmp <1 - | - |5: // String key? - | checkstr RC, ->vmeta_tgetv - | mov STR:RC, [BASE+RC*8] - | jmp ->BC_TGETS_Z - break; - case BC_TGETS: - | ins_ABC // RA = dst, RB = table, RC = str const (~) - | not RCa - | mov STR:RC, [KBASE+RC*4] - | checktab RB, ->vmeta_tgets - | mov TAB:RB, [BASE+RB*8] - |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. - | mov RA, TAB:RB->hmask - | and RA, STR:RC->hash - | imul RA, #NODE - | add NODE:RA, TAB:RB->node - |1: - | cmp dword NODE:RA->key.it, LJ_TSTR - | jne >4 - | cmp dword NODE:RA->key.gcr, STR:RC - | jne >4 - | // Ok, key found. Assumes: offsetof(Node, val) == 0 - | cmp dword [RA+4], LJ_TNIL // Avoid overwriting RB in fastpath. - | je >5 // Key found, but nil value? - | movzx RC, PC_RA - | // Get node value. - |.if X64 - | mov RBa, [RA] - | mov [BASE+RC*8], RBa - |.else - | mov RB, [RA] - | mov RA, [RA+4] - | mov [BASE+RC*8], RB - | mov [BASE+RC*8+4], RA - |.endif - |2: - | ins_next - | - |3: - | movzx RC, PC_RA - | mov dword [BASE+RC*8+4], LJ_TNIL - | jmp <2 - | - |4: // Follow hash chain. - | mov NODE:RA, NODE:RA->next - | test NODE:RA, NODE:RA - | jnz <1 - | // End of hash chain: key not found, nil result. - | - |5: // Check for __index if table value is nil. - | mov TAB:RA, TAB:RB->metatable - | test TAB:RA, TAB:RA - | jz <3 // No metatable: done. - | test byte TAB:RA->nomm, 1<vmeta_tgets // Caveat: preserve STR:RC. - break; - case BC_TGETB: - | ins_ABC // RA = dst, RB = table, RC = byte literal - | checktab RB, ->vmeta_tgetb - | mov TAB:RB, [BASE+RB*8] - | cmp RC, TAB:RB->asize - | jae ->vmeta_tgetb - | shl RC, 3 - | add RC, TAB:RB->array - | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath. - | je >2 - | // Get array slot. - |.if X64 - | mov RBa, [RC] - | mov [BASE+RA*8], RBa - |.else - | mov RB, [RC] - | mov RC, [RC+4] - | mov [BASE+RA*8], RB - | mov [BASE+RA*8+4], RC - |.endif - |1: - | ins_next - | - |2: // Check for __index if table value is nil. - | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. - | jz >3 - | mov TAB:RA, TAB:RB->metatable - | test byte TAB:RA->nomm, 1<vmeta_tgetb // 'no __index' flag NOT set: check. - | movzx RA, PC_RA // Restore RA. - |3: - | mov dword [BASE+RA*8+4], LJ_TNIL - | jmp <1 - break; - case BC_TGETR: - | ins_ABC // RA = dst, RB = table, RC = key - | mov TAB:RB, [BASE+RB*8] - |.if DUALNUM - | mov RC, dword [BASE+RC*8] - |.else - | cvttsd2si RC, qword [BASE+RC*8] - |.endif - | cmp RC, TAB:RB->asize - | jae ->vmeta_tgetr // Not in array part? Use fallback. - | shl RC, 3 - | add RC, TAB:RB->array - | // Get array slot. - |->BC_TGETR_Z: - |.if X64 - | mov RBa, [RC] - | mov [BASE+RA*8], RBa - |.else - | mov RB, [RC] - | mov RC, [RC+4] - | mov [BASE+RA*8], RB - | mov [BASE+RA*8+4], RC - |.endif - |->BC_TGETR2_Z: - | ins_next - break; - - case BC_TSETV: - | ins_ABC // RA = src, RB = table, RC = key - | checktab RB, ->vmeta_tsetv - | mov TAB:RB, [BASE+RB*8] - | - | // Integer key? - |.if DUALNUM - | checkint RC, >5 - | mov RC, dword [BASE+RC*8] - |.else - | // Convert number to int and back and compare. - | checknum RC, >5 - | movsd xmm0, qword [BASE+RC*8] - | cvttsd2si RC, xmm0 - | cvtsi2sd xmm1, RC - | ucomisd xmm0, xmm1 - | jne ->vmeta_tsetv // Generic numeric key? Use fallback. - |.endif - | cmp RC, TAB:RB->asize // Takes care of unordered, too. - | jae ->vmeta_tsetv - | shl RC, 3 - | add RC, TAB:RB->array - | cmp dword [RC+4], LJ_TNIL - | je >3 // Previous value is nil? - |1: - | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) - | jnz >7 - |2: // Set array slot. - |.if X64 - | mov RBa, [BASE+RA*8] - | mov [RC], RBa - |.else - | mov RB, [BASE+RA*8+4] - | mov RA, [BASE+RA*8] - | mov [RC+4], RB - | mov [RC], RA - |.endif - | ins_next - | - |3: // Check for __newindex if previous value is nil. - | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. - | jz <1 - | mov TAB:RA, TAB:RB->metatable - | test byte TAB:RA->nomm, 1<vmeta_tsetv // 'no __newindex' flag NOT set: check. - | movzx RA, PC_RA // Restore RA. - | jmp <1 - | - |5: // String key? - | checkstr RC, ->vmeta_tsetv - | mov STR:RC, [BASE+RC*8] - | jmp ->BC_TSETS_Z - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, RA - | movzx RA, PC_RA // Restore RA. - | jmp <2 - break; - case BC_TSETS: - | ins_ABC // RA = src, RB = table, RC = str const (~) - | not RCa - | mov STR:RC, [KBASE+RC*4] - | checktab RB, ->vmeta_tsets - | mov TAB:RB, [BASE+RB*8] - |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. - | mov RA, TAB:RB->hmask - | and RA, STR:RC->hash - | imul RA, #NODE - | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. - | add NODE:RA, TAB:RB->node - |1: - | cmp dword NODE:RA->key.it, LJ_TSTR - | jne >5 - | cmp dword NODE:RA->key.gcr, STR:RC - | jne >5 - | // Ok, key found. Assumes: offsetof(Node, val) == 0 - | cmp dword [RA+4], LJ_TNIL - | je >4 // Previous value is nil? - |2: - | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) - | jnz >7 - |3: // Set node value. - | movzx RC, PC_RA - |.if X64 - | mov RBa, [BASE+RC*8] - | mov [RA], RBa - |.else - | mov RB, [BASE+RC*8+4] - | mov RC, [BASE+RC*8] - | mov [RA+4], RB - | mov [RA], RC - |.endif - | ins_next - | - |4: // Check for __newindex if previous value is nil. - | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. - | jz <2 - | mov TMP1, RA // Save RA. - | mov TAB:RA, TAB:RB->metatable - | test byte TAB:RA->nomm, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. - | mov RA, TMP1 // Restore RA. - | jmp <2 - | - |5: // Follow hash chain. - | mov NODE:RA, NODE:RA->next - | test NODE:RA, NODE:RA - | jnz <1 - | // End of hash chain: key not found, add a new one. - | - | // But check for __newindex first. - | mov TAB:RA, TAB:RB->metatable - | test TAB:RA, TAB:RA - | jz >6 // No metatable: continue. - | test byte TAB:RA->nomm, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. - |6: - | mov TMP1, STR:RC - | mov TMP2, LJ_TSTR - | mov TMP3, TAB:RB // Save TAB:RB for us. - |.if X64 - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE - | lea CARG3, TMP1 - | mov CARG2d, TAB:RB - | mov L:RB, L:CARG1d - |.else - | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. - | mov ARG2, TAB:RB - | mov L:RB, SAVE_L - | mov ARG3, RC - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) - | // Handles write barrier for the new key. TValue * returned in eax (RC). - | mov BASE, L:RB->base - | mov TAB:RB, TMP3 // Need TAB:RB for barrier. - | mov RA, eax - | jmp <2 // Must check write barrier for value. - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, RC // Destroys STR:RC. - | jmp <3 - break; - case BC_TSETB: - | ins_ABC // RA = src, RB = table, RC = byte literal - | checktab RB, ->vmeta_tsetb - | mov TAB:RB, [BASE+RB*8] - | cmp RC, TAB:RB->asize - | jae ->vmeta_tsetb - | shl RC, 3 - | add RC, TAB:RB->array - | cmp dword [RC+4], LJ_TNIL - | je >3 // Previous value is nil? - |1: - | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) - | jnz >7 - |2: // Set array slot. - |.if X64 - | mov RAa, [BASE+RA*8] - | mov [RC], RAa - |.else - | mov RB, [BASE+RA*8+4] - | mov RA, [BASE+RA*8] - | mov [RC+4], RB - | mov [RC], RA - |.endif - | ins_next - | - |3: // Check for __newindex if previous value is nil. - | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. - | jz <1 - | mov TAB:RA, TAB:RB->metatable - | test byte TAB:RA->nomm, 1<vmeta_tsetb // 'no __newindex' flag NOT set: check. - | movzx RA, PC_RA // Restore RA. - | jmp <1 - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, RA - | movzx RA, PC_RA // Restore RA. - | jmp <2 - break; - case BC_TSETR: - | ins_ABC // RA = src, RB = table, RC = key - | mov TAB:RB, [BASE+RB*8] - |.if DUALNUM - | mov RC, dword [BASE+RC*8] - |.else - | cvttsd2si RC, qword [BASE+RC*8] - |.endif - | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) - | jnz >7 - |2: - | cmp RC, TAB:RB->asize - | jae ->vmeta_tsetr - | shl RC, 3 - | add RC, TAB:RB->array - | // Set array slot. - |->BC_TSETR_Z: - |.if X64 - | mov RBa, [BASE+RA*8] - | mov [RC], RBa - |.else - | mov RB, [BASE+RA*8+4] - | mov RA, [BASE+RA*8] - | mov [RC+4], RB - | mov [RC], RA - |.endif - | ins_next - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, RA - | movzx RA, PC_RA // Restore RA. - | jmp <2 - break; - - case BC_TSETM: - | ins_AD // RA = base (table at base-1), RD = num const (start index) - | mov TMP1, KBASE // Need one more free register. - | mov KBASE, dword [KBASE+RD*8] // Integer constant is in lo-word. - |1: - | lea RA, [BASE+RA*8] - | mov TAB:RB, [RA-8] // Guaranteed to be a table. - | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) - | jnz >7 - |2: - | mov RD, MULTRES - | sub RD, 1 - | jz >4 // Nothing to copy? - | add RD, KBASE // Compute needed size. - | cmp RD, TAB:RB->asize - | ja >5 // Doesn't fit into array part? - | sub RD, KBASE - | shl KBASE, 3 - | add KBASE, TAB:RB->array - |3: // Copy result slots to table. - |.if X64 - | mov RBa, [RA] - | add RA, 8 - | mov [KBASE], RBa - |.else - | mov RB, [RA] - | mov [KBASE], RB - | mov RB, [RA+4] - | add RA, 8 - | mov [KBASE+4], RB - |.endif - | add KBASE, 8 - | sub RD, 1 - | jnz <3 - |4: - | mov KBASE, TMP1 - | ins_next - | - |5: // Need to resize array part. - |.if X64 - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE. - | mov CARG2d, TAB:RB - | mov CARG3d, RD - | mov L:RB, L:CARG1d - |.else - | mov ARG2, TAB:RB - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov ARG3, RD - | mov ARG1, L:RB - |.endif - | mov SAVE_PC, PC - | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) - | mov BASE, L:RB->base - | movzx RA, PC_RA // Restore RA. - | jmp <1 // Retry. - | - |7: // Possible table write barrier for any value. Skip valiswhite check. - | barrierback TAB:RB, RD - | jmp <2 - break; - - /* -- Calls and vararg handling ----------------------------------------- */ - - case BC_CALL: case BC_CALLM: - | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs - if (op == BC_CALLM) { - | add NARGS:RD, MULTRES - } - | cmp dword [BASE+RA*8+4], LJ_TFUNC - | mov LFUNC:RB, [BASE+RA*8] - | jne ->vmeta_call_ra - | lea BASE, [BASE+RA*8+8] - | ins_call - break; - - case BC_CALLMT: - | ins_AD // RA = base, RD = extra_nargs - | add NARGS:RD, MULTRES - | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op. - break; - case BC_CALLT: - | ins_AD // RA = base, RD = nargs+1 - | lea RA, [BASE+RA*8+8] - | mov KBASE, BASE // Use KBASE for move + vmeta_call hint. - | mov LFUNC:RB, [RA-8] - | cmp dword [RA-4], LJ_TFUNC - | jne ->vmeta_call - |->BC_CALLT_Z: - | mov PC, [BASE-4] - | test PC, FRAME_TYPE - | jnz >7 - |1: - | mov [BASE-8], LFUNC:RB // Copy function down, reloaded below. - | mov MULTRES, NARGS:RD - | sub NARGS:RD, 1 - | jz >3 - |2: // Move args down. - |.if X64 - | mov RBa, [RA] - | add RA, 8 - | mov [KBASE], RBa - |.else - | mov RB, [RA] - | mov [KBASE], RB - | mov RB, [RA+4] - | add RA, 8 - | mov [KBASE+4], RB - |.endif - | add KBASE, 8 - | sub NARGS:RD, 1 - | jnz <2 - | - | mov LFUNC:RB, [BASE-8] - |3: - | mov NARGS:RD, MULTRES - | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function? - | ja >5 - |4: - | ins_callt - | - |5: // Tailcall to a fast function. - | test PC, FRAME_TYPE // Lua frame below? - | jnz <4 - | movzx RA, PC_RA - | not RAa - | mov LFUNC:KBASE, [BASE+RA*8-8] // Need to prepare KBASE. - | mov KBASE, LFUNC:KBASE->pc - | mov KBASE, [KBASE+PC2PROTO(k)] - | jmp <4 - | - |7: // Tailcall from a vararg function. - | sub PC, FRAME_VARG - | test PC, FRAME_TYPEP - | jnz >8 // Vararg frame below? - | sub BASE, PC // Need to relocate BASE/KBASE down. - | mov KBASE, BASE - | mov PC, [BASE-4] - | jmp <1 - |8: - | add PC, FRAME_VARG - | jmp <1 - break; - - case BC_ITERC: - | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1) - | lea RA, [BASE+RA*8+8] // fb = base+1 - |.if X64 - | mov RBa, [RA-24] // Copy state. fb[0] = fb[-3]. - | mov RCa, [RA-16] // Copy control var. fb[1] = fb[-2]. - | mov [RA], RBa - | mov [RA+8], RCa - |.else - | mov RB, [RA-24] // Copy state. fb[0] = fb[-3]. - | mov RC, [RA-20] - | mov [RA], RB - | mov [RA+4], RC - | mov RB, [RA-16] // Copy control var. fb[1] = fb[-2]. - | mov RC, [RA-12] - | mov [RA+8], RB - | mov [RA+12], RC - |.endif - | mov LFUNC:RB, [RA-32] // Copy callable. fb[-1] = fb[-4] - | mov RC, [RA-28] - | mov [RA-8], LFUNC:RB - | mov [RA-4], RC - | cmp RC, LJ_TFUNC // Handle like a regular 2-arg call. - | mov NARGS:RD, 2+1 - | jne ->vmeta_call - | mov BASE, RA - | ins_call - break; - - case BC_ITERN: - | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. - |.endif - | mov TMP1, KBASE // Need two more free registers. - | mov TMP2, DISPATCH - | mov TAB:RB, [BASE+RA*8-16] - | mov RC, [BASE+RA*8-8] // Get index from control var. - | mov DISPATCH, TAB:RB->asize - | add PC, 4 - | mov KBASE, TAB:RB->array - |1: // Traverse array part. - | cmp RC, DISPATCH; jae >5 // Index points after array part? - | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4 - |.if DUALNUM - | mov dword [BASE+RA*8+4], LJ_TISNUM - | mov dword [BASE+RA*8], RC - |.else - | cvtsi2sd xmm0, RC - |.endif - | // Copy array slot to returned value. - |.if X64 - | mov RBa, [KBASE+RC*8] - | mov [BASE+RA*8+8], RBa - |.else - | mov RB, [KBASE+RC*8+4] - | mov [BASE+RA*8+12], RB - | mov RB, [KBASE+RC*8] - | mov [BASE+RA*8+8], RB - |.endif - | add RC, 1 - | // Return array index as a numeric key. - |.if DUALNUM - | // See above. - |.else - | movsd qword [BASE+RA*8], xmm0 - |.endif - | mov [BASE+RA*8-8], RC // Update control var. - |2: - | movzx RD, PC_RD // Get target from ITERL. - | branchPC RD - |3: - | mov DISPATCH, TMP2 - | mov KBASE, TMP1 - | ins_next - | - |4: // Skip holes in array part. - | add RC, 1 - | jmp <1 - | - |5: // Traverse hash part. - | sub RC, DISPATCH - |6: - | cmp RC, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1. - | imul KBASE, RC, #NODE - | add NODE:KBASE, TAB:RB->node - | cmp dword NODE:KBASE->val.it, LJ_TNIL; je >7 - | lea DISPATCH, [RC+DISPATCH+1] - | // Copy key and value from hash slot. - |.if X64 - | mov RBa, NODE:KBASE->key - | mov RCa, NODE:KBASE->val - | mov [BASE+RA*8], RBa - | mov [BASE+RA*8+8], RCa - |.else - | mov RB, NODE:KBASE->key.gcr - | mov RC, NODE:KBASE->key.it - | mov [BASE+RA*8], RB - | mov [BASE+RA*8+4], RC - | mov RB, NODE:KBASE->val.gcr - | mov RC, NODE:KBASE->val.it - | mov [BASE+RA*8+8], RB - | mov [BASE+RA*8+12], RC - |.endif - | mov [BASE+RA*8-8], DISPATCH - | jmp <2 - | - |7: // Skip holes in hash part. - | add RC, 1 - | jmp <6 - break; - - case BC_ISNEXT: - | ins_AD // RA = base, RD = target (points to ITERN) - | cmp dword [BASE+RA*8-20], LJ_TFUNC; jne >5 - | mov CFUNC:RB, [BASE+RA*8-24] - | cmp dword [BASE+RA*8-12], LJ_TTAB; jne >5 - | cmp dword [BASE+RA*8-4], LJ_TNIL; jne >5 - | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 - | branchPC RD - | mov dword [BASE+RA*8-8], 0 // Initialize control var. - | mov dword [BASE+RA*8-4], 0xfffe7fff - |1: - | ins_next - |5: // Despecialize bytecode if any of the checks fail. - | mov PC_OP, BC_JMP - | branchPC RD - | mov byte [PC], BC_ITERC - | jmp <1 - break; - - case BC_VARG: - | ins_ABC // RA = base, RB = nresults+1, RC = numparams - | mov TMP1, KBASE // Need one more free register. - | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)] - | lea RA, [BASE+RA*8] - | sub KBASE, [BASE-4] - | // Note: KBASE may now be even _above_ BASE if nargs was < numparams. - | test RB, RB - | jz >5 // Copy all varargs? - | lea RB, [RA+RB*8-8] - | cmp KBASE, BASE // No vararg slots? - | jnb >2 - |1: // Copy vararg slots to destination slots. - |.if X64 - | mov RCa, [KBASE-8] - | add KBASE, 8 - | mov [RA], RCa - |.else - | mov RC, [KBASE-8] - | mov [RA], RC - | mov RC, [KBASE-4] - | add KBASE, 8 - | mov [RA+4], RC - |.endif - | add RA, 8 - | cmp RA, RB // All destination slots filled? - | jnb >3 - | cmp KBASE, BASE // No more vararg slots? - | jb <1 - |2: // Fill up remainder with nil. - | mov dword [RA+4], LJ_TNIL - | add RA, 8 - | cmp RA, RB - | jb <2 - |3: - | mov KBASE, TMP1 - | ins_next - | - |5: // Copy all varargs. - | mov MULTRES, 1 // MULTRES = 0+1 - | mov RC, BASE - | sub RC, KBASE - | jbe <3 // No vararg slots? - | mov RB, RC - | shr RB, 3 - | add RB, 1 - | mov MULTRES, RB // MULTRES = #varargs+1 - | mov L:RB, SAVE_L - | add RC, RA - | cmp RC, L:RB->maxstack - | ja >7 // Need to grow stack? - |6: // Copy all vararg slots. - |.if X64 - | mov RCa, [KBASE-8] - | add KBASE, 8 - | mov [RA], RCa - |.else - | mov RC, [KBASE-8] - | mov [RA], RC - | mov RC, [KBASE-4] - | add KBASE, 8 - | mov [RA+4], RC - |.endif - | add RA, 8 - | cmp KBASE, BASE // No more vararg slots? - | jb <6 - | jmp <3 - | - |7: // Grow stack for varargs. - | mov L:RB->base, BASE - | mov L:RB->top, RA - | mov SAVE_PC, PC - | sub KBASE, BASE // Need delta, because BASE may change. - | mov FCARG2, MULTRES - | sub FCARG2, 1 - | mov FCARG1, L:RB - | call extern lj_state_growstack@8 // (lua_State *L, int n) - | mov BASE, L:RB->base - | mov RA, L:RB->top - | add KBASE, BASE - | jmp <6 - break; - - /* -- Returns ----------------------------------------------------------- */ - - case BC_RETM: - | ins_AD // RA = results, RD = extra_nresults - | add RD, MULTRES // MULTRES >=1, so RD >=1. - | // Fall through. Assumes BC_RET follows and ins_AD is a no-op. - break; - - case BC_RET: case BC_RET0: case BC_RET1: - | ins_AD // RA = results, RD = nresults+1 - if (op != BC_RET0) { - | shl RA, 3 - } - |1: - | mov PC, [BASE-4] - | mov MULTRES, RD // Save nresults+1. - | test PC, FRAME_TYPE // Check frame type marker. - | jnz >7 // Not returning to a fixarg Lua func? - switch (op) { - case BC_RET: - |->BC_RET_Z: - | mov KBASE, BASE // Use KBASE for result move. - | sub RD, 1 - | jz >3 - |2: // Move results down. - |.if X64 - | mov RBa, [KBASE+RA] - | mov [KBASE-8], RBa - |.else - | mov RB, [KBASE+RA] - | mov [KBASE-8], RB - | mov RB, [KBASE+RA+4] - | mov [KBASE-4], RB - |.endif - | add KBASE, 8 - | sub RD, 1 - | jnz <2 - |3: - | mov RD, MULTRES // Note: MULTRES may be >255. - | movzx RB, PC_RB // So cannot compare with RDL! - |5: - | cmp RB, RD // More results expected? - | ja >6 - break; - case BC_RET1: - |.if X64 - | mov RBa, [BASE+RA] - | mov [BASE-8], RBa - |.else - | mov RB, [BASE+RA+4] - | mov [BASE-4], RB - | mov RB, [BASE+RA] - | mov [BASE-8], RB - |.endif - /* fallthrough */ - case BC_RET0: - |5: - | cmp PC_RB, RDL // More results expected? - | ja >6 - default: - break; - } - | movzx RA, PC_RA - | not RAa // Note: ~RA = -(RA+1) - | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8 - | mov LFUNC:KBASE, [BASE-8] - | mov KBASE, LFUNC:KBASE->pc - | mov KBASE, [KBASE+PC2PROTO(k)] - | ins_next - | - |6: // Fill up results with nil. - if (op == BC_RET) { - | mov dword [KBASE-4], LJ_TNIL // Note: relies on shifted base. - | add KBASE, 8 - } else { - | mov dword [BASE+RD*8-12], LJ_TNIL - } - | add RD, 1 - | jmp <5 - | - |7: // Non-standard return case. - | lea RB, [PC-FRAME_VARG] - | test RB, FRAME_TYPEP - | jnz ->vm_return - | // Return from vararg function: relocate BASE down and RA up. - | sub BASE, RB - if (op != BC_RET0) { - | add RA, RB - } - | jmp <1 - break; - - /* -- Loops and branches ------------------------------------------------ */ - - |.define FOR_IDX, [RA]; .define FOR_TIDX, dword [RA+4] - |.define FOR_STOP, [RA+8]; .define FOR_TSTOP, dword [RA+12] - |.define FOR_STEP, [RA+16]; .define FOR_TSTEP, dword [RA+20] - |.define FOR_EXT, [RA+24]; .define FOR_TEXT, dword [RA+28] - - case BC_FORL: - |.if JIT - | hotloop RB - |.endif - | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. - break; - - case BC_JFORI: - case BC_JFORL: -#if !LJ_HASJIT - break; -#endif - case BC_FORI: - case BC_IFORL: - vk = (op == BC_IFORL || op == BC_JFORL); - | ins_AJ // RA = base, RD = target (after end of loop or start of loop) - | lea RA, [BASE+RA*8] - if (LJ_DUALNUM) { - | cmp FOR_TIDX, LJ_TISNUM; jne >9 - if (!vk) { - | cmp FOR_TSTOP, LJ_TISNUM; jne ->vmeta_for - | cmp FOR_TSTEP, LJ_TISNUM; jne ->vmeta_for - | mov RB, dword FOR_IDX - | cmp dword FOR_STEP, 0; jl >5 - } else { -#ifdef LUA_USE_ASSERT - | cmp FOR_TSTOP, LJ_TISNUM; jne ->assert_bad_for_arg_type - | cmp FOR_TSTEP, LJ_TISNUM; jne ->assert_bad_for_arg_type -#endif - | mov RB, dword FOR_STEP - | test RB, RB; js >5 - | add RB, dword FOR_IDX; jo >1 - | mov dword FOR_IDX, RB - } - | cmp RB, dword FOR_STOP - | mov FOR_TEXT, LJ_TISNUM - | mov dword FOR_EXT, RB - if (op == BC_FORI) { - | jle >7 - |1: - |6: - | branchPC RD - } else if (op == BC_JFORI) { - | branchPC RD - | movzx RD, PC_RD - | jle =>BC_JLOOP - |1: - |6: - } else if (op == BC_IFORL) { - | jg >7 - |6: - | branchPC RD - |1: - } else { - | jle =>BC_JLOOP - |1: - |6: - } - |7: - | ins_next - | - |5: // Invert check for negative step. - if (vk) { - | add RB, dword FOR_IDX; jo <1 - | mov dword FOR_IDX, RB - } - | cmp RB, dword FOR_STOP - | mov FOR_TEXT, LJ_TISNUM - | mov dword FOR_EXT, RB - if (op == BC_FORI) { - | jge <7 - } else if (op == BC_JFORI) { - | branchPC RD - | movzx RD, PC_RD - | jge =>BC_JLOOP - } else if (op == BC_IFORL) { - | jl <7 - } else { - | jge =>BC_JLOOP - } - | jmp <6 - |9: // Fallback to FP variant. - } else if (!vk) { - | cmp FOR_TIDX, LJ_TISNUM - } - if (!vk) { - | jae ->vmeta_for - | cmp FOR_TSTOP, LJ_TISNUM; jae ->vmeta_for - } else { -#ifdef LUA_USE_ASSERT - | cmp FOR_TSTOP, LJ_TISNUM; jae ->assert_bad_for_arg_type - | cmp FOR_TSTEP, LJ_TISNUM; jae ->assert_bad_for_arg_type -#endif - } - | mov RB, FOR_TSTEP // Load type/hiword of for step. - if (!vk) { - | cmp RB, LJ_TISNUM; jae ->vmeta_for - } - | movsd xmm0, qword FOR_IDX - | movsd xmm1, qword FOR_STOP - if (vk) { - | addsd xmm0, qword FOR_STEP - | movsd qword FOR_IDX, xmm0 - | test RB, RB; js >3 - } else { - | jl >3 - } - | ucomisd xmm1, xmm0 - |1: - | movsd qword FOR_EXT, xmm0 - if (op == BC_FORI) { - |.if DUALNUM - | jnb <7 - |.else - | jnb >2 - | branchPC RD - |.endif - } else if (op == BC_JFORI) { - | branchPC RD - | movzx RD, PC_RD - | jnb =>BC_JLOOP - } else if (op == BC_IFORL) { - |.if DUALNUM - | jb <7 - |.else - | jb >2 - | branchPC RD - |.endif - } else { - | jnb =>BC_JLOOP - } - |.if DUALNUM - | jmp <6 - |.else - |2: - | ins_next - |.endif - | - |3: // Invert comparison if step is negative. - | ucomisd xmm0, xmm1 - | jmp <1 - break; - - case BC_ITERL: - |.if JIT - | hotloop RB - |.endif - | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. - break; - - case BC_JITERL: -#if !LJ_HASJIT - break; -#endif - case BC_IITERL: - | ins_AJ // RA = base, RD = target - | lea RA, [BASE+RA*8] - | mov RB, [RA+4] - | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil. - if (op == BC_JITERL) { - | mov [RA-4], RB - | mov RB, [RA] - | mov [RA-8], RB - | jmp =>BC_JLOOP - } else { - | branchPC RD // Otherwise save control var + branch. - | mov RD, [RA] - | mov [RA-4], RB - | mov [RA-8], RD - } - |1: - | ins_next - break; - - case BC_LOOP: - | ins_A // RA = base, RD = target (loop extent) - | // Note: RA/RD is only used by trace recorder to determine scope/extent - | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT - | hotloop RB - |.endif - | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. - break; - - case BC_ILOOP: - | ins_A // RA = base, RD = target (loop extent) - | ins_next - break; - - case BC_JLOOP: - |.if JIT - | ins_AD // RA = base (ignored), RD = traceno - | mov RA, [DISPATCH+DISPATCH_J(trace)] - | mov TRACE:RD, [RA+RD*4] - | mov RDa, TRACE:RD->mcode - | mov L:RB, SAVE_L - | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE - | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB - | // Save additional callee-save registers only used in compiled code. - |.if X64WIN - | mov TMPQ, r12 - | mov TMPa, r13 - | mov CSAVE_4, r14 - | mov CSAVE_3, r15 - | mov RAa, rsp - | sub rsp, 9*16+4*8 - | movdqa [RAa], xmm6 - | movdqa [RAa-1*16], xmm7 - | movdqa [RAa-2*16], xmm8 - | movdqa [RAa-3*16], xmm9 - | movdqa [RAa-4*16], xmm10 - | movdqa [RAa-5*16], xmm11 - | movdqa [RAa-6*16], xmm12 - | movdqa [RAa-7*16], xmm13 - | movdqa [RAa-8*16], xmm14 - | movdqa [RAa-9*16], xmm15 - |.elif X64 - | mov TMPQ, r12 - | mov TMPa, r13 - | sub rsp, 16 - |.endif - | jmp RDa - |.endif - break; - - case BC_JMP: - | ins_AJ // RA = unused, RD = target - | branchPC RD - | ins_next - break; - - /* -- Function headers -------------------------------------------------- */ - - /* - ** Reminder: A function may be called with func/args above L->maxstack, - ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot, - ** too. This means all FUNC* ops (including fast functions) must check - ** for stack overflow _before_ adding more slots! - */ - - case BC_FUNCF: - |.if JIT - | hotcall RB - |.endif - case BC_FUNCV: /* NYI: compiled vararg functions. */ - | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op. - break; - - case BC_JFUNCF: -#if !LJ_HASJIT - break; -#endif - case BC_IFUNCF: - | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 - | mov KBASE, [PC-4+PC2PROTO(k)] - | mov L:RB, SAVE_L - | lea RA, [BASE+RA*8] // Top of frame. - | cmp RA, L:RB->maxstack - | ja ->vm_growstack_f - | movzx RA, byte [PC-4+PC2PROTO(numparams)] - | cmp NARGS:RD, RA // Check for missing parameters. - | jbe >3 - |2: - if (op == BC_JFUNCF) { - | movzx RD, PC_RD - | jmp =>BC_JLOOP - } else { - | ins_next - } - | - |3: // Clear missing parameters. - | mov dword [BASE+NARGS:RD*8-4], LJ_TNIL - | add NARGS:RD, 1 - | cmp NARGS:RD, RA - | jbe <3 - | jmp <2 - break; - - case BC_JFUNCV: -#if !LJ_HASJIT - break; -#endif - | int3 // NYI: compiled vararg functions - break; /* NYI: compiled vararg functions. */ - - case BC_IFUNCV: - | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 - | lea RB, [NARGS:RD*8+FRAME_VARG] - | lea RD, [BASE+NARGS:RD*8] - | mov LFUNC:KBASE, [BASE-8] - | mov [RD-4], RB // Store delta + FRAME_VARG. - | mov [RD-8], LFUNC:KBASE // Store copy of LFUNC. - | mov L:RB, SAVE_L - | lea RA, [RD+RA*8] - | cmp RA, L:RB->maxstack - | ja ->vm_growstack_v // Need to grow stack. - | mov RA, BASE - | mov BASE, RD - | movzx RB, byte [PC-4+PC2PROTO(numparams)] - | test RB, RB - | jz >2 - |1: // Copy fixarg slots up to new frame. - | add RA, 8 - | cmp RA, BASE - | jnb >3 // Less args than parameters? - | mov KBASE, [RA-8] - | mov [RD], KBASE - | mov KBASE, [RA-4] - | mov [RD+4], KBASE - | add RD, 8 - | mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC). - | sub RB, 1 - | jnz <1 - |2: - if (op == BC_JFUNCV) { - | movzx RD, PC_RD - | jmp =>BC_JLOOP - } else { - | mov KBASE, [PC-4+PC2PROTO(k)] - | ins_next - } - | - |3: // Clear missing parameters. - | mov dword [RD+4], LJ_TNIL - | add RD, 8 - | sub RB, 1 - | jnz <3 - | jmp <2 - break; - - case BC_FUNCC: - case BC_FUNCCW: - | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1 - | mov CFUNC:RB, [BASE-8] - | mov KBASEa, CFUNC:RB->f - | mov L:RB, SAVE_L - | lea RD, [BASE+NARGS:RD*8-8] - | mov L:RB->base, BASE - | lea RA, [RD+8*LUA_MINSTACK] - | cmp RA, L:RB->maxstack - | mov L:RB->top, RD - if (op == BC_FUNCC) { - |.if X64 - | mov CARG1d, L:RB // Caveat: CARG1d may be RA. - |.else - | mov ARG1, L:RB - |.endif - } else { - |.if X64 - | mov CARG2, KBASEa - | mov CARG1d, L:RB // Caveat: CARG1d may be RA. - |.else - | mov ARG2, KBASEa - | mov ARG1, L:RB - |.endif - } - | ja ->vm_growstack_c // Need to grow stack. - | set_vmstate C - if (op == BC_FUNCC) { - | call KBASEa // (lua_State *L) - } else { - | // (lua_State *L, lua_CFunction f) - | call aword [DISPATCH+DISPATCH_GL(wrapf)] - } - | // nresults returned in eax (RD). - | mov BASE, L:RB->base - | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | set_vmstate INTERP - | lea RA, [BASE+RD*8] - | neg RA - | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 - | mov PC, [BASE-4] // Fetch PC of caller. - | jmp ->vm_returnc - break; - - /* ---------------------------------------------------------------------- */ - - default: - fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); - exit(2); - break; - } -} - -static int build_backend(BuildCtx *ctx) -{ - int op; - dasm_growpc(Dst, BC__MAX); - build_subroutines(ctx); - |.code_op - for (op = 0; op < BC__MAX; op++) - build_ins(ctx, (BCOp)op, op); - return BC__MAX; -} - -/* Emit pseudo frame-info for all assembler functions. */ -static void emit_asm_debug(BuildCtx *ctx) -{ - int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); -#if LJ_64 -#define SZPTR "8" -#define BSZPTR "3" -#define REG_SP "0x7" -#define REG_RA "0x10" -#else -#define SZPTR "4" -#define BSZPTR "2" -#define REG_SP "0x4" -#define REG_RA "0x8" -#endif - switch (ctx->mode) { - case BUILD_elfasm: - fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); - fprintf(ctx->fp, - ".Lframe0:\n" - "\t.long .LECIE0-.LSCIE0\n" - ".LSCIE0:\n" - "\t.long 0xffffffff\n" - "\t.byte 0x1\n" - "\t.string \"\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -" SZPTR "\n" - "\t.byte " REG_RA "\n" - "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n" - "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n" - "\t.align " SZPTR "\n" - ".LECIE0:\n\n"); - fprintf(ctx->fp, - ".LSFDE0:\n" - "\t.long .LEFDE0-.LASFDE0\n" - ".LASFDE0:\n" - "\t.long .Lframe0\n" -#if LJ_64 - "\t.quad .Lbegin\n" - "\t.quad %d\n" - "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ - "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ - "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ - "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */ - "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */ -#if LJ_NO_UNWIND - "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */ - "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */ -#endif -#else - "\t.long .Lbegin\n" - "\t.long %d\n" - "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ - "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */ - "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */ - "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */ - "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */ -#endif - "\t.align " SZPTR "\n" - ".LEFDE0:\n\n", fcofs, CFRAME_SIZE); -#if LJ_HASFFI - fprintf(ctx->fp, - ".LSFDE1:\n" - "\t.long .LEFDE1-.LASFDE1\n" - ".LASFDE1:\n" - "\t.long .Lframe0\n" -#if LJ_64 - "\t.quad lj_vm_ffi_call\n" - "\t.quad %d\n" - "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ - "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ - "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */ - "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ -#else - "\t.long lj_vm_ffi_call\n" - "\t.long %d\n" - "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */ - "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */ - "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */ - "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */ -#endif - "\t.align " SZPTR "\n" - ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); -#endif -#if !LJ_NO_UNWIND -#if (defined(__sun__) && defined(__svr4__)) -#if LJ_64 - fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n"); -#else - fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n"); -#endif -#else - fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); -#endif - fprintf(ctx->fp, - ".Lframe1:\n" - "\t.long .LECIE1-.LSCIE1\n" - ".LSCIE1:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.string \"zPR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -" SZPTR "\n" - "\t.byte " REG_RA "\n" - "\t.uleb128 6\n" /* augmentation length */ - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.long lj_err_unwind_dwarf-.\n" - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n" - "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n" - "\t.align " SZPTR "\n" - ".LECIE1:\n\n"); - fprintf(ctx->fp, - ".LSFDE2:\n" - "\t.long .LEFDE2-.LASFDE2\n" - ".LASFDE2:\n" - "\t.long .LASFDE2-.Lframe1\n" - "\t.long .Lbegin-.\n" - "\t.long %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ -#if LJ_64 - "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ - "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ - "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */ - "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */ -#else - "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */ - "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */ - "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */ - "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */ -#endif - "\t.align " SZPTR "\n" - ".LEFDE2:\n\n", fcofs, CFRAME_SIZE); -#if LJ_HASFFI - fprintf(ctx->fp, - ".Lframe2:\n" - "\t.long .LECIE2-.LSCIE2\n" - ".LSCIE2:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.string \"zR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -" SZPTR "\n" - "\t.byte " REG_RA "\n" - "\t.uleb128 1\n" /* augmentation length */ - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n" - "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n" - "\t.align " SZPTR "\n" - ".LECIE2:\n\n"); - fprintf(ctx->fp, - ".LSFDE3:\n" - "\t.long .LEFDE3-.LASFDE3\n" - ".LASFDE3:\n" - "\t.long .LASFDE3-.Lframe2\n" - "\t.long lj_vm_ffi_call-.\n" - "\t.long %d\n" - "\t.uleb128 0\n" /* augmentation length */ -#if LJ_64 - "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ - "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ - "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */ - "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ -#else - "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */ - "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */ - "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */ - "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */ -#endif - "\t.align " SZPTR "\n" - ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); -#endif -#endif - break; -#if !LJ_NO_UNWIND - /* Mental note: never let Apple design an assembler. - ** Or a linker. Or a plastic case. But I digress. - */ - case BUILD_machasm: { -#if LJ_HASFFI - int fcsize = 0; -#endif - int i; - fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n"); - fprintf(ctx->fp, - "EH_frame1:\n" - "\t.set L$set$x,LECIEX-LSCIEX\n" - "\t.long L$set$x\n" - "LSCIEX:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.ascii \"zPR\\0\"\n" - "\t.byte 0x1\n" - "\t.byte 128-" SZPTR "\n" - "\t.byte " REG_RA "\n" - "\t.byte 6\n" /* augmentation length */ - "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */ -#if LJ_64 - "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n" - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n" -#else - "\t.long L_lj_err_unwind_dwarf$non_lazy_ptr-.\n" - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH-O. */ -#endif - "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n" - "\t.align " BSZPTR "\n" - "LECIEX:\n\n"); - for (i = 0; i < ctx->nsym; i++) { - const char *name = ctx->sym[i].name; - int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs; - if (size == 0) continue; -#if LJ_HASFFI - if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; } -#endif - fprintf(ctx->fp, - "%s.eh:\n" - "LSFDE%d:\n" - "\t.set L$set$%d,LEFDE%d-LASFDE%d\n" - "\t.long L$set$%d\n" - "LASFDE%d:\n" - "\t.long LASFDE%d-EH_frame1\n" - "\t.long %s-.\n" - "\t.long %d\n" - "\t.byte 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */ -#if LJ_64 - "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */ - "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */ - "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */ - "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */ -#else - "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/ - "\t.byte 0x87\n\t.byte 0x3\n" /* offset edi */ - "\t.byte 0x86\n\t.byte 0x4\n" /* offset esi */ - "\t.byte 0x83\n\t.byte 0x5\n" /* offset ebx */ -#endif - "\t.align " BSZPTR "\n" - "LEFDE%d:\n\n", - name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i); - } -#if LJ_HASFFI - if (fcsize) { - fprintf(ctx->fp, - "EH_frame2:\n" - "\t.set L$set$y,LECIEY-LSCIEY\n" - "\t.long L$set$y\n" - "LSCIEY:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.ascii \"zR\\0\"\n" - "\t.byte 0x1\n" - "\t.byte 128-" SZPTR "\n" - "\t.byte " REG_RA "\n" - "\t.byte 1\n" /* augmentation length */ -#if LJ_64 - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n" -#else - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH. */ -#endif - "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n" - "\t.align " BSZPTR "\n" - "LECIEY:\n\n"); - fprintf(ctx->fp, - "_lj_vm_ffi_call.eh:\n" - "LSFDEY:\n" - "\t.set L$set$yy,LEFDEY-LASFDEY\n" - "\t.long L$set$yy\n" - "LASFDEY:\n" - "\t.long LASFDEY-EH_frame2\n" - "\t.long _lj_vm_ffi_call-.\n" - "\t.long %d\n" - "\t.byte 0\n" /* augmentation length */ -#if LJ_64 - "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */ - "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */ - "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */ - "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */ -#else - "\t.byte 0xe\n\t.byte 8\n" /* def_cfa_offset */ - "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/ - "\t.byte 0xd\n\t.byte 0x4\n" /* def_cfa_register ebp */ - "\t.byte 0x83\n\t.byte 0x3\n" /* offset ebx */ -#endif - "\t.align " BSZPTR "\n" - "LEFDEY:\n\n", fcsize); - } -#endif -#if !LJ_64 - fprintf(ctx->fp, - "\t.non_lazy_symbol_pointer\n" - "L_lj_err_unwind_dwarf$non_lazy_ptr:\n" - ".indirect_symbol _lj_err_unwind_dwarf\n" - ".long 0\n\n"); - fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n"); - { - const char *const *xn; - for (xn = ctx->extnames; *xn; xn++) - if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) - fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn); - } -#endif - fprintf(ctx->fp, ".subsections_via_symbols\n"); - } - break; -#endif - default: /* Difficult for other modes. */ - break; - } -} -