diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h deleted file mode 100644 index 37bfa40f2f..0000000000 --- a/src/lj_asm_arm.h +++ /dev/null @@ -1,2210 +0,0 @@ -/* -** ARM IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -*/ - -/* -- Register allocator extensions --------------------------------------- */ - -/* Allocate a register with a hint. If ref already has a register it is reused. Otherwise the hint is stored on ref first (unless ref already carries a hint or is a cross-ref) and a register is allocated from allow. ra_noweak() is applied to the result before it is returned. */ -static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) -{ - Reg r = IR(ref)->r; - if (ra_noreg(r)) { - if (!ra_hashint(r) && !iscrossref(as, ref)) - ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ - r = ra_allocref(as, ref, allow); - } - ra_noweak(as, r); - return r; -} - -/* Allocate a scratch register pair. Returns the first register r of the pair r, r+1; r is asserted to be in RSET_GPREVEN and both registers are passed to ra_modified(). Candidates are tried in this order: an even reg that is free together with its odd partner, a free even reg whose partner is merely allowed, a free odd reg whose even partner is allowed, and finally an eviction via ra_evict(). */ -static Reg ra_scratchpair(ASMState *as, RegSet allow) -{ - RegSet pick1 = as->freeset & allow; - RegSet pick2 = pick1 & (pick1 >> 1) & RSET_GPREVEN; - Reg r; - if (pick2) { /* Both halves of an even/odd pair are free and allowed. */ - r = rset_picktop(pick2); - } else { - RegSet pick = pick1 & (allow >> 1) & RSET_GPREVEN; - if (pick) { /* Even half is free; ra_restore() handles the ref held in r+1. */ - r = rset_picktop(pick); - ra_restore(as, regcost_ref(as->cost[r+1])); - } else { - pick = pick1 & (allow << 1) & RSET_GPRODD; - if (pick) { /* Odd half is free; r is whatever ra_restore() returns for the ref in the even half. */ - r = ra_restore(as, regcost_ref(as->cost[rset_picktop(pick)-1])); - } else { /* Nothing free: evict an even reg whose odd partner is allowed, too. */ - r = ra_evict(as, allow & (allow >> 1) & RSET_GPREVEN); - ra_restore(as, regcost_ref(as->cost[r+1])); - } - } - } - lua_assert(rset_test(RSET_GPREVEN, r)); - ra_modified(as, r); - ra_modified(as, r+1); - RA_DBGX((as, "scratchpair $r $r", r, r+1)); - return r; -} - -#if !LJ_SOFTFP -/* Allocate two source registers for three-operand instructions. 
*/ /* Operands are ir->op1 (left) and ir->op2 (right). An operand that already has a register keeps it; when one still needs a register, the other operand's register is excluded from allow. The result packs both registers as left | (right << 8). */ -static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) -{ - IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); - Reg left = irl->r, right = irr->r; - if (ra_hasreg(left)) { - ra_noweak(as, left); - if (ra_noreg(right)) - right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); - else - ra_noweak(as, right); - } else if (ra_hasreg(right)) { - ra_noweak(as, right); - left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); - } else if (ra_hashint(right)) { /* Allocate the hinted right operand first. */ - right = ra_allocref(as, ir->op2, allow); - left = ra_alloc1(as, ir->op1, rset_exclude(allow, right)); - } else { - left = ra_allocref(as, ir->op1, allow); - right = ra_alloc1(as, ir->op2, rset_exclude(allow, left)); - } - return left | (right << 8); -} -#endif - -/* -- Guard handling ------------------------------------------------------ */ - -/* Generate an exit stub group at the bottom of the reserved MCode memory. The group consists of a 4 word header followed by EXITSTUBS_PER_GROUP branch words. asm_mclimit() is called if the group would reach as->mctop. Returns a pointer to the first branch word of the group. */ -static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) -{ - MCode *mxp = as->mcbot; - int i; - if (mxp + 4*4+4*EXITSTUBS_PER_GROUP >= as->mctop) - asm_mclimit(as); - /* str lr, [sp]; bl ->vm_exit_handler; .long DISPATCH_address, group. */ - *mxp++ = ARMI_STR|ARMI_LS_P|ARMI_LS_U|ARMF_D(RID_LR)|ARMF_N(RID_SP); - *mxp = ARMI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)-2)&0x00ffffffu); - mxp++; - *mxp++ = (MCode)i32ptr(J2GG(as->J)->dispatch); /* DISPATCH address */ - *mxp++ = group*EXITSTUBS_PER_GROUP; /* Number of the first exit in this group. */ - for (i = 0; i < EXITSTUBS_PER_GROUP; i++) /* The backward offset grows with i, so all stubs share one target; presumably the group header (offsets are relative to pc+8) -- TODO confirm. */ - *mxp++ = ARMI_B|((-6-i)&0x00ffffffu); - lj_mcode_sync(as->mcbot, mxp); - lj_mcode_commitbot(as->J, mxp); - as->mcbot = mxp; - as->mclim = as->mcbot + MCLIM_REDZONE; - return mxp - EXITSTUBS_PER_GROUP; -} - -/* Setup all needed exit stubs. 
*/ /* Calls lj_trace_err() with LJ_TRERR_SNAPOV if nexits reaches EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR. Otherwise every stub group needed to cover nexits exits that is still NULL in J->exitstubgroup[] is generated. */ -static void asm_exitstub_setup(ASMState *as, ExitNo nexits) -{ - ExitNo i; - if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) - lj_trace_err(as->J, LJ_TRERR_SNAPOV); - for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++) - if (as->J->exitstubgroup[i] == NULL) - as->J->exitstubgroup[i] = asm_exitstub_gen(as, i); -} - -/* Emit conditional branch to exit for guard. The target is the exit stub of the current snapshot (as->snapno). If the current mcode position equals as->invmcp, as->loopinv is set, a BL to the stub is written at that position and a branch with the inverted condition (cc^1) to the following word is emitted instead. */ -static void asm_guardcc(ASMState *as, ARMCC cc) -{ - MCode *target = exitstub_addr(as->J, as->snapno); - MCode *p = as->mcp; - if (LJ_UNLIKELY(p == as->invmcp)) { - as->loopinv = 1; - *p = ARMI_BL | ((target-p-2) & 0x00ffffffu); - emit_branch(as, ARMF_CC(ARMI_B, cc^1), p+1); - return; - } - emit_branch(as, ARMF_CC(ARMI_BL, cc), target); -} - -/* -- Operand fusion ------------------------------------------------------ */ - -/* Limit linear search to this distance. Avoids O(n^2) behavior. */ -#define CONFLICT_SEARCH_LIM 31 - -/* Check if there's no conflicting instruction between curins and ref. Returns 1 if no IR instruction with opcode conflict lies strictly between ref and as->curins. Returns 0 if one is found or if curins is more than CONFLICT_SEARCH_LIM instructions after ref. */ -static int noconflict(ASMState *as, IRRef ref, IROp conflict) -{ - IRIns *ir = as->ir; - IRRef i = as->curins; - if (i > ref + CONFLICT_SEARCH_LIM) - return 0; /* Give up, ref is too far away. */ - while (--i > ref) - if (ir[i].o == conflict) - return 0; /* Conflict found. */ - return 1; /* Ok, no conflict. */ -} - -/* Fuse the array base of colocated arrays. Returns sizeof(GCtab) as the offset to add to the table pointer if ref is a TNEW with op1 <= LJ_MAX_COLOSIZE, neverfuse() is false and no NEWREF conflicts; otherwise returns 0. */ -static int32_t asm_fuseabase(ASMState *as, IRRef ref) -{ - IRIns *ir = IR(ref); - if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && - !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) - return (int32_t)sizeof(GCtab); - return 0; -} - -/* Fuse array/hash/upvalue reference into register+offset operand. Returns the base register and stores the offset in *ofsp. lim bounds the offsets accepted for fused AREF and HREFK references. If nothing can be fused, ref itself is allocated and the offset is 0. */ -static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, - int lim) -{ - IRIns *ir = IR(ref); - if (ra_noreg(ir->r)) { - if (ir->o == IR_AREF) { - if (mayfuse(as, ref)) { - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? 
tab : ir->op1; - ofs += 8*IR(ir->op2)->i; /* Constant index times 8 bytes per array slot. */ - if (ofs > -lim && ofs < lim) { - *ofsp = ofs; - return ra_alloc1(as, refa, allow); - } - } - } - } else if (ir->o == IR_HREFK) { - if (mayfuse(as, ref)) { - int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); - if (ofs < lim) { - *ofsp = ofs; - return ra_alloc1(as, ir->op1, allow); - } - } - } else if (ir->o == IR_UREFC) { - if (irref_isk(ir->op1)) { /* Constant function: split the address of uv.tv into a constant base and a low offset. */ - GCfunc *fn = ir_kfunc(IR(ir->op1)); - int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv); - *ofsp = (ofs & 255); /* Mask out less bits to allow LDRD. */ - return ra_allock(as, (ofs & ~255), allow); - } - } - } - *ofsp = 0; - return ra_alloc1(as, ref, allow); -} - -/* Fuse m operand into arithmetic/logic instructions. Returns the operand encoding to be combined with the instruction: the register of ref if it already has one, the non-zero result of emit_isk12() for a constant, or a register with a fused shift for an unallocated BSHL..BROR (constant or register shift amount) or for x+x (encoded as lsl #1). In all other cases ref is allocated to a register from allow. */ -static uint32_t asm_fuseopm(ASMState *as, ARMIns ai, IRRef ref, RegSet allow) -{ - IRIns *ir = IR(ref); - if (ra_hasreg(ir->r)) { - ra_noweak(as, ir->r); - return ARMF_M(ir->r); - } else if (irref_isk(ref)) { - uint32_t k = emit_isk12(ai, ir->i); - if (k) - return k; - } else if (mayfuse(as, ref)) { - if (ir->o >= IR_BSHL && ir->o <= IR_BROR) { - Reg m = ra_alloc1(as, ir->op1, allow); - ARMShift sh = ir->o == IR_BSHL ? ARMSH_LSL : - ir->o == IR_BSHR ? ARMSH_LSR : - ir->o == IR_BSAR ? ARMSH_ASR : ARMSH_ROR; - if (irref_isk(ir->op2)) { - return m | ARMF_SH(sh, (IR(ir->op2)->i & 31)); - } else { - Reg s = ra_alloc1(as, ir->op2, rset_exclude(allow, m)); - return m | ARMF_RSH(sh, s); - } - } else if (ir->o == IR_ADD && ir->op1 == ir->op2) { - Reg m = ra_alloc1(as, ir->op1, allow); - return m | ARMF_SH(ARMSH_LSL, 1); - } - } - return ra_allocref(as, ref, allow); -} - -/* Fuse shifts into loads/stores. Only bother with BSHL 2 => lsl #2. Returns the ref of the unshifted operand if ref is an unallocated, fusable BSHL by the constant 2, else 0. */ -static IRRef asm_fuselsl2(ASMState *as, IRRef ref) -{ - IRIns *ir = IR(ref); - if (ra_noreg(ir->r) && mayfuse(as, ref) && ir->o == IR_BSHL && - irref_isk(ir->op2) && IR(ir->op2)->i == 2) - return ir->op1; - return 0; /* No fusion. */ -} - -/* Fuse XLOAD/XSTORE reference into load/store operand. 
*/ /* Emits the load/store ai for register rd with the address given by ref plus ofs. Bits of ai select the addressing form: with 0x08000000 set (and !LJ_SOFTFP) emit_vlso() is used, offsets must be below 1024 in magnitude and a multiple of 4; with 0x04000000 set emit_lso() is used with a limit of 4096; otherwise emit_lsox() is used with a limit of 256. */ -static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref, - RegSet allow, int32_t ofs) -{ - IRIns *ir = IR(ref); - Reg base; - if (ra_noreg(ir->r) && canfuse(as, ir)) { - int32_t lim = (!LJ_SOFTFP && (ai & 0x08000000)) ? 1024 : - (ai & 0x04000000) ? 4096 : 256; - if (ir->o == IR_ADD) { - int32_t ofs2; - if (irref_isk(ir->op2) && - (ofs2 = ofs + IR(ir->op2)->i) > -lim && ofs2 < lim && - (!(!LJ_SOFTFP && (ai & 0x08000000)) || !(ofs2 & 3))) { /* Fold the constant addend into the offset. */ - ofs = ofs2; - ref = ir->op1; - } else if (ofs == 0 && !(!LJ_SOFTFP && (ai & 0x08000000))) { /* Use register+register addressing. */ - IRRef lref = ir->op1, rref = ir->op2; - Reg rn, rm; - if ((ai & 0x04000000)) { /* Try to fuse a shift by 2 of either operand as the index. */ - IRRef sref = asm_fuselsl2(as, rref); - if (sref) { - rref = sref; - ai |= ARMF_SH(ARMSH_LSL, 2); - } else if ((sref = asm_fuselsl2(as, lref)) != 0) { - lref = rref; - rref = sref; - ai |= ARMF_SH(ARMSH_LSL, 2); - } - } - rn = ra_alloc1(as, lref, allow); - rm = ra_alloc1(as, rref, rset_exclude(allow, rn)); - if ((ai & 0x04000000)) ai |= ARMI_LS_R; - emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm); - return; - } - } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) { - lua_assert(ofs == 0); - ofs = (int32_t)sizeof(GCstr); /* String data follows the GCstr header. */ - if (irref_isk(ir->op2)) { - ofs += IR(ir->op2)->i; - ref = ir->op1; - } else if (irref_isk(ir->op1)) { - ofs += IR(ir->op1)->i; - ref = ir->op2; - } else { - /* NYI: Fuse ADD with constant. 
*/ - Reg rn = ra_alloc1(as, ir->op1, allow); - uint32_t m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn)); - if ((ai & 0x04000000)) - emit_lso(as, ai, rd, rd, ofs); - else - emit_lsox(as, ai, rd, rd, ofs); - emit_dn(as, ARMI_ADD^m, rd, rn); /* rd doubles as the address temporary here. */ - return; - } - if (ofs <= -lim || ofs >= lim) { /* Offset out of range: put it in a register and use register+register addressing. */ - Reg rn = ra_alloc1(as, ref, allow); - Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn)); - if ((ai & 0x04000000)) ai |= ARMI_LS_R; - emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm); - return; - } - } - } - base = ra_alloc1(as, ref, allow); -#if !LJ_SOFTFP - if ((ai & 0x08000000)) - emit_vlso(as, ai, rd, base, ofs); - else -#endif - if ((ai & 0x04000000)) - emit_lso(as, ai, rd, base, ofs); - else - emit_lsox(as, ai, rd, base, ofs); -} - -#if !LJ_SOFTFP -/* Fuse to multiply-add/sub instruction. Returns 1 and emits the fused instruction if the operands of ir differ and one of them is a fusable MUL without a register; returns 0 otherwise. ai is used if the MUL is the left operand, air if it is the right operand. The other operand is allocated with dest as hint and copied to dest with VMOV_D if it ends up in a different register. */ -static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air) -{ - IRRef lref = ir->op1, rref = ir->op2; - IRIns *irm; - if (lref != rref && - ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && - ra_noreg(irm->r)) || - (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && - (rref = lref, ai = air, ra_noreg(irm->r))))) { - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); - Reg right, left = ra_alloc2(as, irm, - rset_exclude(rset_exclude(RSET_FPR, dest), add)); - right = (left >> 8); left &= 255; /* Unpack the register pair returned by ra_alloc2(). */ - emit_dnm(as, ai, (dest & 15), (left & 15), (right & 15)); - if (dest != add) emit_dm(as, ARMI_VMOV_D, (dest & 15), (add & 15)); - return 1; - } - return 0; -} -#endif - -/* -- Calls --------------------------------------------------------------- */ - -/* Generate a call to a C function. 
*/ /* The call instruction itself is only emitted if ci->func is non-NULL. The CCI_XNARGS(ci) arguments in args[] are then set up: integer arguments go to the argument GPRs while any are left and to the stack afterwards. With !LJ_SOFTFP, FP arguments go to argument FPRs if !LJ_ABI_SOFTFP and the call is not CCI_VARARG; otherwise they are moved to GPRs or stored to the stack. A zero ref still consumes its argument slot. */ -static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) -{ - uint32_t n, nargs = CCI_XNARGS(ci); - int32_t ofs = 0; -#if LJ_SOFTFP - Reg gpr = REGARG_FIRSTGPR; -#else - Reg gpr, fpr = REGARG_FIRSTFPR, fprodd = 0; -#endif - if ((void *)ci->func) - emit_call(as, (void *)ci->func); -#if !LJ_SOFTFP - for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) - as->cost[gpr] = REGCOST(~0u, ASMREF_L); - gpr = REGARG_FIRSTGPR; -#endif - for (n = 0; n < nargs; n++) { /* Setup args. */ - IRRef ref = args[n]; - IRIns *ir = IR(ref); -#if !LJ_SOFTFP - if (ref && irt_isfp(ir->t)) { - RegSet of = as->freeset; - Reg src; - if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) { - if (irt_isnum(ir->t)) { - if (fpr <= REGARG_LASTFPR) { - ra_leftov(as, fpr, ref); - fpr++; - continue; - } - } else if (fprodd) { /* Ick. */ - src = ra_alloc1(as, ref, RSET_FPR); - emit_dm(as, ARMI_VMOV_S, (fprodd & 15), (src & 15) | 0x00400000); - fprodd = 0; - continue; - } else if (fpr <= REGARG_LASTFPR) { /* Non-number FP argument: remember this FPR in fprodd for the next such argument. */ - ra_leftov(as, fpr, ref); - fprodd = fpr++; - continue; - } - /* Workaround to protect argument GPRs from being used for remat. */ - as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1); - src = ra_alloc1(as, ref, RSET_FPR); /* May alloc GPR to remat FPR. */ - as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); - fprodd = 0; - goto stackfp; - } - /* Workaround to protect argument GPRs from being used for remat. */ - as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1); - src = ra_alloc1(as, ref, RSET_FPR); /* May alloc GPR to remat FPR. */ - as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); - if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u; /* Numbers start at an even GPR number. */ - if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ - if (irt_isnum(ir->t)) { - lua_assert(rset_test(as->freeset, gpr+1)); /* Ditto. 
*/ - emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15)); - gpr += 2; - } else { - emit_dn(as, ARMI_VMOV_R_S, gpr, (src & 15)); - gpr++; - } - } else { - stackfp: - if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; /* Rounds ofs up to a multiple of 8, given that ofs only ever grows in steps of 4 or 8. */ - emit_spstore(as, ir, src, ofs); - ofs += irt_isnum(ir->t) ? 8 : 4; - } - } else -#endif - { - if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ - if (ref) ra_leftov(as, gpr, ref); - gpr++; - } else { - if (ref) { - Reg r = ra_alloc1(as, ref, RSET_GPR); - emit_spstore(as, ir, r, ofs); - } - ofs += 4; - } - } - } -} - -/* Setup result reg/sp for call. Evict scratch regs. Registers already assigned to ir (and to a following non-nil HIOP) are excluded from the eviction. If the result is used, its destination is tied to the return register(s): RID_RET, a register pair for HIOP results, or for FP results either RID_FPRET or a move from RID_RETLO/RID_RETHI (resp. RID_RET) when LJ_ABI_SOFTFP is set or the call has CCI_CASTU64/CCI_VARARG. */ -static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - RegSet drop = RSET_SCRATCH; - int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - if (hiop && ra_hasreg((ir+1)->r)) - rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ - ra_evictset(as, drop); /* Evictions must be performed first. */ - if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); - if (!LJ_SOFTFP && irt_isfp(ir->t)) { - if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) { - Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); - if (irt_isnum(ir->t)) - emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, dest); - else - emit_dn(as, ARMI_VMOV_S_R, RID_RET, dest); - } else { - ra_destreg(as, ir, RID_FPRET); - } - } else if (hiop) { - ra_destpair(as, ir); - } else { - ra_destreg(as, ir, RID_RET); - } - } - UNUSED(ci); -} - /* Assemble a call whose target is given by ir->op2 (looking through a CARG). A constant target is passed to asm_gencall() as ci.func. Otherwise the target is allocated to a register in the range R4..R12, BLXr is emitted here and ci.func is set to NULL so asm_gencall() emits no call of its own. */ -static void asm_callx(ASMState *as, IRIns *ir) -{ - IRRef args[CCI_NARGS_MAX*2]; - CCallInfo ci; - IRRef func; - IRIns *irf; - ci.flags = asm_callx_flags(as, ir); - asm_collectargs(as, ir, &ci, args); - asm_setupresult(as, ir, &ci); - func = ir->op2; irf = IR(func); - if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } - if (irref_isk(func)) { /* Call to constant address. 
*/ - ci.func = (ASMFunction)(void *)(irf->i); - } else { /* Need a non-argument register for indirect calls. */ - Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_R4, RID_R12+1)); - emit_m(as, ARMI_BLXr, freg); - ci.func = (ASMFunction)(void *)0; - } - asm_gencall(as, &ci, args); -} - -/* -- Returns ------------------------------------------------------------- */ - -/* Return to lower frame. Guard that it goes to the right spot. delta is 1+LJ_FR2 plus the A operand of the bytecode instruction preceding pc; as->topslot is lowered by delta and clamped at 0. The emitted code loads the word at base-4, compares it with pc and exits on mismatch, then subtracts 8*delta from base, stores base to jit_base and to the spill slot of REF_BASE. NOTE(review): this reading assumes the emit_* calls produce code in reverse order (cf. --as->mcp in asm_href) -- confirm. */ -static void asm_retf(ASMState *as, IRIns *ir) -{ - Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); - void *pc = ir_kptr(IR(ir->op2)); - int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); - as->topslot -= (BCReg)delta; - if ((int32_t)as->topslot < 0) as->topslot = 0; - irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ - /* Need to force a spill on REF_BASE now to update the stack slot. */ - emit_lso(as, ARMI_STR, base, RID_SP, ra_spill(as, IR(REF_BASE))); - emit_setgl(as, base, jit_base); - emit_addptr(as, base, -8*delta); - asm_guardcc(as, CC_NE); - emit_nm(as, ARMI_CMP, RID_TMP, - ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base))); - emit_lso(as, ARMI_LDR, RID_TMP, base, -4); -} - -/* -- Type conversions ---------------------------------------------------- */ - -#if !LJ_SOFTFP /* Guarded conversion of the number in FPR left to an integer in the GPR destination of ir. Read bottom-up (code appears to be emitted in reverse order -- TODO confirm): convert to S32 in a scratch FPR, move it to dest, convert back to F64, compare with left and exit on CC_NE. */ -static void asm_tointg(ASMState *as, IRIns *ir, Reg left) -{ - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - Reg dest = ra_dest(as, ir, RSET_GPR); - asm_guardcc(as, CC_NE); - emit_d(as, ARMI_VMRS, 0); - emit_dm(as, ARMI_VCMP_D, (tmp & 15), (left & 15)); - emit_dm(as, ARMI_VCVT_F64_S32, (tmp & 15), (tmp & 15)); - emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); - emit_dm(as, ARMI_VCVT_S32_F64, (tmp & 15), (left & 15)); -} - /* Assemble TOBIT: the two FP operands op1 and op2 are added with VADD_D into a scratch FPR, which is then moved to the GPR destination with VMOV_R_S. */ -static void asm_tobit(ASMState *as, IRIns *ir) -{ - RegSet allow = RSET_FPR; - Reg left = ra_alloc1(as, ir->op1, allow); - Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); - Reg tmp = ra_scratch(as, rset_clear(allow, right)); - Reg dest = ra_dest(as, ir, RSET_GPR); - emit_dn(as, 
ARMI_VMOV_R_S, dest, (tmp & 15)); - emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15)); -} -#endif - /* Assemble a type conversion. The source type st is taken from ir->op2 & IRCONV_SRCMASK, the result type from ir->t. With !LJ_SOFTFP this handles FP to FP, integer to FP and FP to integer conversions (guarded ones go through asm_tointg()). In all configurations it handles extension of 8/16 bit integers to 32 bit and 32/32 bit casts, which only move registers. 64 bit integer conversions are asserted not to reach this function. */ -static void asm_conv(ASMState *as, IRIns *ir) -{ - IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); -#if !LJ_SOFTFP - int stfp = (st == IRT_NUM || st == IRT_FLOAT); -#endif - IRRef lref = ir->op1; - /* 64 bit integer conversions are handled by SPLIT. */ - lua_assert(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64)); -#if LJ_SOFTFP - /* FP conversions are handled by SPLIT. */ - lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); - /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ -#else - lua_assert(irt_type(ir->t) != st); - if (irt_isfp(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - if (stfp) { /* FP to FP conversion. */ - emit_dm(as, st == IRT_NUM ? ARMI_VCVT_F32_F64 : ARMI_VCVT_F64_F32, - (dest & 15), (ra_alloc1(as, lref, RSET_FPR) & 15)); - } else { /* Integer to FP conversion. */ - Reg left = ra_alloc1(as, lref, RSET_GPR); - ARMIns ai = irt_isfloat(ir->t) ? - (st == IRT_INT ? ARMI_VCVT_F32_S32 : ARMI_VCVT_F32_U32) : - (st == IRT_INT ? ARMI_VCVT_F64_S32 : ARMI_VCVT_F64_U32); - emit_dm(as, ai, (dest & 15), (dest & 15)); /* The conversion is done in place in dest. */ - emit_dn(as, ARMI_VMOV_S_R, left, (dest & 15)); - } - } else if (stfp) { /* FP to integer conversion. */ - if (irt_isguard(ir->t)) { - /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); - asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); - } else { - Reg left = ra_alloc1(as, lref, RSET_FPR); - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - Reg dest = ra_dest(as, ir, RSET_GPR); - ARMIns ai; - emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); - ai = irt_isint(ir->t) ? - (st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32) : - (st == IRT_NUM ? 
ARMI_VCVT_U32_F64 : ARMI_VCVT_U32_F32); - emit_dm(as, ai, (tmp & 15), (left & 15)); - } - } else -#endif - { - Reg dest = ra_dest(as, ir, RSET_GPR); - if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ - Reg left = ra_alloc1(as, lref, RSET_GPR); - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); - if ((as->flags & JIT_F_ARMV6)) { /* Use the dedicated extend instructions. */ - ARMIns ai = st == IRT_I8 ? ARMI_SXTB : - st == IRT_U8 ? ARMI_UXTB : - st == IRT_I16 ? ARMI_SXTH : ARMI_UXTH; - emit_dm(as, ai, dest, left); - } else if (st == IRT_U8) { - emit_dn(as, ARMI_AND|ARMI_K12|255, dest, left); - } else { /* Otherwise use a pair of shifts through RID_TMP: LSL, then LSR for U16 or ASR for I8/I16. */ - uint32_t shift = st == IRT_I8 ? 24 : 16; - ARMShift sh = st == IRT_U16 ? ARMSH_LSR : ARMSH_ASR; - emit_dm(as, ARMI_MOV|ARMF_SH(sh, shift), dest, RID_TMP); - emit_dm(as, ARMI_MOV|ARMF_SH(ARMSH_LSL, shift), RID_TMP, left); - } - } else { /* Handle 32/32 bit no-op (cast). */ - ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ - } - } -} - /* Assemble a string to number conversion via a call to lj_strscan_num(str, TValue *). The TValue pointer passed to the call is sp, or sp plus the offset of the spill slot(s) of ir if the result has suitable spill slots. The trace exits (CC_EQ) if the call returns 0 in RID_RET. A result that is used and has no suitable spill slot is loaded from [sp] after the guard. */ -static void asm_strto(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; - IRRef args[2]; - Reg rlo = 0, rhi = 0, tmp; - int destused = ra_used(ir); - int32_t ofs = 0; - ra_evictset(as, RSET_SCRATCH); -#if LJ_SOFTFP - if (destused) { - if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && - (ir->s & 1) == 0 && ir->s + 1 == (ir+1)->s) { /* Lo/hi spill slots are adjacent and start at an even slot: let the call write directly into them. */ - int i; - for (i = 0; i < 2; i++) { - Reg r = (ir+i)->r; - if (ra_hasreg(r)) { - ra_free(as, r); - ra_modified(as, r); - emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); - } - } - ofs = sps_scale(ir->s); - destused = 0; - } else { - rhi = ra_dest(as, ir+1, RSET_GPR); - rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); - } - } - asm_guardcc(as, CC_EQ); - if (destused) { - emit_lso(as, ARMI_LDR, rhi, RID_SP, 4); - emit_lso(as, ARMI_LDR, rlo, RID_SP, 0); - } -#else - UNUSED(rhi); - if (destused) { - if (ra_hasspill(ir->s)) { - ofs = sps_scale(ir->s); - destused = 0; - if (ra_hasreg(ir->r)) { - ra_free(as, ir->r); - ra_modified(as, ir->r); - emit_spload(as, ir, ir->r, 
ofs); - } - } else { - rlo = ra_dest(as, ir, RSET_FPR); - } - } - asm_guardcc(as, CC_EQ); - if (destused) - emit_vlso(as, ARMI_VLDR_D, rlo, RID_SP, 0); -#endif - emit_n(as, ARMI_CMP|ARMI_K12|0, RID_RET); /* Test return status. */ - args[0] = ir->op1; /* GCstr *str */ - args[1] = ASMREF_TMP1; /* TValue *n */ - asm_gencall(as, ci, args); - tmp = ra_releasetmp(as, ASMREF_TMP1); - if (ofs == 0) - emit_dm(as, ARMI_MOV, tmp, RID_SP); - else - emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR); -} - -/* -- Memory references --------------------------------------------------- */ - -/* Get pointer to TValue. The pointer for ref is placed in dest. A number constant is addressed directly; any other number is spilled and its spill slot address is used (asserted not to happen with LJ_SOFTFP). For all other types the value word (if the type is not a primitive) and the type word are stored to [sp] and [sp+4] and dest is set to sp. */ -static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) -{ - IRIns *ir = IR(ref); - if (irt_isnum(ir->t)) { - if (irref_isk(ref)) { - /* Use the number constant itself as a TValue. */ - ra_allockreg(as, i32ptr(ir_knum(ir)), dest); - } else { -#if LJ_SOFTFP - lua_assert(0); -#else - /* Otherwise force a spill and use the spill slot. */ - emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR); -#endif - } - } else { - /* Otherwise use [sp] and [sp+4] to hold the TValue. */ - RegSet allow = rset_exclude(RSET_GPR, dest); - Reg type; - emit_dm(as, ARMI_MOV, dest, RID_SP); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, allow); - emit_lso(as, ARMI_STR, src, RID_SP, 0); - } - if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) /* Soft-float number: the following HIOP supplies the word stored at [sp+4]. */ - type = ra_alloc1(as, ref+1, allow); - else - type = ra_allock(as, irt_toitype(ir->t), allow); - emit_lso(as, ARMI_STR, type, RID_SP, 4); - } -} - /* Assemble an array reference: dest = base + 8*idx. For a constant index the byte offset (plus the colocated array offset from asm_fuseabase(), if any) is added as an immediate when emit_isk12() can encode it. */ -static void asm_aref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx, base; - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? 
tab : ir->op1; - uint32_t k = emit_isk12(ARMI_ADD, ofs + 8*IR(ir->op2)->i); - if (k) { - base = ra_alloc1(as, refa, RSET_GPR); - emit_dn(as, ARMI_ADD^k, dest, base); - return; - } - } - base = ra_alloc1(as, ir->op1, RSET_GPR); - idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); - emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, base, idx); -} - -/* Inlined hash lookup. Specialized for key type and for const keys. -** The equivalent C code is: -** Node *n = hashkey(t, key); -** do { -** if (lj_obj_equal(&n->key, key)) return &n->val; -** } while ((n = nextnode(n))); -** return niltv(L); -** -** The merge argument changes how the outcome is reported: with IR_NE the -** trace exits when the key is not found in the chain, with IR_EQ it exits -** when the key is found. For any other value dest is loaded with the -** address of niltv at the end of the chain (only if the result is used). -*/ -static void asm_href(ASMState *as, IRIns *ir, IROp merge) -{ - RegSet allow = RSET_GPR; - int destused = ra_used(ir); - Reg dest = ra_dest(as, ir, allow); - Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); - Reg key = 0, keyhi = 0, keynumhi = RID_NONE, tmp = RID_TMP; - IRRef refkey = ir->op2; - IRIns *irkey = IR(refkey); - IRType1 kt = irkey->t; - int32_t k = 0, khi = emit_isk12(ARMI_CMP, irt_toitype(kt)); - uint32_t khash; - MCLabel l_end, l_loop; - rset_clear(allow, tab); - if (!irref_isk(refkey) || irt_isstr(kt)) { /* Variable key or string key: the key is needed in a register. */ -#if LJ_SOFTFP - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - if (irkey[1].o == IR_HIOP) { - if (ra_hasreg((irkey+1)->r)) { - keynumhi = (irkey+1)->r; - keyhi = RID_TMP; - ra_noweak(as, keynumhi); - } else { - keyhi = keynumhi = ra_allocref(as, refkey+1, allow); - } - rset_clear(allow, keynumhi); - khi = 0; - } -#else - if (irt_isnum(kt)) { - key = ra_scratch(as, allow); - rset_clear(allow, key); - keyhi = keynumhi = ra_scratch(as, allow); - rset_clear(allow, keyhi); - khi = 0; - } else { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - } -#endif - } else if (irt_isnum(kt)) { /* Constant number key: compare lo and hi word, as immediates where emit_isk12() allows. */ - int32_t val = (int32_t)ir_knum(irkey)->u32.lo; - k = emit_isk12(ARMI_CMP, val); - if (!k) { - key = ra_allock(as, val, allow); - rset_clear(allow, key); - } - val = (int32_t)ir_knum(irkey)->u32.hi; - khi = emit_isk12(ARMI_CMP, val); - 
if (!khi) { - keyhi = ra_allock(as, val, allow); - rset_clear(allow, keyhi); - } - } else if (!irt_ispri(kt)) { - k = emit_isk12(ARMI_CMP, irkey->i); - if (!k) { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - } - } - if (!irt_ispri(kt)) - tmp = ra_scratchpair(as, allow); - - /* Key not found in chain: jump to exit (if merged) or load niltv. */ - l_end = emit_label(as); - as->invmcp = NULL; - if (merge == IR_NE) - asm_guardcc(as, CC_AL); - else if (destused) - emit_loada(as, dest, niltvg(J2G(as->J))); - - /* Follow hash chain until the end. */ - l_loop = --as->mcp; - emit_n(as, ARMI_CMP|ARMI_K12|0, dest); - emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(Node, next)); - - /* Type and value comparison. */ - if (merge == IR_EQ) - asm_guardcc(as, CC_EQ); - else - emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end); - if (!irt_ispri(kt)) { - emit_nm(as, ARMF_CC(ARMI_CMP, CC_EQ)^k, tmp, key); - emit_nm(as, ARMI_CMP^khi, tmp+1, keyhi); - emit_lsox(as, ARMI_LDRD, tmp, dest, (int32_t)offsetof(Node, key)); - } else { - emit_n(as, ARMI_CMP^khi, tmp); - emit_lso(as, ARMI_LDR, tmp, dest, (int32_t)offsetof(Node, key.it)); - } - *l_loop = ARMF_CC(ARMI_B, CC_NE) | ((as->mcp-l_loop-2) & 0x00ffffffu); - - /* Load main position relative to tab->node into dest. */ - khash = irref_isk(refkey) ? ir_khash(irkey) : 1; - if (khash == 0) { - emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); - } else { - emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp); - emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp); - if (irt_isstr(kt)) { /* Fetch of str->hash is cheaper than ra_allock. 
*/ - emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP); - emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); - emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, hash)); - emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); - } else if (irref_isk(refkey)) { /* Constant key: AND the hash mask with the precomputed hash khash. */ - emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash, - rset_exclude(rset_exclude(RSET_GPR, tab), dest)); - emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); - emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); - } else { /* Must match with hash*() in lj_tab.c. */ - if (ra_hasreg(keynumhi)) { /* Canonicalize +-0.0 to 0.0. */ - if (keyhi == RID_TMP) - emit_dm(as, ARMF_CC(ARMI_MOV, CC_NE), keyhi, keynumhi); - emit_d(as, ARMF_CC(ARMI_MOV, CC_EQ)|ARMI_K12|0, keyhi); - } - emit_dnm(as, ARMI_AND, tmp, tmp, RID_TMP); - emit_dnm(as, ARMI_SUB|ARMF_SH(ARMSH_ROR, 32-HASH_ROT3), tmp, tmp, tmp+1); - emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); - emit_dnm(as, ARMI_EOR|ARMF_SH(ARMSH_ROR, 32-((HASH_ROT2+HASH_ROT1)&31)), - tmp, tmp+1, tmp); - emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); - emit_dnm(as, ARMI_SUB|ARMF_SH(ARMSH_ROR, 32-HASH_ROT1), tmp+1, tmp+1, tmp); - if (ra_hasreg(keynumhi)) { - emit_dnm(as, ARMI_EOR, tmp+1, tmp, key); - emit_dnm(as, ARMI_ORR|ARMI_S, RID_TMP, tmp, key); /* Test for +-0.0. */ - emit_dnm(as, ARMI_ADD, tmp, keynumhi, keynumhi); -#if !LJ_SOFTFP - emit_dnm(as, ARMI_VMOV_RR_D, key, keynumhi, - (ra_alloc1(as, refkey, RSET_FPR) & 15)); -#endif - } else { - emit_dnm(as, ARMI_EOR, tmp+1, tmp, key); - emit_opk(as, ARMI_ADD, tmp, key, (int32_t)HASH_BIAS, - rset_exclude(rset_exclude(RSET_GPR, tab), key)); - } - } - } -} - /* Assemble a hash reference with a constant key and a constant slot. The node is located at byte offset kslot->op2 * sizeof(Node) from the register holding ir->op1. The key stored in that node is compared against the constant key irkey and the trace exits (CC_NE) on mismatch. The node address is only computed into a destination register if the result is used or if the offset exceeds 4095, in which case it also serves as the base for the key loads. */ -static void asm_hrefk(ASMState *as, IRIns *ir) -{ - IRIns *kslot = IR(ir->op2); - IRIns *irkey = IR(kslot->op1); - int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); - int32_t kofs = ofs + (int32_t)offsetof(Node, key); - Reg dest = (ra_used(ir) || ofs > 4095) ? 
ra_dest(as, ir, RSET_GPR) : RID_NONE; - Reg node = ra_alloc1(as, ir->op1, RSET_GPR); - Reg key = RID_NONE, type = RID_TMP, idx = node; - RegSet allow = rset_exclude(RSET_GPR, node); - lua_assert(ofs % sizeof(Node) == 0); - if (ofs > 4095) { /* Offset too large for the loads below: address the key relative to dest instead. */ - idx = dest; - rset_clear(allow, dest); - kofs = (int32_t)offsetof(Node, key); - } else if (ra_hasreg(dest)) { - emit_opk(as, ARMI_ADD, dest, node, ofs, allow); - } - asm_guardcc(as, CC_NE); - if (!irt_ispri(irkey->t)) { /* Non-primitive keys need the value word, too. Prefer a free even/odd pair for key and type. */ - RegSet even = (as->freeset & allow); - even = even & (even >> 1) & RSET_GPREVEN; - if (even) { - key = ra_scratch(as, even); - if (rset_test(as->freeset, key+1)) { - type = key+1; - ra_modified(as, type); - } - } else { - key = ra_scratch(as, allow); - } - rset_clear(allow, key); - } - rset_clear(allow, type); - if (irt_isnum(irkey->t)) { - emit_opk(as, ARMF_CC(ARMI_CMP, CC_EQ), 0, type, - (int32_t)ir_knum(irkey)->u32.hi, allow); - emit_opk(as, ARMI_CMP, 0, key, - (int32_t)ir_knum(irkey)->u32.lo, allow); - } else { - if (ra_hasreg(key)) - emit_opk(as, ARMF_CC(ARMI_CMP, CC_EQ), 0, key, irkey->i, allow); - emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype(irkey->t), type); - } - emit_lso(as, ARMI_LDR, type, idx, kofs+4); - if (ra_hasreg(key)) emit_lso(as, ARMI_LDR, key, idx, kofs); - if (ofs > 4095) - emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR); -} - /* Assemble an upvalue reference. For a constant function the v field of the upvalue selected by ir->op2 >> 8 is loaded from its absolute address. Otherwise the upvalue object is loaded from the uvptr array of the function in ir->op1; for IR_UREFC the trace exits unless its closed byte equals 1 and dest becomes the address of its tv field, for other opcodes dest is loaded from its v field. */ -static void asm_uref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; - emit_lsptr(as, ARMI_LDR, dest, v); - } else { - Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->o == IR_UREFC) { - asm_guardcc(as, CC_NE); - emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP); - emit_opk(as, ARMI_ADD, dest, uv, - (int32_t)offsetof(GCupval, tv), RSET_GPR); - emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); - } else { - emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v)); - 
} - emit_lso(as, ARMI_LDR, uv, func, - (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); - } -} - /* FREF generates no code here; the instruction is only asserted to be unused. */ -static void asm_fref(ASMState *as, IRIns *ir) -{ - UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); -} - /* Assemble a string reference: dest = op1 + op2 + sizeof(GCstr). If either operand is a constant it is folded into the added constant. With two variable operands two ADDs are emitted, and a constant addend of an ADD feeding op2 may be folded into the second one. */ -static void asm_strref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - IRRef ref = ir->op2, refk = ir->op1; - Reg r; - if (irref_isk(ref)) { - IRRef tmp = refk; refk = ref; ref = tmp; - } else if (!irref_isk(refk)) { - uint32_t k, m = ARMI_K12|sizeof(GCstr); - Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); - IRIns *irr = IR(ir->op2); - if (ra_hasreg(irr->r)) { - ra_noweak(as, irr->r); - right = irr->r; - } else if (mayfuse(as, irr->op2) && /* NOTE(review): mayfuse() is applied to irr->op2 (the addend of op2), not to ir->op2 itself -- confirm this is intended. */ - irr->o == IR_ADD && irref_isk(irr->op2) && - (k = emit_isk12(ARMI_ADD, - (int32_t)sizeof(GCstr) + IR(irr->op2)->i))) { - m = k; - right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left)); - } else { - right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left)); - } - emit_dn(as, ARMI_ADD^m, dest, dest); - emit_dnm(as, ARMI_ADD, dest, left, right); - return; - } - r = ra_alloc1(as, ref, RSET_GPR); - emit_opk(as, ARMI_ADD, dest, r, - sizeof(GCstr) + IR(refk)->i, rset_exclude(RSET_GPR, r)); -} - -/* -- Loads and stores ---------------------------------------------------- */ - /* Select the load instruction matching the IR type of ir. IRT_NUM is asserted to occur only with !LJ_SOFTFP. All types not listed use LDR. */ -static ARMIns asm_fxloadins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: return ARMI_LDRSB; - case IRT_U8: return ARMI_LDRB; - case IRT_I16: return ARMI_LDRSH; - case IRT_U16: return ARMI_LDRH; - case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VLDR_D; - case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; /* Falls through to the default if LJ_SOFTFP is set. */ - default: return ARMI_LDR; - } -} - /* Select the store instruction matching the IR type of ir. IRT_NUM is asserted to occur only with !LJ_SOFTFP. All types not listed use STR. */ -static ARMIns asm_fxstoreins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: case IRT_U8: return ARMI_STRB; - case IRT_I16: case IRT_U16: return ARMI_STRH; - case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VSTR_D; - case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; /* Falls through to the default if LJ_SOFTFP is set. */ - default: return ARMI_STR; - } -} - /* Assemble a field load: dest is loaded from the object in ir->op1 at the offset field_ofs[ir->op2]. A load of IRFL_TAB_ARRAY from a table with a colocated array becomes an ADD instead. Nothing is emitted if ir->op1 is REF_NIL. */ -static void asm_fload(ASMState *as, 
IRIns *ir) -{ - if (ir->op1 == REF_NIL) { - lua_assert(!ra_used(ir)); /* We can end up here if DCE is turned off. */ - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); - ARMIns ai = asm_fxloadins(ir); - int32_t ofs; - if (ir->op2 == IRFL_TAB_ARRAY) { - ofs = asm_fuseabase(as, ir->op1); - if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ - emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); - return; - } - } - ofs = field_ofs[ir->op2]; - if ((ai & 0x04000000)) /* Same instruction bit test as in asm_fusexref(): selects emit_lso() over emit_lsox(). */ - emit_lso(as, ai, dest, idx, ofs); - else - emit_lsox(as, ai, dest, idx, ofs); - } -} - /* Assemble a field store: the value ir->op2 is stored to the object referenced by the FREF in ir->op1 at offset field_ofs[irf->op2]. Nothing is emitted if the store is marked with RID_SINK. */ -static void asm_fstore(ASMState *as, IRIns *ir) -{ - if (ir->r != RID_SINK) { - Reg src = ra_alloc1(as, ir->op2, RSET_GPR); - IRIns *irf = IR(ir->op1); - Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); - int32_t ofs = field_ofs[irf->op2]; - ARMIns ai = asm_fxstoreins(ir); - if ((ai & 0x04000000)) - emit_lso(as, ai, src, idx, ofs); - else - emit_lsox(as, ai, src, idx, ofs); - } -} - /* Assemble an XLOAD: the destination is an FPR for FP types (with !LJ_SOFTFP) and a GPR otherwise; the address in ir->op1 is fused by asm_fusexref(). Unaligned loads are asserted not to occur. */ -static void asm_xload(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, - (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); -} - /* Assemble an XSTORE of ir->op2 to the address in ir->op1 plus the constant offset ofs. Nothing is emitted if the store is marked with RID_SINK. */ -static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) -{ - if (ir->r != RID_SINK) { - Reg src = ra_alloc1(as, ir->op2, - (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, - rset_exclude(RSET_GPR, src), ofs); - } -} - -#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) - /* Assemble a type-checked load through the reference in ir->op1, fused by asm_fuseahuref(). The type word at offset+4 is always loaded and checked with a guard; the value at the offset is only loaded if the result is used. With LJ_SOFTFP a following HIOP marks a number whose hi word is the type word. */ -static void asm_ahuvload(ASMState *as, IRIns *ir) -{ - int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); - IRType t = hiop ? IRT_NUM : irt_type(ir->t); - Reg dest = RID_NONE, type = RID_NONE, idx; - RegSet allow = RSET_GPR; - int32_t ofs = 0; - if (hiop && ra_used(ir+1)) { - type = ra_dest(as, ir+1, allow); - rset_clear(allow, type); - } - if (ra_used(ir)) { - lua_assert((LJ_SOFTFP ? 
0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); - dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); - rset_clear(allow, dest); - } - idx = asm_fuseahuref(as, ir->op1, &ofs, allow, - (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096); - if (!hiop || type == RID_NONE) { - rset_clear(allow, idx); - if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && - rset_test((as->freeset & allow), dest+1)) { - type = dest+1; - ra_modified(as, type); - } else { - type = RID_TMP; - } - } - asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE); - emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type); - if (ra_hasreg(dest)) { -#if !LJ_SOFTFP - if (t == IRT_NUM) - emit_vlso(as, ARMI_VLDR_D, dest, idx, ofs); - else -#endif - emit_lso(as, ARMI_LDR, dest, idx, ofs); - } - emit_lso(as, ARMI_LDR, type, idx, ofs+4); -} - -static void asm_ahustore(ASMState *as, IRIns *ir) -{ - if (ir->r != RID_SINK) { - RegSet allow = RSET_GPR; - Reg idx, src = RID_NONE, type = RID_NONE; - int32_t ofs = 0; -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - src = ra_alloc1(as, ir->op2, RSET_FPR); - idx = asm_fuseahuref(as, ir->op1, &ofs, allow, 1024); - emit_vlso(as, ARMI_VSTR_D, src, idx, ofs); - } else -#endif - { - int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); - if (!irt_ispri(ir->t)) { - src = ra_alloc1(as, ir->op2, allow); - rset_clear(allow, src); - } - if (hiop) - type = ra_alloc1(as, (ir+1)->op2, allow); - else - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type), 4096); - if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs); - emit_lso(as, ARMI_STR, type, idx, ofs+4); - } - } -} - -static void asm_sload(ASMState *as, IRIns *ir) -{ - int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); - int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); - IRType t = hiop ? 
IRT_NUM : irt_type(ir->t); - Reg dest = RID_NONE, type = RID_NONE, base; - RegSet allow = RSET_GPR; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); -#if LJ_SOFTFP - lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ - if (hiop && ra_used(ir+1)) { - type = ra_dest(as, ir+1, allow); - rset_clear(allow, type); - } -#else - if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(ir->t) && t == IRT_INT) { - dest = ra_scratch(as, RSET_FPR); - asm_tointg(as, ir, dest); - t = IRT_NUM; /* Continue with a regular number type check. */ - } else -#endif - if (ra_used(ir)) { - Reg tmp = RID_NONE; - if ((ir->op2 & IRSLOAD_CONVERT)) - tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR); - lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); - dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); - rset_clear(allow, dest); - base = ra_alloc1(as, REF_BASE, allow); - if ((ir->op2 & IRSLOAD_CONVERT)) { - if (t == IRT_INT) { - emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); - emit_dm(as, ARMI_VCVT_S32_F64, (tmp & 15), (tmp & 15)); - t = IRT_NUM; /* Check for original type. */ - } else { - emit_dm(as, ARMI_VCVT_F64_S32, (dest & 15), (dest & 15)); - emit_dn(as, ARMI_VMOV_S_R, tmp, (dest & 15)); - t = IRT_INT; /* Check for original type. */ - } - dest = tmp; - } - goto dotypecheck; - } - base = ra_alloc1(as, REF_BASE, allow); -dotypecheck: - rset_clear(allow, base); - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - if (ra_noreg(type)) { - if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && - rset_test((as->freeset & allow), dest+1)) { - type = dest+1; - ra_modified(as, type); - } else { - type = RID_TMP; - } - } - asm_guardcc(as, t == IRT_NUM ? 
CC_HS : CC_NE); - emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type); - } - if (ra_hasreg(dest)) { -#if !LJ_SOFTFP - if (t == IRT_NUM) { - if (ofs < 1024) { - emit_vlso(as, ARMI_VLDR_D, dest, base, ofs); - } else { - if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs+4); - emit_vlso(as, ARMI_VLDR_D, dest, RID_TMP, 0); - emit_opk(as, ARMI_ADD, RID_TMP, base, ofs, allow); - return; - } - } else -#endif - emit_lso(as, ARMI_LDR, dest, base, ofs); - } - if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs+4); -} - -/* -- Allocations --------------------------------------------------------- */ - -#if LJ_HASFFI -static void asm_cnew(ASMState *as, IRIns *ir) -{ - CTState *cts = ctype_ctsG(J2G(as->J)); - CTypeID id = (CTypeID)IR(ir->op1)->i; - CTSize sz; - CTInfo info = lj_ctype_info(cts, id, &sz); - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; - IRRef args[4]; - RegSet allow = (RSET_GPR & ~RSET_SCRATCH); - RegSet drop = RSET_SCRATCH; - lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); - - as->gcsteps++; - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - ra_evictset(as, drop); - if (ra_used(ir)) - ra_destreg(as, ir, RID_RET); /* GCcdata * */ - - /* Initialize immutable cdata object. */ - if (ir->o == IR_CNEWI) { - int32_t ofs = sizeof(GCcdata); - lua_assert(sz == 4 || sz == 8); - if (sz == 8) { - ofs += 4; ir++; - lua_assert(ir->o == IR_HIOP); - } - for (;;) { - Reg r = ra_alloc1(as, ir->op2, allow); - emit_lso(as, ARMI_STR, r, RID_RET, ofs); - rset_clear(allow, r); - if (ofs == sizeof(GCcdata)) break; - ofs -= 4; ir--; - } - } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. 
*/ - ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ir->op1; /* CTypeID id */ - args[2] = ir->op2; /* CTSize sz */ - args[3] = ASMREF_TMP1; /* CTSize align */ - asm_gencall(as, ci, args); - emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); - return; - } - - /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ - { - uint32_t k = emit_isk12(ARMI_MOV, id); - Reg r = k ? RID_R1 : ra_allock(as, id, allow); - emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); - emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); - emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP); - if (k) emit_d(as, ARMI_MOV^k, RID_R1); - } - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ASMREF_TMP1; /* MSize size */ - asm_gencall(as, ci, args); - ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), - ra_releasetmp(as, ASMREF_TMP1)); -} -#else -#define asm_cnew(as, ir) ((void)0) -#endif - -/* -- Write barriers ------------------------------------------------------ */ - -static void asm_tbar(ASMState *as, IRIns *ir) -{ - Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); - Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab)); - Reg gr = ra_allock(as, i32ptr(J2G(as->J)), - rset_exclude(rset_exclude(RSET_GPR, tab), link)); - Reg mark = RID_TMP; - MCLabel l_end = emit_label(as); - emit_lso(as, ARMI_STR, link, tab, (int32_t)offsetof(GCtab, gclist)); - emit_lso(as, ARMI_STRB, mark, tab, (int32_t)offsetof(GCtab, marked)); - emit_lso(as, ARMI_STR, tab, gr, - (int32_t)offsetof(global_State, gc.grayagain)); - emit_dn(as, ARMI_BIC|ARMI_K12|LJ_GC_BLACK, mark, mark); - emit_lso(as, ARMI_LDR, link, gr, - (int32_t)offsetof(global_State, gc.grayagain)); - emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end); - emit_n(as, ARMI_TST|ARMI_K12|LJ_GC_BLACK, mark); - emit_lso(as, ARMI_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked)); -} - -static void asm_obar(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = 
&lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; - IRRef args[2]; - MCLabel l_end; - Reg obj, val, tmp; - /* No need for other object barriers (yet). */ - lua_assert(IR(ir->op1)->o == IR_UREFC); - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ir->op1; /* TValue *tv */ - asm_gencall(as, ci, args); - if ((l_end[-1] >> 28) == CC_AL) - l_end[-1] = ARMF_CC(l_end[-1], CC_NE); - else - emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end); - ra_allockreg(as, i32ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1)); - obj = IR(ir->op1)->r; - tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); - emit_n(as, ARMF_CC(ARMI_TST, CC_NE)|ARMI_K12|LJ_GC_BLACK, tmp); - emit_n(as, ARMI_TST|ARMI_K12|LJ_GC_WHITES, RID_TMP); - val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); - emit_lso(as, ARMI_LDRB, tmp, obj, - (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); - emit_lso(as, ARMI_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked)); -} - -/* -- Arithmetic and logic operations ------------------------------------- */ - -#if !LJ_SOFTFP -static void asm_fparith(ASMState *as, IRIns *ir, ARMIns ai) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - emit_dnm(as, ai, (dest & 15), (left & 15), (right & 15)); -} - -static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); - emit_dm(as, ai, (dest & 15), (left & 15)); -} - -static void asm_callround(ASMState *as, IRIns *ir, int id) -{ - /* The modified regs must match with the *.dasc implementation. 
*/ - RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)| - RID2RSET(RID_R3)|RID2RSET(RID_R12); - RegSet of; - Reg dest, src; - ra_evictset(as, drop); - dest = ra_dest(as, ir, RSET_FPR); - emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15)); - emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf : - id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf : - (void *)lj_vm_trunc_sf); - /* Workaround to protect argument GPRs from being used for remat. */ - of = as->freeset; - as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1); - as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L); - src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */ - as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1)); - emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15)); -} - -static void asm_fpmath(ASMState *as, IRIns *ir) -{ - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - return; - if (ir->op2 <= IRFPM_TRUNC) - asm_callround(as, ir, ir->op2); - else if (ir->op2 == IRFPM_SQRT) - asm_fpunary(as, ir, ARMI_VSQRT_D); - else - asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); -} -#endif - -static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) -{ - IRIns *ir; - if (irref_isk(rref)) - return 0; /* Don't swap constants to the left. */ - if (irref_isk(lref)) - return 1; /* But swap constants to the right. */ - ir = IR(rref); - if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) || - (ir->o == IR_ADD && ir->op1 == ir->op2)) - return 0; /* Don't swap fusable operands to the left. */ - ir = IR(lref); - if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) || - (ir->o == IR_ADD && ir->op1 == ir->op2)) - return 1; /* But swap fusable operands to the right. */ - return 0; /* Otherwise don't swap. 
*/ -} - -static void asm_intop(ASMState *as, IRIns *ir, ARMIns ai) -{ - IRRef lref = ir->op1, rref = ir->op2; - Reg left, dest = ra_dest(as, ir, RSET_GPR); - uint32_t m; - if (asm_swapops(as, lref, rref)) { - IRRef tmp = lref; lref = rref; rref = tmp; - if ((ai & ~ARMI_S) == ARMI_SUB || (ai & ~ARMI_S) == ARMI_SBC) - ai ^= (ARMI_SUB^ARMI_RSB); - } - left = ra_hintalloc(as, lref, dest, RSET_GPR); - m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); - if (irt_isguard(ir->t)) { /* For IR_ADDOV etc. */ - asm_guardcc(as, CC_VS); - ai |= ARMI_S; - } - emit_dn(as, ai^m, dest, left); -} - -static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai) -{ - if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */ - as->flagmcp = NULL; - as->mcp++; - ai |= ARMI_S; - } - asm_intop(as, ir, ai); -} - -static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - emit_dn(as, ai|ARMI_K12|0, dest, left); -} - -/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */ -static void asm_intmul(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest)); - Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - Reg tmp = RID_NONE; - /* ARMv5 restriction: dest != left and dest_hi != left. */ - if (dest == left && left != right) { left = right; right = dest; } - if (irt_isguard(ir->t)) { /* IR_MULOV */ - if (!(as->flags & JIT_F_ARMV6) && dest == left) - tmp = left = ra_scratch(as, rset_exclude(RSET_GPR, left)); - asm_guardcc(as, CC_NE); - emit_nm(as, ARMI_TEQ|ARMF_SH(ARMSH_ASR, 31), RID_TMP, dest); - emit_dnm(as, ARMI_SMULL|ARMF_S(right), dest, RID_TMP, left); - } else { - if (!(as->flags & JIT_F_ARMV6) && dest == left) tmp = left = RID_TMP; - emit_nm(as, ARMI_MUL|ARMF_S(right), dest, left); - } - /* Only need this for the dest == left == right case. 
*/ - if (ra_hasreg(tmp)) emit_dm(as, ARMI_MOV, tmp, right); -} - -static void asm_add(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - if (!asm_fusemadd(as, ir, ARMI_VMLA_D, ARMI_VMLA_D)) - asm_fparith(as, ir, ARMI_VADD_D); - return; - } -#endif - asm_intop_s(as, ir, ARMI_ADD); -} - -static void asm_sub(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - if (!asm_fusemadd(as, ir, ARMI_VNMLS_D, ARMI_VMLS_D)) - asm_fparith(as, ir, ARMI_VSUB_D); - return; - } -#endif - asm_intop_s(as, ir, ARMI_SUB); -} - -static void asm_mul(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - asm_fparith(as, ir, ARMI_VMUL_D); - return; - } -#endif - asm_intmul(as, ir); -} - -#define asm_addov(as, ir) asm_add(as, ir) -#define asm_subov(as, ir) asm_sub(as, ir) -#define asm_mulov(as, ir) asm_mul(as, ir) - -#if !LJ_SOFTFP -#define asm_div(as, ir) asm_fparith(as, ir, ARMI_VDIV_D) -#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) -#define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D) -#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) -#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) -#endif - -#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) - -static void asm_neg(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - asm_fpunary(as, ir, ARMI_VNEG_D); - return; - } -#endif - asm_intneg(as, ir, ARMI_RSB); -} - -static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai) -{ - if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */ - uint32_t cc = (as->mcp[1] >> 28); - as->flagmcp = NULL; - if (cc <= CC_NE) { - as->mcp++; - ai |= ARMI_S; - } else if (cc == CC_GE) { - *++as->mcp ^= ((CC_GE^CC_PL) << 28); - ai |= ARMI_S; - } else if (cc == CC_LT) { - *++as->mcp ^= ((CC_LT^CC_MI) << 28); - ai |= ARMI_S; - } /* else: other conds don't work with bit ops. 
*/ - } - if (ir->op2 == 0) { - Reg dest = ra_dest(as, ir, RSET_GPR); - uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); - emit_d(as, ai^m, dest); - } else { - /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */ - asm_intop(as, ir, ai); - } -} - -#define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN) - -static void asm_bswap(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - if ((as->flags & JIT_F_ARMV6)) { - emit_dm(as, ARMI_REV, dest, left); - } else { - Reg tmp2 = dest; - if (tmp2 == left) - tmp2 = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, dest), left)); - emit_dnm(as, ARMI_EOR|ARMF_SH(ARMSH_LSR, 8), dest, tmp2, RID_TMP); - emit_dm(as, ARMI_MOV|ARMF_SH(ARMSH_ROR, 8), tmp2, left); - emit_dn(as, ARMI_BIC|ARMI_K12|256*8|255, RID_TMP, RID_TMP); - emit_dnm(as, ARMI_EOR|ARMF_SH(ARMSH_ROR, 16), RID_TMP, left, left); - } -} - -#define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND) -#define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR) -#define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR) - -static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) -{ - if (irref_isk(ir->op2)) { /* Constant shifts. */ - /* NYI: Turn SHL+SHR or BAND+SHR into uxtb, uxth or ubfx. */ - /* NYI: Turn SHL+ASR into sxtb, sxth or sbfx. 
*/ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - int32_t shift = (IR(ir->op2)->i & 31); - emit_dm(as, ARMI_MOV|ARMF_SH(sh, shift), dest, left); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_dm(as, ARMI_MOV|ARMF_RSH(sh, right), dest, left); - } -} - -#define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL) -#define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR) -#define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR) -#define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR) -#define asm_brol(as, ir) lua_assert(0) - -static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) -{ - uint32_t kcmp = 0, kmov = 0; - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - Reg right = 0; - if (irref_isk(ir->op2)) { - kcmp = emit_isk12(ARMI_CMP, IR(ir->op2)->i); - if (kcmp) kmov = emit_isk12(ARMI_MOV, IR(ir->op2)->i); - } - if (!kmov) { - kcmp = 0; - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - } - if (kmov || dest != right) { - emit_dm(as, ARMF_CC(ARMI_MOV, cc)^kmov, dest, right); - cc ^= 1; /* Must use opposite conditions for paired moves. */ - } else { - cc ^= (CC_LT^CC_GT); /* Otherwise may swap CC_LT <-> CC_GT. */ - } - if (dest != left) emit_dm(as, ARMF_CC(ARMI_MOV, cc), dest, left); - emit_nm(as, ARMI_CMP^kcmp, left, right); -} - -#if LJ_SOFTFP -static void asm_sfpmin_max(ASMState *as, IRIns *ir, int cc) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; - RegSet drop = RSET_SCRATCH; - Reg r; - IRRef args[4]; - args[0] = ir->op1; args[1] = (ir+1)->op1; - args[2] = ir->op2; args[3] = (ir+1)->op2; - /* __aeabi_cdcmple preserves r0-r3. 
*/ - if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); - if (ra_hasreg((ir+1)->r)) rset_clear(drop, (ir+1)->r); - if (!rset_test(as->freeset, RID_R2) && - regcost_ref(as->cost[RID_R2]) == args[2]) rset_clear(drop, RID_R2); - if (!rset_test(as->freeset, RID_R3) && - regcost_ref(as->cost[RID_R3]) == args[3]) rset_clear(drop, RID_R3); - ra_evictset(as, drop); - ra_destpair(as, ir); - emit_dm(as, ARMF_CC(ARMI_MOV, cc), RID_RETHI, RID_R3); - emit_dm(as, ARMF_CC(ARMI_MOV, cc), RID_RETLO, RID_R2); - emit_call(as, (void *)ci->func); - for (r = RID_R0; r <= RID_R3; r++) - ra_leftov(as, r, args[r-RID_R0]); -} -#else -static void asm_fpmin_max(ASMState *as, IRIns *ir, int cc) -{ - Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = ((left >> 8) & 15); left &= 15; - if (dest != left) emit_dm(as, ARMF_CC(ARMI_VMOV_D, cc^1), dest, left); - if (dest != right) emit_dm(as, ARMF_CC(ARMI_VMOV_D, cc), dest, right); - emit_d(as, ARMI_VMRS, 0); - emit_dm(as, ARMI_VCMP_D, left, right); -} -#endif - -static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc) -{ -#if LJ_SOFTFP - UNUSED(fcc); -#else - if (irt_isnum(ir->t)) - asm_fpmin_max(as, ir, fcc); - else -#endif - asm_intmin_max(as, ir, cc); -} - -#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_HI) -#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LO) - -/* -- Comparisons --------------------------------------------------------- */ - -/* Map of comparisons to flags. ORDER IR. */ -static const uint8_t asm_compmap[IR_ABC+1] = { - /* op FP swp int cc FP cc */ - /* LT */ CC_GE + (CC_HS << 4), - /* GE x */ CC_LT + (CC_HI << 4), - /* LE */ CC_GT + (CC_HI << 4), - /* GT x */ CC_LE + (CC_HS << 4), - /* ULT x */ CC_HS + (CC_LS << 4), - /* UGE */ CC_LO + (CC_LO << 4), - /* ULE x */ CC_HI + (CC_LO << 4), - /* UGT */ CC_LS + (CC_LS << 4), - /* EQ */ CC_NE + (CC_NE << 4), - /* NE */ CC_EQ + (CC_EQ << 4), - /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. 
*/ -}; - -#if LJ_SOFTFP -/* FP comparisons. */ -static void asm_sfpcomp(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; - RegSet drop = RSET_SCRATCH; - Reg r; - IRRef args[4]; - int swp = (((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1) << 1); - args[swp^0] = ir->op1; args[swp^1] = (ir+1)->op1; - args[swp^2] = ir->op2; args[swp^3] = (ir+1)->op2; - /* __aeabi_cdcmple preserves r0-r3. This helps to reduce spills. */ - for (r = RID_R0; r <= RID_R3; r++) - if (!rset_test(as->freeset, r) && - regcost_ref(as->cost[r]) == args[r-RID_R0]) rset_clear(drop, r); - ra_evictset(as, drop); - asm_guardcc(as, (asm_compmap[ir->o] >> 4)); - emit_call(as, (void *)ci->func); - for (r = RID_R0; r <= RID_R3; r++) - ra_leftov(as, r, args[r-RID_R0]); -} -#else -/* FP comparisons. */ -static void asm_fpcomp(ASMState *as, IRIns *ir) -{ - Reg left, right; - ARMIns ai; - int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1); - if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) { - left = (ra_alloc1(as, ir->op1, RSET_FPR) & 15); - right = 0; - ai = ARMI_VCMPZ_D; - } else { - left = ra_alloc2(as, ir, RSET_FPR); - if (swp) { - right = (left & 15); left = ((left >> 8) & 15); - } else { - right = ((left >> 8) & 15); left &= 15; - } - ai = ARMI_VCMP_D; - } - asm_guardcc(as, (asm_compmap[ir->o] >> 4)); - emit_d(as, ARMI_VMRS, 0); - emit_dm(as, ai, left, right); -} -#endif - -/* Integer comparisons. 
*/ -static void asm_intcomp(ASMState *as, IRIns *ir) -{ - ARMCC cc = (asm_compmap[ir->o] & 15); - IRRef lref = ir->op1, rref = ir->op2; - Reg left; - uint32_t m; - int cmpprev0 = 0; - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); - if (asm_swapops(as, lref, rref)) { - Reg tmp = lref; lref = rref; rref = tmp; - if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ - else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */ - } - if (irref_isk(rref) && IR(rref)->i == 0) { - IRIns *irl = IR(lref); - cmpprev0 = (irl+1 == ir); - /* Combine comp(BAND(left, right), 0) into tst left, right. */ - if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) { - IRRef blref = irl->op1, brref = irl->op2; - uint32_t m2 = 0; - Reg bleft; - if (asm_swapops(as, blref, brref)) { - Reg tmp = blref; blref = brref; brref = tmp; - } - if (irref_isk(brref)) { - m2 = emit_isk12(ARMI_AND, IR(brref)->i); - if ((m2 & (ARMI_AND^ARMI_BIC))) - goto notst; /* Not beneficial if we miss a constant operand. */ - } - if (cc == CC_GE) cc = CC_PL; - else if (cc == CC_LT) cc = CC_MI; - else if (cc > CC_NE) goto notst; /* Other conds don't work with tst. */ - bleft = ra_alloc1(as, blref, RSET_GPR); - if (!m2) m2 = asm_fuseopm(as, 0, brref, rset_exclude(RSET_GPR, bleft)); - asm_guardcc(as, cc); - emit_n(as, ARMI_TST^m2, bleft); - return; - } - } -notst: - left = ra_alloc1(as, lref, RSET_GPR); - m = asm_fuseopm(as, ARMI_CMP, rref, rset_exclude(RSET_GPR, left)); - asm_guardcc(as, cc); - emit_n(as, ARMI_CMP^m, left); - /* Signed comparison with zero and referencing previous ins? */ - if (cmpprev0 && (cc <= CC_NE || cc >= CC_GE)) - as->flagmcp = as->mcp; /* Allow elimination of the compare. */ -} - -static void asm_comp(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) - asm_fpcomp(as, ir); - else -#endif - asm_intcomp(as, ir); -} - -#define asm_equal(as, ir) asm_comp(as, ir) - -#if LJ_HASFFI -/* 64 bit integer comparisons. 
*/ -static void asm_int64comp(ASMState *as, IRIns *ir) -{ - int signedcomp = (ir->o <= IR_GT); - ARMCC cclo, cchi; - Reg leftlo, lefthi; - uint32_t mlo, mhi; - RegSet allow = RSET_GPR, oldfree; - - /* Always use unsigned comparison for loword. */ - cclo = asm_compmap[ir->o + (signedcomp ? 4 : 0)] & 15; - leftlo = ra_alloc1(as, ir->op1, allow); - oldfree = as->freeset; - mlo = asm_fuseopm(as, ARMI_CMP, ir->op2, rset_clear(allow, leftlo)); - allow &= ~(oldfree & ~as->freeset); /* Update for allocs of asm_fuseopm. */ - - /* Use signed or unsigned comparison for hiword. */ - cchi = asm_compmap[ir->o] & 15; - lefthi = ra_alloc1(as, (ir+1)->op1, allow); - mhi = asm_fuseopm(as, ARMI_CMP, (ir+1)->op2, rset_clear(allow, lefthi)); - - /* All register allocations must be performed _before_ this point. */ - if (signedcomp) { - MCLabel l_around = emit_label(as); - asm_guardcc(as, cclo); - emit_n(as, ARMI_CMP^mlo, leftlo); - emit_branch(as, ARMF_CC(ARMI_B, CC_NE), l_around); - if (cchi == CC_GE || cchi == CC_LE) cchi ^= 6; /* GE -> GT, LE -> LT */ - asm_guardcc(as, cchi); - } else { - asm_guardcc(as, cclo); - emit_n(as, ARMF_CC(ARMI_CMP, CC_EQ)^mlo, leftlo); - } - emit_n(as, ARMI_CMP^mhi, lefthi); -} -#endif - -/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ - -/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ -static void asm_hiop(ASMState *as, IRIns *ir) -{ -#if LJ_HASFFI || LJ_SOFTFP - /* HIOP is marked as a store because it needs its own DCE logic. */ - int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ - if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; - if ((ir-1)->o <= IR_NE) { /* 64 bit integer or FP comparisons. ORDER IR. */ - as->curins--; /* Always skip the loword comparison. 
*/ -#if LJ_SOFTFP - if (!irt_isint(ir->t)) { - asm_sfpcomp(as, ir-1); - return; - } -#endif -#if LJ_HASFFI - asm_int64comp(as, ir-1); -#endif - return; -#if LJ_SOFTFP - } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { - as->curins--; /* Always skip the loword min/max. */ - if (uselo || usehi) - asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO); - return; -#elif LJ_HASFFI - } else if ((ir-1)->o == IR_CONV) { - as->curins--; /* Always skip the CONV. */ - if (usehi || uselo) - asm_conv64(as, ir); - return; -#endif - } else if ((ir-1)->o == IR_XSTORE) { - if ((ir-1)->r != RID_SINK) - asm_xstore_(as, ir, 4); - return; - } - if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ - switch ((ir-1)->o) { -#if LJ_HASFFI - case IR_ADD: - as->curins--; - asm_intop(as, ir, ARMI_ADC); - asm_intop(as, ir-1, ARMI_ADD|ARMI_S); - break; - case IR_SUB: - as->curins--; - asm_intop(as, ir, ARMI_SBC); - asm_intop(as, ir-1, ARMI_SUB|ARMI_S); - break; - case IR_NEG: - as->curins--; - asm_intneg(as, ir, ARMI_RSC); - asm_intneg(as, ir-1, ARMI_RSB|ARMI_S); - break; -#endif -#if LJ_SOFTFP - case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: - case IR_STRTO: - if (!uselo) - ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ - break; -#endif - case IR_CALLN: - case IR_CALLS: - case IR_CALLXS: - if (!uselo) - ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ - break; -#if LJ_SOFTFP - case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: -#endif - case IR_CNEWI: - /* Nothing to do here. Handled by lo op itself. 
*/ - break; - default: lua_assert(0); break; - } -#else - UNUSED(as); UNUSED(ir); lua_assert(0); -#endif -} - -/* -- Profiling ----------------------------------------------------------- */ - -static void asm_prof(ASMState *as, IRIns *ir) -{ - UNUSED(ir); - asm_guardcc(as, CC_NE); - emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP); - emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask); -} - -/* -- Stack handling ------------------------------------------------------ */ - -/* Check Lua stack size for overflow. Use exit handler as fallback. */ -static void asm_stack_check(ASMState *as, BCReg topslot, - IRIns *irp, RegSet allow, ExitNo exitno) -{ - Reg pbase; - uint32_t k; - if (irp) { - if (!ra_hasspill(irp->s)) { - pbase = irp->r; - lua_assert(ra_hasreg(pbase)); - } else if (allow) { - pbase = rset_pickbot(allow); - } else { - pbase = RID_RET; - emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */ - } - } else { - pbase = RID_BASE; - } - emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno)); - k = emit_isk12(0, (int32_t)(8*topslot)); - lua_assert(k); - emit_n(as, ARMI_CMP^k, RID_TMP); - emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase); - emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, - (int32_t)offsetof(lua_State, maxstack)); - if (irp) { /* Must not spill arbitrary registers in head of side trace. */ - int32_t i = i32ptr(&J2G(as->J)->cur_L); - if (ra_hasspill(irp->s)) - emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); - emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); - if (ra_hasspill(irp->s) && !allow) - emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ - emit_loadi(as, RID_TMP, (i & ~4095)); - } else { - emit_getgl(as, RID_TMP, cur_L); - } -} - -/* Restore Lua stack from on-trace state. 
*/ -static void asm_stack_restore(ASMState *as, SnapShot *snap) -{ - SnapEntry *map = &as->T->snapmap[snap->mapofs]; - SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; - MSize n, nent = snap->nent; - /* Store the value of all modified slots to the Lua stack. */ - for (n = 0; n < nent; n++) { - SnapEntry sn = map[n]; - BCReg s = snap_slot(sn); - int32_t ofs = 8*((int32_t)s-1); - IRRef ref = snap_ref(sn); - IRIns *ir = IR(ref); - if ((sn & SNAP_NORESTORE)) - continue; - if (irt_isnum(ir->t)) { -#if LJ_SOFTFP - RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); - Reg tmp; - lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ - tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, - rset_exclude(RSET_GPREVEN, RID_BASE)); - emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs); - if (rset_test(as->freeset, tmp+1)) odd = RID2RSET(tmp+1); - tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, odd); - emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs+4); -#else - Reg src = ra_alloc1(as, ref, RSET_FPR); - emit_vlso(as, ARMI_VSTR_D, src, RID_BASE, ofs); -#endif - } else { - RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); - Reg type; - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE)); - emit_lso(as, ARMI_STR, src, RID_BASE, ofs); - if (rset_test(as->freeset, src+1)) odd = RID2RSET(src+1); - } - if ((sn & (SNAP_CONT|SNAP_FRAME))) { - if (s == 0) continue; /* Do not overwrite link to previous frame. 
*/ - type = ra_allock(as, (int32_t)(*flinks--), odd); -#if LJ_SOFTFP - } else if ((sn & SNAP_SOFTFPNUM)) { - type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE)); -#endif - } else { - type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd); - } - emit_lso(as, ARMI_STR, type, RID_BASE, ofs+4); - } - checkmclim(as); - } - lua_assert(map + nent == flinks); -} - -/* -- GC handling --------------------------------------------------------- */ - -/* Check GC threshold and do one or more GC steps. */ -static void asm_gc_check(ASMState *as) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; - IRRef args[2]; - MCLabel l_end; - Reg tmp1, tmp2; - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ - asm_guardcc(as, CC_NE); /* Assumes asm_snap_prep() already done. */ - emit_n(as, ARMI_CMP|ARMI_K12|0, RID_RET); - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ASMREF_TMP2; /* MSize steps */ - asm_gencall(as, ci, args); - tmp1 = ra_releasetmp(as, ASMREF_TMP1); - tmp2 = ra_releasetmp(as, ASMREF_TMP2); - emit_loadi(as, tmp2, as->gcsteps); - /* Jump around GC step if GC total < GC threshold. */ - emit_branch(as, ARMF_CC(ARMI_B, CC_LS), l_end); - emit_nm(as, ARMI_CMP, RID_TMP, tmp2); - emit_lso(as, ARMI_LDR, tmp2, tmp1, - (int32_t)offsetof(global_State, gc.threshold)); - emit_lso(as, ARMI_LDR, RID_TMP, tmp1, - (int32_t)offsetof(global_State, gc.total)); - ra_allockreg(as, i32ptr(J2G(as->J)), tmp1); - as->gcsteps = 0; - checkmclim(as); -} - -/* -- Loop handling ------------------------------------------------------- */ - -/* Fixup the loop branch. */ -static void asm_loop_fixup(ASMState *as) -{ - MCode *p = as->mctop; - MCode *target = as->mcp; - if (as->loopinv) { /* Inverted loop branch? */ - /* asm_guardcc already inverted the bcc and patched the final bl. 
*/ - p[-2] |= ((uint32_t)(target-p) & 0x00ffffffu); - } else { - p[-1] = ARMI_B | ((uint32_t)((target-p)-1) & 0x00ffffffu); - } -} - -/* -- Head of trace ------------------------------------------------------- */ - -/* Reload L register from g->cur_L. */ -static void asm_head_lreg(ASMState *as) -{ - IRIns *ir = IR(ASMREF_L); - if (ra_used(ir)) { - Reg r = ra_dest(as, ir, RSET_GPR); - emit_getgl(as, r, cur_L); - ra_evictk(as); - } -} - -/* Coalesce BASE register for a root trace. */ -static void asm_head_root_base(ASMState *as) -{ - IRIns *ir; - asm_head_lreg(as); - ir = IR(REF_BASE); - if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) - ra_spill(as, ir); - ra_destreg(as, ir, RID_BASE); -} - -/* Coalesce BASE register for a side trace. */ -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) -{ - IRIns *ir; - asm_head_lreg(as); - ir = IR(REF_BASE); - if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) - ra_spill(as, ir); - if (ra_hasspill(irp->s)) { - rset_clear(allow, ra_dest(as, ir, allow)); - } else { - Reg r = irp->r; - lua_assert(ra_hasreg(r)); - rset_clear(allow, r); - if (r != ir->r && !rset_test(as->freeset, r)) - ra_restore(as, regcost_ref(as->cost[r])); - ra_destreg(as, ir, r); - } - return allow; -} - -/* -- Tail of trace ------------------------------------------------------- */ - -/* Fixup the tail code. */ -static void asm_tail_fixup(ASMState *as, TraceNo lnk) -{ - MCode *p = as->mctop; - MCode *target; - int32_t spadj = as->T->spadjust; - if (spadj == 0) { - as->mctop = --p; - } else { - /* Patch stack adjustment. */ - uint32_t k = emit_isk12(ARMI_ADD, spadj); - lua_assert(k); - p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); - } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - p[-1] = ARMI_B|(((target-p)-1)&0x00ffffffu); -} - -/* Prepare tail of code. 
*/ -static void asm_tail_prep(ASMState *as) -{ - MCode *p = as->mctop - 1; /* Leave room for exit branch. */ - if (as->loopref) { - as->invmcp = as->mcp = p; - } else { - as->mcp = p-1; /* Leave room for stack pointer adjustment. */ - as->invmcp = NULL; - } - *p = 0; /* Prevent load/store merging. */ -} - -/* -- Trace setup --------------------------------------------------------- */ - -/* Ensure there are enough stack slots for call arguments. */ -static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - IRRef args[CCI_NARGS_MAX*2]; - uint32_t i, nargs = CCI_XNARGS(ci); - int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; - asm_collectargs(as, ir, ci, args); - for (i = 0; i < nargs; i++) { - if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) { - if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) { - if (irt_isnum(IR(args[i])->t)) { - if (nfpr > 0) nfpr--; - else fprodd = 0, nslots = (nslots + 3) & ~1; - } else { - if (fprodd) fprodd--; - else if (nfpr > 0) fprodd = 1, nfpr--; - else nslots++; - } - } else if (irt_isnum(IR(args[i])->t)) { - ngpr &= ~1; - if (ngpr > 0) ngpr -= 2; else nslots += 2; - } else { - if (ngpr > 0) ngpr--; else nslots++; - } - } else { - if (ngpr > 0) ngpr--; else nslots++; - } - } - if (nslots > as->evenspill) /* Leave room for args in stack slots. */ - as->evenspill = nslots; - return REGSP_HINT(RID_RET); -} - -static void asm_setup_target(ASMState *as) -{ - /* May need extra exit for asm_stack_check on side traces. */ - asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); -} - -/* -- Trace patching ------------------------------------------------------ */ - -/* Patch exit jumps of existing machine code to a new target. 
*/ -void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) -{ - MCode *p = T->mcode; - MCode *pe = (MCode *)((char *)p + T->szmcode); - MCode *cstart = NULL, *cend = p; - MCode *mcarea = lj_mcode_patch(J, p, 0); - MCode *px = exitstub_addr(J, exitno) - 2; - for (; p < pe; p++) { - /* Look for bl_cc exitstub, replace with b_cc target. */ - uint32_t ins = *p; - if ((ins & 0x0f000000u) == 0x0b000000u && ins < 0xf0000000u && - ((ins ^ (px-p)) & 0x00ffffffu) == 0) { - *p = (ins & 0xfe000000u) | (((target-p)-2) & 0x00ffffffu); - cend = p+1; - if (!cstart) cstart = p; - } - } - lua_assert(cstart != NULL); - lj_mcode_sync(cstart, cend); - lj_mcode_patch(J, mcarea, 1); -} - diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h deleted file mode 100644 index 328e4d7740..0000000000 --- a/src/lj_asm_arm64.h +++ /dev/null @@ -1,2008 +0,0 @@ -/* -** ARM64 IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -** -** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. -** Sponsored by Cisco Systems, Inc. -*/ - -/* -- Register allocator extensions --------------------------------------- */ - -/* Allocate a register with a hint. */ -static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) -{ - Reg r = IR(ref)->r; - if (ra_noreg(r)) { - if (!ra_hashint(r) && !iscrossref(as, ref)) - ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ - r = ra_allocref(as, ref, allow); - } - ra_noweak(as, r); - return r; -} - -/* Allocate two source registers for three-operand instructions. 
*/ -static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) -{ - IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); - Reg left = irl->r, right = irr->r; - if (ra_hasreg(left)) { - ra_noweak(as, left); - if (ra_noreg(right)) - right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); - else - ra_noweak(as, right); - } else if (ra_hasreg(right)) { - ra_noweak(as, right); - left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); - } else if (ra_hashint(right)) { - right = ra_allocref(as, ir->op2, allow); - left = ra_alloc1(as, ir->op1, rset_exclude(allow, right)); - } else { - left = ra_allocref(as, ir->op1, allow); - right = ra_alloc1(as, ir->op2, rset_exclude(allow, left)); - } - return left | (right << 8); -} - -/* -- Guard handling ------------------------------------------------------ */ - -/* Setup all needed exit stubs. */ -static void asm_exitstub_setup(ASMState *as, ExitNo nexits) -{ - ExitNo i; - MCode *mxp = as->mctop; - if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) - asm_mclimit(as); - /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */ - for (i = nexits-1; (int32_t)i >= 0; i--) - *--mxp = A64I_BL|((-3-i)&0x03ffffffu); - *--mxp = A64I_MOVZw|A64F_U16(as->T->traceno); - mxp--; - *mxp = A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu); - *--mxp = A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP); - as->mctop = mxp; -} - -static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) -{ - /* Keep this in-sync with exitstub_trace_addr(). */ - return as->mctop + exitno + 3; -} - -/* Emit conditional branch to exit for guard. */ -static void asm_guardcc(ASMState *as, A64CC cc) -{ - MCode *target = asm_exitstub_addr(as, as->snapno); - MCode *p = as->mcp; - if (LJ_UNLIKELY(p == as->invmcp)) { - as->loopinv = 1; - *p = A64I_B | ((target-p) & 0x03ffffffu); - emit_cond_branch(as, cc^1, p-1); - return; - } - emit_cond_branch(as, cc, target); -} - -/* Emit test and branch instruction to exit for guard. 
*/ -static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit) -{ - MCode *target = asm_exitstub_addr(as, as->snapno); - MCode *p = as->mcp; - if (LJ_UNLIKELY(p == as->invmcp)) { - as->loopinv = 1; - *p = A64I_B | ((target-p) & 0x03ffffffu); - emit_tnb(as, ai^0x01000000u, r, bit, p-1); - return; - } - emit_tnb(as, ai, r, bit, target); -} - -/* Emit compare and branch instruction to exit for guard. */ -static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r) -{ - MCode *target = asm_exitstub_addr(as, as->snapno); - MCode *p = as->mcp; - if (LJ_UNLIKELY(p == as->invmcp)) { - as->loopinv = 1; - *p = A64I_B | ((target-p) & 0x03ffffffu); - emit_cnb(as, ai^0x01000000u, r, p-1); - return; - } - emit_cnb(as, ai, r, target); -} - -/* -- Operand fusion ------------------------------------------------------ */ - -/* Limit linear search to this distance. Avoids O(n^2) behavior. */ -#define CONFLICT_SEARCH_LIM 31 - -static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) -{ - if (irref_isk(ref)) { - IRIns *ir = IR(ref); - if (ir->o == IR_KNULL || !irt_is64(ir->t)) { - *k = ir->i; - return 1; - } else if (checki32((int64_t)ir_k64(ir)->u64)) { - *k = (int32_t)ir_k64(ir)->u64; - return 1; - } - } - return 0; -} - -/* Check if there's no conflicting instruction between curins and ref. */ -static int noconflict(ASMState *as, IRRef ref, IROp conflict) -{ - IRIns *ir = as->ir; - IRRef i = as->curins; - if (i > ref + CONFLICT_SEARCH_LIM) - return 0; /* Give up, ref is too far away. */ - while (--i > ref) - if (ir[i].o == conflict) - return 0; /* Conflict found. */ - return 1; /* Ok, no conflict. */ -} - -/* Fuse the array base of colocated arrays. 
*/ -static int32_t asm_fuseabase(ASMState *as, IRRef ref) -{ - IRIns *ir = IR(ref); - if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && - !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) - return (int32_t)sizeof(GCtab); - return 0; -} - -#define FUSE_REG 0x40000000 - -/* Fuse array/hash/upvalue reference into register+offset operand. */ -static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, - A64Ins ins) -{ - IRIns *ir = IR(ref); - if (ra_noreg(ir->r)) { - if (ir->o == IR_AREF) { - if (mayfuse(as, ref)) { - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? tab : ir->op1; - ofs += 8*IR(ir->op2)->i; - if (emit_checkofs(ins, ofs)) { - *ofsp = ofs; - return ra_alloc1(as, refa, allow); - } - } else { - Reg base = ra_alloc1(as, ir->op1, allow); - *ofsp = FUSE_REG|ra_alloc1(as, ir->op2, rset_exclude(allow, base)); - return base; - } - } - } else if (ir->o == IR_HREFK) { - if (mayfuse(as, ref)) { - int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); - if (emit_checkofs(ins, ofs)) { - *ofsp = ofs; - return ra_alloc1(as, ir->op1, allow); - } - } - } else if (ir->o == IR_UREFC) { - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; - int64_t ofs = glofs(as, &uv->tv); - if (emit_checkofs(ins, ofs)) { - *ofsp = (int32_t)ofs; - return RID_GL; - } - } - } - } - *ofsp = 0; - return ra_alloc1(as, ref, allow); -} - -/* Fuse m operand into arithmetic/logic instructions. 
*/ -static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow) -{ - IRIns *ir = IR(ref); - if (ra_hasreg(ir->r)) { - ra_noweak(as, ir->r); - return A64F_M(ir->r); - } else if (irref_isk(ref)) { - uint32_t m; - int64_t k = get_k64val(ir); - if ((ai & 0x1f000000) == 0x0a000000) - m = emit_isk13(k, irt_is64(ir->t)); - else - m = emit_isk12(k); - if (m) - return m; - } else if (mayfuse(as, ref)) { - if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR && irref_isk(ir->op2)) || - (ir->o == IR_ADD && ir->op1 == ir->op2)) { - A64Shift sh = ir->o == IR_BSHR ? A64SH_LSR : - ir->o == IR_BSAR ? A64SH_ASR : A64SH_LSL; - int shift = ir->o == IR_ADD ? 1 : - (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31)); - IRIns *irl = IR(ir->op1); - if (sh == A64SH_LSL && - irl->o == IR_CONV && - irl->op2 == ((IRT_I64<op1, allow); - return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift); - } else { - Reg m = ra_alloc1(as, ir->op1, allow); - return A64F_M(m) | A64F_SH(sh, shift); - } - } else if (ir->o == IR_CONV && - ir->op2 == ((IRT_I64<op1, allow); - return A64F_M(m) | A64F_EX(A64EX_SXTW); - } - } - return A64F_M(ra_allocref(as, ref, allow)); -} - -/* Fuse XLOAD/XSTORE reference into load/store operand. 
*/ -static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref, - RegSet allow) -{ - IRIns *ir = IR(ref); - Reg base; - int32_t ofs = 0; - if (ra_noreg(ir->r) && canfuse(as, ir)) { - if (ir->o == IR_ADD) { - if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs)) { - ref = ir->op1; - } else { - Reg rn, rm; - IRRef lref = ir->op1, rref = ir->op2; - IRIns *irl = IR(lref); - if (mayfuse(as, irl->op1)) { - unsigned int shift = 4; - if (irl->o == IR_BSHL && irref_isk(irl->op2)) { - shift = (IR(irl->op2)->i & 63); - } else if (irl->o == IR_ADD && irl->op1 == irl->op2) { - shift = 1; - } - if ((ai >> 30) == shift) { - lref = irl->op1; - irl = IR(lref); - ai |= A64I_LS_SH; - } - } - if (irl->o == IR_CONV && - irl->op2 == ((IRT_I64<op1; - ai |= A64I_LS_SXTWx; - } else { - ai |= A64I_LS_LSLx; - } - rm = ra_alloc1(as, lref, allow); - rn = ra_alloc1(as, rref, rset_exclude(allow, rm)); - emit_dnm(as, (ai^A64I_LS_R), rd, rn, rm); - return; - } - } else if (ir->o == IR_STRREF) { - if (asm_isk32(as, ir->op2, &ofs)) { - ref = ir->op1; - } else if (asm_isk32(as, ir->op1, &ofs)) { - ref = ir->op2; - } else { - Reg rn = ra_alloc1(as, ir->op1, allow); - IRIns *irr = IR(ir->op2); - uint32_t m; - if (irr+1 == ir && !ra_used(irr) && - irr->o == IR_ADD && irref_isk(irr->op2)) { - ofs = sizeof(GCstr) + IR(irr->op2)->i; - if (emit_checkofs(ai, ofs)) { - Reg rm = ra_alloc1(as, irr->op1, rset_exclude(allow, rn)); - m = A64F_M(rm) | A64F_EX(A64EX_SXTW); - goto skipopm; - } - } - m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn)); - ofs = sizeof(GCstr); - skipopm: - emit_lso(as, ai, rd, rd, ofs); - emit_dn(as, A64I_ADDx^m, rd, rn); - return; - } - ofs += sizeof(GCstr); - if (!emit_checkofs(ai, ofs)) { - Reg rn = ra_alloc1(as, ref, allow); - Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn)); - emit_dnm(as, (ai^A64I_LS_R)|A64I_LS_UXTWx, rd, rn, rm); - return; - } - } - } - base = ra_alloc1(as, ref, allow); - emit_lso(as, ai, (rd & 31), base, ofs); -} - -/* Fuse FP 
multiply-add/sub. */ -static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) -{ - IRRef lref = ir->op1, rref = ir->op2; - IRIns *irm; - if (lref != rref && - ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && - ra_noreg(irm->r)) || - (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && - (rref = lref, ai = air, ra_noreg(irm->r))))) { - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); - Reg left = ra_alloc2(as, irm, - rset_exclude(rset_exclude(RSET_FPR, dest), add)); - Reg right = (left >> 8); left &= 255; - emit_dnma(as, ai, (dest & 31), (left & 31), (right & 31), (add & 31)); - return 1; - } - return 0; -} - -/* Fuse BAND + BSHL/BSHR into UBFM. */ -static int asm_fuseandshift(ASMState *as, IRIns *ir) -{ - IRIns *irl = IR(ir->op1); - lua_assert(ir->o == IR_BAND); - if (canfuse(as, irl) && irref_isk(ir->op2)) { - uint64_t mask = get_k64val(IR(ir->op2)); - if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) { - int32_t shmask = irt_is64(irl->t) ? 63 : 31; - int32_t shift = (IR(irl->op2)->i & shmask); - int32_t imms = shift; - if (irl->o == IR_BSHL) { - mask >>= shift; - shift = (shmask-shift+1) & shmask; - imms = 0; - } - if (mask && !((mask+1) & mask)) { /* Contiguous 1-bits at the bottom. */ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, irl->op1, RSET_GPR); - A64Ins ai = shmask == 63 ? A64I_UBFMx : A64I_UBFMw; - imms += 63 - emit_clz64(mask); - if (imms > shmask) imms = shmask; - emit_dn(as, ai | A64F_IMMS(imms) | A64F_IMMR(shift), dest, left); - return 1; - } - } - } - return 0; -} - -/* Fuse BOR(BSHL, BSHR) into EXTR/ROR. 
*/ -static int asm_fuseorshift(ASMState *as, IRIns *ir) -{ - IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); - lua_assert(ir->o == IR_BOR); - if (canfuse(as, irl) && canfuse(as, irr) && - ((irl->o == IR_BSHR && irr->o == IR_BSHL) || - (irl->o == IR_BSHL && irr->o == IR_BSHR))) { - if (irref_isk(irl->op2) && irref_isk(irr->op2)) { - IRRef lref = irl->op1, rref = irr->op1; - uint32_t lshift = IR(irl->op2)->i, rshift = IR(irr->op2)->i; - if (irl->o == IR_BSHR) { /* BSHR needs to be the right operand. */ - uint32_t tmp2; - IRRef tmp1 = lref; lref = rref; rref = tmp1; - tmp2 = lshift; lshift = rshift; rshift = tmp2; - } - if (rshift + lshift == (irt_is64(ir->t) ? 64 : 32)) { - A64Ins ai = irt_is64(ir->t) ? A64I_EXTRx : A64I_EXTRw; - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, lref, RSET_GPR); - Reg right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left)); - emit_dnm(as, ai | A64F_IMMS(rshift), dest, left, right); - return 1; - } - } - } - return 0; -} - -/* -- Calls --------------------------------------------------------------- */ - -/* Generate a call to a C function. */ -static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) -{ - uint32_t n, nargs = CCI_XNARGS(ci); - int32_t ofs = 0; - Reg gpr, fpr = REGARG_FIRSTFPR; - if ((void *)ci->func) - emit_call(as, (void *)ci->func); - for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) - as->cost[gpr] = REGCOST(~0u, ASMREF_L); - gpr = REGARG_FIRSTGPR; - for (n = 0; n < nargs; n++) { /* Setup args. */ - IRRef ref = args[n]; - IRIns *ir = IR(ref); - if (ref) { - if (irt_isfp(ir->t)) { - if (fpr <= REGARG_LASTFPR) { - lua_assert(rset_test(as->freeset, fpr)); /* Must have been evicted. */ - ra_leftov(as, fpr, ref); - fpr++; - } else { - Reg r = ra_alloc1(as, ref, RSET_FPR); - emit_spstore(as, ir, r, ofs); - ofs += 8; - } - } else { - if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. 
*/ - ra_leftov(as, gpr, ref); - gpr++; - } else { - Reg r = ra_alloc1(as, ref, RSET_GPR); - emit_spstore(as, ir, r, ofs); - ofs += 8; - } - } - } - } -} - -/* Setup result reg/sp for call. Evict scratch regs. */ -static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - RegSet drop = RSET_SCRATCH; - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - ra_evictset(as, drop); /* Evictions must be performed first. */ - if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); - if (irt_isfp(ir->t)) { - if (ci->flags & CCI_CASTU64) { - Reg dest = ra_dest(as, ir, RSET_FPR) & 31; - emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D_R : A64I_FMOV_S_R, - dest, RID_RET); - } else { - ra_destreg(as, ir, RID_FPRET); - } - } else { - ra_destreg(as, ir, RID_RET); - } - } - UNUSED(ci); -} - -static void asm_callx(ASMState *as, IRIns *ir) -{ - IRRef args[CCI_NARGS_MAX*2]; - CCallInfo ci; - IRRef func; - IRIns *irf; - ci.flags = asm_callx_flags(as, ir); - asm_collectargs(as, ir, &ci, args); - asm_setupresult(as, ir, &ci); - func = ir->op2; irf = IR(func); - if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } - if (irref_isk(func)) { /* Call to constant address. */ - ci.func = (ASMFunction)(ir_k64(irf)->u64); - } else { /* Need a non-argument register for indirect calls. */ - Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); - emit_n(as, A64I_BLR, freg); - ci.func = (ASMFunction)(void *)0; - } - asm_gencall(as, &ci, args); -} - -/* -- Returns ------------------------------------------------------------- */ - -/* Return to lower frame. Guard that it goes to the right spot. 
*/ -static void asm_retf(ASMState *as, IRIns *ir) -{ - Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); - void *pc = ir_kptr(IR(ir->op2)); - int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); - as->topslot -= (BCReg)delta; - if ((int32_t)as->topslot < 0) as->topslot = 0; - irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ - /* Need to force a spill on REF_BASE now to update the stack slot. */ - emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE))); - emit_setgl(as, base, jit_base); - emit_addptr(as, base, -8*delta); - asm_guardcc(as, CC_NE); - emit_nm(as, A64I_CMPx, RID_TMP, - ra_allock(as, i64ptr(pc), rset_exclude(RSET_GPR, base))); - emit_lso(as, A64I_LDRx, RID_TMP, base, -8); -} - -/* -- Type conversions ---------------------------------------------------- */ - -static void asm_tointg(ASMState *as, IRIns *ir, Reg left) -{ - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - Reg dest = ra_dest(as, ir, RSET_GPR); - asm_guardcc(as, CC_NE); - emit_nm(as, A64I_FCMPd, (tmp & 31), (left & 31)); - emit_dn(as, A64I_FCVT_F64_S32, (tmp & 31), dest); - emit_dn(as, A64I_FCVT_S32_F64, dest, (left & 31)); -} - -static void asm_tobit(ASMState *as, IRIns *ir) -{ - RegSet allow = RSET_FPR; - Reg left = ra_alloc1(as, ir->op1, allow); - Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); - Reg tmp = ra_scratch(as, rset_clear(allow, right)); - Reg dest = ra_dest(as, ir, RSET_GPR); - emit_dn(as, A64I_FMOV_R_S, dest, (tmp & 31)); - emit_dnm(as, A64I_FADDd, (tmp & 31), (left & 31), (right & 31)); -} - -static void asm_conv(ASMState *as, IRIns *ir) -{ - IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); - int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); - int stfp = (st == IRT_NUM || st == IRT_FLOAT); - IRRef lref = ir->op1; - lua_assert(irt_type(ir->t) != st); - if (irt_isfp(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - if (stfp) { /* FP to FP conversion. */ - emit_dn(as, st == IRT_NUM ? 
A64I_FCVT_F32_F64 : A64I_FCVT_F64_F32, - (dest & 31), (ra_alloc1(as, lref, RSET_FPR) & 31)); - } else { /* Integer to FP conversion. */ - Reg left = ra_alloc1(as, lref, RSET_GPR); - A64Ins ai = irt_isfloat(ir->t) ? - (((IRT_IS64 >> st) & 1) ? - (st == IRT_I64 ? A64I_FCVT_F32_S64 : A64I_FCVT_F32_U64) : - (st == IRT_INT ? A64I_FCVT_F32_S32 : A64I_FCVT_F32_U32)) : - (((IRT_IS64 >> st) & 1) ? - (st == IRT_I64 ? A64I_FCVT_F64_S64 : A64I_FCVT_F64_U64) : - (st == IRT_INT ? A64I_FCVT_F64_S32 : A64I_FCVT_F64_U32)); - emit_dn(as, ai, (dest & 31), left); - } - } else if (stfp) { /* FP to integer conversion. */ - if (irt_isguard(ir->t)) { - /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); - asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); - } else { - Reg left = ra_alloc1(as, lref, RSET_FPR); - Reg dest = ra_dest(as, ir, RSET_GPR); - A64Ins ai = irt_is64(ir->t) ? - (st == IRT_NUM ? - (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) : - (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) : - (st == IRT_NUM ? - (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) : - (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32)); - emit_dn(as, ai, dest, (left & 31)); - } - } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, lref, RSET_GPR); - A64Ins ai = st == IRT_I8 ? A64I_SXTBw : - st == IRT_U8 ? A64I_UXTBw : - st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw; - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); - emit_dn(as, ai, dest, left); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - if (irt_is64(ir->t)) { - if (st64 || !(ir->op2 & IRCONV_SEXT)) { - /* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */ - ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ - } else { /* 32 to 64 bit sign extension. 
*/ - Reg left = ra_alloc1(as, lref, RSET_GPR); - emit_dn(as, A64I_SXTW, dest, left); - } - } else { - if (st64) { - /* This is either a 32 bit reg/reg mov which zeroes the hiword - ** or a load of the loword from a 64 bit address. - */ - Reg left = ra_alloc1(as, lref, RSET_GPR); - emit_dm(as, A64I_MOVw, dest, left); - } else { /* 32/32 bit no-op (cast). */ - ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ - } - } - } -} - -static void asm_strto(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; - IRRef args[2]; - Reg dest = 0, tmp; - int destused = ra_used(ir); - int32_t ofs = 0; - ra_evictset(as, RSET_SCRATCH); - if (destused) { - if (ra_hasspill(ir->s)) { - ofs = sps_scale(ir->s); - destused = 0; - if (ra_hasreg(ir->r)) { - ra_free(as, ir->r); - ra_modified(as, ir->r); - emit_spload(as, ir, ir->r, ofs); - } - } else { - dest = ra_dest(as, ir, RSET_FPR); - } - } - if (destused) - emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0); - asm_guardcnb(as, A64I_CBZ, RID_RET); - args[0] = ir->op1; /* GCstr *str */ - args[1] = ASMREF_TMP1; /* TValue *n */ - asm_gencall(as, ci, args); - tmp = ra_releasetmp(as, ASMREF_TMP1); - emit_opk(as, A64I_ADDx, tmp, RID_SP, ofs, RSET_GPR); -} - -/* -- Memory references --------------------------------------------------- */ - -/* Store tagged value for ref at base+ofs. 
*/ -static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) -{ - RegSet allow = rset_exclude(RSET_GPR, base); - IRIns *ir = IR(ref); - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); - if (irref_isk(ref)) { - TValue k; - lj_ir_kvalue(as->J->L, &k, ir); - emit_lso(as, A64I_STRx, ra_allock(as, k.u64, allow), base, ofs); - } else { - Reg src = ra_alloc1(as, ref, allow); - rset_clear(allow, src); - if (irt_isinteger(ir->t)) { - Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); - emit_lso(as, A64I_STRx, RID_TMP, base, ofs); - emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), RID_TMP, type, src); - } else { - Reg type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - emit_lso(as, A64I_STRx, RID_TMP, base, ofs); - emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), RID_TMP, src, type); - } - } -} - -/* Get pointer to TValue. */ -static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) -{ - IRIns *ir = IR(ref); - if (irt_isnum(ir->t)) { - if (irref_isk(ref)) { - /* Use the number constant itself as a TValue. */ - ra_allockreg(as, i64ptr(ir_knum(ir)), dest); - } else { - /* Otherwise force a spill and use the spill slot. */ - emit_opk(as, A64I_ADDx, dest, RID_SP, ra_spill(as, ir), RSET_GPR); - } - } else { - /* Otherwise use g->tmptv to hold the TValue. */ - asm_tvstore64(as, dest, 0, ref); - ra_allockreg(as, i64ptr(&J2G(as->J)->tmptv), dest); - } -} - -static void asm_aref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx, base; - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? 
tab : ir->op1; - uint32_t k = emit_isk12(ofs + 8*IR(ir->op2)->i); - if (k) { - base = ra_alloc1(as, refa, RSET_GPR); - emit_dn(as, A64I_ADDx^k, dest, base); - return; - } - } - base = ra_alloc1(as, ir->op1, RSET_GPR); - idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); - emit_dnm(as, A64I_ADDx | A64F_EXSH(A64EX_UXTW, 3), dest, base, idx); -} - -/* Inlined hash lookup. Specialized for key type and for const keys. -** The equivalent C code is: -** Node *n = hashkey(t, key); -** do { -** if (lj_obj_equal(&n->key, key)) return &n->val; -** } while ((n = nextnode(n))); -** return niltv(L); -*/ -static void asm_href(ASMState *as, IRIns *ir, IROp merge) -{ - RegSet allow = RSET_GPR; - int destused = ra_used(ir); - Reg dest = ra_dest(as, ir, allow); - Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); - Reg key = 0, tmp = RID_TMP; - IRRef refkey = ir->op2; - IRIns *irkey = IR(refkey); - int isk = irref_isk(ir->op2); - IRType1 kt = irkey->t; - uint32_t k = 0; - uint32_t khash; - MCLabel l_end, l_loop, l_next; - rset_clear(allow, tab); - - if (!isk) { - key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); - rset_clear(allow, key); - if (!irt_isstr(kt)) { - tmp = ra_scratch(as, allow); - rset_clear(allow, tmp); - } - } else if (irt_isnum(kt)) { - int64_t val = (int64_t)ir_knum(irkey)->u64; - if (!(k = emit_isk12(val))) { - key = ra_allock(as, val, allow); - rset_clear(allow, key); - } - } else if (!irt_ispri(kt)) { - if (!(k = emit_isk12(irkey->i))) { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - } - } - - /* Key not found in chain: jump to exit (if merged) or load niltv. */ - l_end = emit_label(as); - as->invmcp = NULL; - if (merge == IR_NE) - asm_guardcc(as, CC_AL); - else if (destused) - emit_loada(as, dest, niltvg(J2G(as->J))); - - /* Follow hash chain until the end. 
*/ - l_loop = --as->mcp; - emit_n(as, A64I_CMPx^A64I_K12^0, dest); - emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); - l_next = emit_label(as); - - /* Type and value comparison. */ - if (merge == IR_EQ) - asm_guardcc(as, CC_EQ); - else - emit_cond_branch(as, CC_EQ, l_end); - - if (irt_isnum(kt)) { - if (isk) { - /* Assumes -0.0 is already canonicalized to +0.0. */ - if (k) - emit_n(as, A64I_CMPx^k, tmp); - else - emit_nm(as, A64I_CMPx, key, tmp); - emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); - } else { - Reg tisnum = ra_allock(as, LJ_TISNUM << 15, allow); - Reg ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key)); - rset_clear(allow, tisnum); - emit_nm(as, A64I_FCMPd, key, ftmp); - emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31)); - emit_cond_branch(as, CC_LO, l_next); - emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp); - emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n)); - } - } else if (irt_isaddr(kt)) { - Reg scr; - if (isk) { - int64_t kk = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; - scr = ra_allock(as, kk, allow); - emit_nm(as, A64I_CMPx, scr, tmp); - emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); - } else { - scr = ra_scratch(as, allow); - emit_nm(as, A64I_CMPx, tmp, scr); - emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64)); - } - rset_clear(allow, scr); - } else { - Reg type, scr; - lua_assert(irt_ispri(kt) && !irt_isnil(kt)); - type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); - scr = ra_scratch(as, rset_clear(allow, type)); - rset_clear(allow, scr); - emit_nm(as, A64I_CMPw, scr, type); - emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key)); - } - - *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE; - if (!isk && irt_isaddr(kt)) { - Reg type = ra_allock(as, (int32_t)irt_toitype(kt), allow); - emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type); - rset_clear(allow, type); - } - /* Load main position relative to tab->node into dest. 
*/ - khash = isk ? ir_khash(irkey) : 1; - if (khash == 0) { - emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node)); - } else { - emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 3), dest, tmp, dest); - emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 1), dest, dest, dest); - emit_lso(as, A64I_LDRx, tmp, tab, offsetof(GCtab, node)); - if (isk) { - Reg tmphash = ra_allock(as, khash, allow); - emit_dnm(as, A64I_ANDw, dest, dest, tmphash); - emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); - } else if (irt_isstr(kt)) { - /* Fetch of str->hash is cheaper than ra_allock. */ - emit_dnm(as, A64I_ANDw, dest, dest, tmp); - emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, hash)); - emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); - } else { /* Must match with hash*() in lj_tab.c. */ - emit_dnm(as, A64I_ANDw, dest, dest, tmp); - emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask)); - emit_dnm(as, A64I_SUBw, dest, dest, tmp); - emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp); - emit_dnm(as, A64I_EORw, dest, dest, tmp); - emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest); - emit_dnm(as, A64I_SUBw, tmp, tmp, dest); - emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest); - emit_dnm(as, A64I_EORw, tmp, tmp, dest); - if (irt_isnum(kt)) { - emit_dnm(as, A64I_ADDw, dest, dest, dest); - emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); - emit_dm(as, A64I_MOVw, tmp, dest); - emit_dn(as, A64I_FMOV_R_D, dest, (key & 31)); - } else { - checkmclim(as); - emit_dm(as, A64I_MOVw, tmp, key); - emit_dnm(as, A64I_EORw, dest, dest, - ra_allock(as, irt_toitype(kt) << 15, allow)); - emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); - emit_dm(as, A64I_MOVx, dest, key); - } - } - } -} - -static void asm_hrefk(ASMState *as, IRIns *ir) -{ - IRIns *kslot = IR(ir->op2); - IRIns *irkey = IR(kslot->op1); - int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); - int32_t kofs = ofs + 
(int32_t)offsetof(Node, key); - int bigofs = !emit_checkofs(A64I_LDRx, ofs); - RegSet allow = RSET_GPR; - Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; - Reg node = ra_alloc1(as, ir->op1, allow); - Reg key = ra_scratch(as, rset_clear(allow, node)); - Reg idx = node; - uint64_t k; - lua_assert(ofs % sizeof(Node) == 0); - rset_clear(allow, key); - if (bigofs) { - idx = dest; - rset_clear(allow, dest); - kofs = (int32_t)offsetof(Node, key); - } else if (ra_hasreg(dest)) { - emit_opk(as, A64I_ADDx, dest, node, ofs, allow); - } - asm_guardcc(as, CC_NE); - if (irt_ispri(irkey->t)) { - k = ~((int64_t)~irt_toitype(irkey->t) << 47); - } else if (irt_isnum(irkey->t)) { - k = ir_knum(irkey)->u64; - } else { - k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey); - } - emit_nm(as, A64I_CMPx, key, ra_allock(as, k, allow)); - emit_lso(as, A64I_LDRx, key, idx, kofs); - if (bigofs) - emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR); -} - -static void asm_uref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; - emit_lsptr(as, A64I_LDRx, dest, v); - } else { - Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->o == IR_UREFC) { - asm_guardcc(as, CC_NE); - emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP); - emit_opk(as, A64I_ADDx, dest, uv, - (int32_t)offsetof(GCupval, tv), RSET_GPR); - emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); - } else { - emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v)); - } - emit_lso(as, A64I_LDRx, uv, func, - (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8)); - } -} - -static void asm_fref(ASMState *as, IRIns *ir) -{ - UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); -} - -static void asm_strref(ASMState *as, IRIns *ir) -{ - RegSet allow = RSET_GPR; - Reg dest = 
ra_dest(as, ir, allow); - Reg base = ra_alloc1(as, ir->op1, allow); - IRIns *irr = IR(ir->op2); - int32_t ofs = sizeof(GCstr); - uint32_t m; - rset_clear(allow, base); - if (irref_isk(ir->op2) && (m = emit_isk12(ofs + irr->i))) { - emit_dn(as, A64I_ADDx^m, dest, base); - } else { - emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, dest); - emit_dnm(as, A64I_ADDx, dest, base, ra_alloc1(as, ir->op2, allow)); - } -} - -/* -- Loads and stores ---------------------------------------------------- */ - -static A64Ins asm_fxloadins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: return A64I_LDRB ^ A64I_LS_S; - case IRT_U8: return A64I_LDRB; - case IRT_I16: return A64I_LDRH ^ A64I_LS_S; - case IRT_U16: return A64I_LDRH; - case IRT_NUM: return A64I_LDRd; - case IRT_FLOAT: return A64I_LDRs; - default: return irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw; - } -} - -static A64Ins asm_fxstoreins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: case IRT_U8: return A64I_STRB; - case IRT_I16: case IRT_U16: return A64I_STRH; - case IRT_NUM: return A64I_STRd; - case IRT_FLOAT: return A64I_STRs; - default: return irt_is64(ir->t) ? A64I_STRx : A64I_STRw; - } -} - -static void asm_fload(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx; - A64Ins ai = asm_fxloadins(ir); - int32_t ofs; - if (ir->op1 == REF_NIL) { - idx = RID_GL; - ofs = (ir->op2 << 2) - GG_OFS(g); - } else { - idx = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->op2 == IRFL_TAB_ARRAY) { - ofs = asm_fuseabase(as, ir->op1); - if (ofs) { /* Turn the t->array load into an add for colocated arrays. 
*/ - emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, idx); - return; - } - } - ofs = field_ofs[ir->op2]; - } - emit_lso(as, ai, (dest & 31), idx, ofs); -} - -static void asm_fstore(ASMState *as, IRIns *ir) -{ - if (ir->r != RID_SINK) { - Reg src = ra_alloc1(as, ir->op2, RSET_GPR); - IRIns *irf = IR(ir->op1); - Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); - int32_t ofs = field_ofs[irf->op2]; - emit_lso(as, asm_fxstoreins(ir), (src & 31), idx, ofs); - } -} - -static void asm_xload(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); -} - -static void asm_xstore(ASMState *as, IRIns *ir) -{ - if (ir->r != RID_SINK) { - Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, - rset_exclude(RSET_GPR, src)); - } -} - -static void asm_ahuvload(ASMState *as, IRIns *ir) -{ - Reg idx, tmp, type; - int32_t ofs = 0; - RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; - lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || - irt_isint(ir->t)); - if (ra_used(ir)) { - Reg dest = ra_dest(as, ir, allow); - tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest; - if (irt_isaddr(ir->t)) { - emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest); - } else if (irt_isnum(ir->t)) { - emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp); - } else if (irt_isint(ir->t)) { - emit_dm(as, A64I_MOVw, dest, dest); - } - } else { - tmp = ra_scratch(as, gpr); - } - type = ra_scratch(as, rset_clear(gpr, tmp)); - idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx); - /* Always do the type check, even if the load result is unused. */ - asm_guardcc(as, irt_isnum(ir->t) ? 
CC_LS : CC_NE); - if (irt_type(ir->t) >= IRT_NUM) { - lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); - emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), - ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp); - } else if (irt_isaddr(ir->t)) { - emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type); - emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); - } else if (irt_isnil(ir->t)) { - emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); - } else { - /* gpr already has tmp and type cleared; also exclude idx, exactly like the - ** number check above, so the constant cannot share a register with them. - */ - emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), - ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, - rset_exclude(gpr, idx)), tmp); - } - if (ofs & FUSE_REG) - emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31)); - else - emit_lso(as, A64I_LDRx, tmp, idx, ofs); -} - -/* Store for array/hash/upvalue slots. Numbers are stored directly from an FPR; - ** other values are stored from a GPR that is either the primitive constant - ** itself or is formed by an add of the source and a type constant. - */ -static void asm_ahustore(ASMState *as, IRIns *ir) -{ - if (ir->r != RID_SINK) { - RegSet allow = RSET_GPR; - Reg idx, src = RID_NONE, tmp = RID_TMP, type = RID_NONE; - int32_t ofs = 0; - if (irt_isnum(ir->t)) { - src = ra_alloc1(as, ir->op2, RSET_FPR); - idx = asm_fuseahuref(as, ir->op1, &ofs, allow, A64I_STRd); - if (ofs & FUSE_REG) - emit_dnm(as, (A64I_STRd^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, (src & 31), idx, (ofs &31)); - else - emit_lso(as, A64I_STRd, (src & 31), idx, ofs); - } else { - if (!irt_ispri(ir->t)) { - src = ra_alloc1(as, ir->op2, allow); - rset_clear(allow, src); - if (irt_isinteger(ir->t)) - type = ra_allock(as, (int64_t)LJ_TISNUM << 47, allow); - else - type = ra_allock(as, irt_toitype(ir->t), allow); - } else { - tmp = type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t)<<47), allow); - } - idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type), - A64I_STRx); - if (ofs & FUSE_REG) - emit_dnm(as, (A64I_STRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31)); - else - emit_lso(as, A64I_STRx, tmp, idx, ofs); - if (ra_hasreg(src)) { - if (irt_isinteger(ir->t)) { - emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), tmp, type, src); - } else { - emit_dnm(as, A64I_ADDx | 
A64F_SH(A64SH_LSL, 47), tmp, src, type); - } - } - } - } -} - -static void asm_sload(ASMState *as, IRIns *ir) -{ - int32_t ofs = 8*((int32_t)ir->op1-2); - IRType1 t = ir->t; - Reg dest = RID_NONE, base; - RegSet allow = RSET_GPR; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); - if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { - dest = ra_scratch(as, RSET_FPR); - asm_tointg(as, ir, dest); - t.irt = IRT_NUM; /* Continue with a regular number type check. */ - } else if (ra_used(ir)) { - Reg tmp = RID_NONE; - if ((ir->op2 & IRSLOAD_CONVERT)) - tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR); - lua_assert((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t)); - dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); - base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest)); - if (irt_isaddr(t)) { - emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest); - } else if ((ir->op2 & IRSLOAD_CONVERT)) { - if (irt_isint(t)) { - emit_dn(as, A64I_FCVT_S32_F64, dest, (tmp & 31)); - /* If value is already loaded for type check, move it to FPR. */ - if ((ir->op2 & IRSLOAD_TYPECHECK)) - emit_dn(as, A64I_FMOV_D_R, (tmp & 31), dest); - else - dest = tmp; - t.irt = IRT_NUM; /* Check for original type. */ - } else { - emit_dn(as, A64I_FCVT_F64_S32, (dest & 31), tmp); - dest = tmp; - t.irt = IRT_INT; /* Check for original type. 
*/ - } - } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { - emit_dm(as, A64I_MOVw, dest, dest); - } - goto dotypecheck; - } - base = ra_alloc1(as, REF_BASE, allow); -dotypecheck: - rset_clear(allow, base); - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - Reg tmp; - if (ra_hasreg(dest) && rset_test(RSET_GPR, dest)) { - tmp = dest; - } else { - tmp = ra_scratch(as, allow); - rset_clear(allow, tmp); - } - if (irt_isnum(t) && !(ir->op2 & IRSLOAD_CONVERT)) - emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp); - /* Need type check, even if the load result is unused. */ - asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE); - if (irt_type(t) >= IRT_NUM) { - lua_assert(irt_isinteger(t) || irt_isnum(t)); - emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), - ra_allock(as, LJ_TISNUM << 15, allow), tmp); - } else if (irt_isnil(t)) { - emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); - } else if (irt_ispri(t)) { - emit_nm(as, A64I_CMPx, - ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp); - } else { - Reg type = ra_scratch(as, allow); - emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type); - emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); - } - emit_lso(as, A64I_LDRx, tmp, base, ofs); - return; - } - if (ra_hasreg(dest)) { - emit_lso(as, irt_isnum(t) ? A64I_LDRd : - (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base, ofs); - } -} - -/* -- Allocations --------------------------------------------------------- */ - -#if LJ_HASFFI -static void asm_cnew(ASMState *as, IRIns *ir) -{ - CTState *cts = ctype_ctsG(J2G(as->J)); - CTypeID id = (CTypeID)IR(ir->op1)->i; - CTSize sz; - CTInfo info = lj_ctype_info(cts, id, &sz); - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; - IRRef args[4]; - RegSet allow = (RSET_GPR & ~RSET_SCRATCH); - lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); - - as->gcsteps++; - asm_setupresult(as, ir, ci); /* GCcdata * */ - /* Initialize immutable cdata object. 
*/ - if (ir->o == IR_CNEWI) { - int32_t ofs = sizeof(GCcdata); - Reg r = ra_alloc1(as, ir->op2, allow); - lua_assert(sz == 4 || sz == 8); - emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs); - } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ - ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ir->op1; /* CTypeID id */ - args[2] = ir->op2; /* CTSize sz */ - args[3] = ASMREF_TMP1; /* CTSize align */ - asm_gencall(as, ci, args); - emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); - return; - } - - /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ - { - Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow); - emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); - emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); - emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP); - if (id < 65536) emit_d(as, A64I_MOVZw | A64F_U16(id), RID_X1); - } - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ASMREF_TMP1; /* MSize size */ - asm_gencall(as, ci, args); - ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), - ra_releasetmp(as, ASMREF_TMP1)); -} -#else -#define asm_cnew(as, ir) ((void)0) -#endif - -/* -- Write barriers ------------------------------------------------------ */ - -static void asm_tbar(ASMState *as, IRIns *ir) -{ - Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); - Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab)); - Reg gr = ra_allock(as, i64ptr(J2G(as->J)), - rset_exclude(rset_exclude(RSET_GPR, tab), link)); - Reg mark = RID_TMP; - MCLabel l_end = emit_label(as); - emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist)); - emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked)); - emit_lso(as, A64I_STRx, tab, gr, - (int32_t)offsetof(global_State, gc.grayagain)); - emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark); - emit_lso(as, A64I_LDRx, link, gr, - 
(int32_t)offsetof(global_State, gc.grayagain)); - emit_cond_branch(as, CC_EQ, l_end); - emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark); - emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked)); -} - -static void asm_obar(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; - IRRef args[2]; - MCLabel l_end; - RegSet allow = RSET_GPR; - Reg obj, val, tmp; - /* No need for other object barriers (yet). */ - lua_assert(IR(ir->op1)->o == IR_UREFC); - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ir->op1; /* TValue *tv */ - asm_gencall(as, ci, args); - ra_allockreg(as, i64ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1) ); - obj = IR(ir->op1)->r; - tmp = ra_scratch(as, rset_exclude(allow, obj)); - emit_cond_branch(as, CC_EQ, l_end); - emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp); - emit_cond_branch(as, CC_EQ, l_end); - emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP); - val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); - emit_lso(as, A64I_LDRB, tmp, obj, - (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); - emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked)); -} - -/* -- Arithmetic and logic operations ------------------------------------- */ - -static void asm_fparith(ASMState *as, IRIns *ir, A64Ins ai) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - emit_dnm(as, ai, (dest & 31), (left & 31), (right & 31)); -} - -static void asm_fpunary(ASMState *as, IRIns *ir, A64Ins ai) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); - emit_dn(as, ai, (dest & 31), (left & 31)); -} - -static void asm_fpmath(ASMState *as, IRIns *ir) -{ - IRFPMathOp fpm = (IRFPMathOp)ir->op2; - if (fpm == IRFPM_SQRT) { - asm_fpunary(as, ir, A64I_FSQRTd); - } else if (fpm <= 
IRFPM_TRUNC) { - asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd : - fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd); - } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) { - return; - } else { - asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); - } -} - -/* Decide whether the operands lref/rref should be exchanged. - ** Returns 1 to swap and 0 to keep the order. Constants are kept on the right; - ** so are shifts, x+x adds and int to i64 sign-extending conversions. - */ -static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) -{ - IRIns *ir; - if (irref_isk(rref)) - return 0; /* Don't swap constants to the left. */ - if (irref_isk(lref)) - return 1; /* But swap constants to the right. */ - ir = IR(rref); - if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || - (ir->o == IR_ADD && ir->op1 == ir->op2) || - (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT))) - return 0; /* Don't swap fusable operands to the left. */ - ir = IR(lref); - if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || - (ir->o == IR_ADD && ir->op1 == ir->op2) || - (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT))) - return 1; /* But swap fusable operands to the right. */ - return 0; /* Otherwise don't swap. */ -} - -/* Emit a two-operand integer instruction for ir using base opcode ai. - ** The left operand is placed in a register and the right operand goes through - ** asm_fuseopm. Guarded IR types add A64I_S and a CC_VS guard. - */ -static void asm_intop(ASMState *as, IRIns *ir, A64Ins ai) -{ - IRRef lref = ir->op1, rref = ir->op2; - Reg left, dest = ra_dest(as, ir, RSET_GPR); - uint32_t m; - if ((ai & ~A64I_S) != A64I_SUBw && asm_swapops(as, lref, rref)) { - IRRef tmp = lref; lref = rref; rref = tmp; - } - left = ra_hintalloc(as, lref, dest, RSET_GPR); - if (irt_is64(ir->t)) ai |= A64I_X; - m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); - if (irt_isguard(ir->t)) { /* For IR_ADDOV etc. */ - asm_guardcc(as, CC_VS); - ai |= A64I_S; - } - emit_dn(as, ai^m, dest, left); -} - -static void asm_intop_s(ASMState *as, IRIns *ir, A64Ins ai) -{ - if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */ - as->flagmcp = NULL; - as->mcp++; - ai |= A64I_S; - } - asm_intop(as, ir, ai); -} - -static void asm_intneg(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - emit_dm(as, irt_is64(ir->t) ? A64I_NEGx : A64I_NEGw, dest, left); -} - -/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. 
*/ -static void asm_intmul(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest)); - Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - if (irt_isguard(ir->t)) { /* IR_MULOV */ - asm_guardcc(as, CC_NE); - emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */ - emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest); - emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest); - emit_dnm(as, A64I_SMULL, dest, right, left); - } else { - emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right); - } -} - -static void asm_add(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd)) - asm_fparith(as, ir, A64I_FADDd); - return; - } - asm_intop_s(as, ir, A64I_ADDw); -} - -static void asm_sub(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - if (!asm_fusemadd(as, ir, A64I_FNMSUBd, A64I_FMSUBd)) - asm_fparith(as, ir, A64I_FSUBd); - return; - } - asm_intop_s(as, ir, A64I_SUBw); -} - -static void asm_mul(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - asm_fparith(as, ir, A64I_FMULd); - return; - } - asm_intmul(as, ir); -} - -static void asm_div(ASMState *as, IRIns *ir) -{ -#if LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : - IRCALL_lj_carith_divu64); - else -#endif - asm_fparith(as, ir, A64I_FDIVd); -} - -static void asm_pow(ASMState *as, IRIns *ir) -{ -#if LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? 
IRCALL_lj_carith_powi64 : - IRCALL_lj_carith_powu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_powi); -} - -#define asm_addov(as, ir) asm_add(as, ir) -#define asm_subov(as, ir) asm_sub(as, ir) -#define asm_mulov(as, ir) asm_mul(as, ir) - -#define asm_abs(as, ir) asm_fpunary(as, ir, A64I_FABS) -#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) -#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) - -static void asm_mod(ASMState *as, IRIns *ir) -{ -#if LJ_HASFFI - if (!irt_isint(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : - IRCALL_lj_carith_modu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_modi); -} - -static void asm_neg(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - asm_fpunary(as, ir, A64I_FNEGd); - return; - } - asm_intneg(as, ir); -} - -static void asm_band(ASMState *as, IRIns *ir) -{ - A64Ins ai = A64I_ANDw; - if (asm_fuseandshift(as, ir)) - return; - if (as->flagmcp == as->mcp) { - /* Try to drop cmp r, #0. */ - as->flagmcp = NULL; - as->mcp++; - ai = A64I_ANDSw; - } - asm_intop(as, ir, ai); -} - -static void asm_borbxor(ASMState *as, IRIns *ir, A64Ins ai) -{ - IRRef lref = ir->op1, rref = ir->op2; - IRIns *irl = IR(lref), *irr = IR(rref); - if ((canfuse(as, irl) && irl->o == IR_BNOT && !irref_isk(rref)) || - (canfuse(as, irr) && irr->o == IR_BNOT && !irref_isk(lref))) { - Reg left, dest = ra_dest(as, ir, RSET_GPR); - uint32_t m; - if (irl->o == IR_BNOT) { - IRRef tmp = lref; lref = rref; rref = tmp; - } - left = ra_alloc1(as, lref, RSET_GPR); - ai |= A64I_ON; - if (irt_is64(ir->t)) ai |= A64I_X; - m = asm_fuseopm(as, ai, IR(rref)->op1, rset_exclude(RSET_GPR, left)); - emit_dn(as, ai^m, dest, left); - } else { - asm_intop(as, ir, ai); - } -} - -static void asm_bor(ASMState *as, IRIns *ir) -{ - if (asm_fuseorshift(as, ir)) - return; - asm_borbxor(as, ir, A64I_ORRw); -} - -#define asm_bxor(as, ir) asm_borbxor(as, ir, A64I_EORw) - -static void asm_bnot(ASMState *as, IRIns *ir) -{ - A64Ins ai = 
A64I_MVNw; - Reg dest = ra_dest(as, ir, RSET_GPR); - uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); - if (irt_is64(ir->t)) ai |= A64I_X; - emit_d(as, ai^m, dest); -} - -static void asm_bswap(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - emit_dn(as, irt_is64(ir->t) ? A64I_REVx : A64I_REVw, dest, left); -} - -static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh) -{ - int32_t shmask = irt_is64(ir->t) ? 63 : 31; - if (irref_isk(ir->op2)) { /* Constant shifts. */ - Reg left, dest = ra_dest(as, ir, RSET_GPR); - int32_t shift = (IR(ir->op2)->i & shmask); - IRIns *irl = IR(ir->op1); - if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw; - - /* Fuse BSHL + BSHR/BSAR into UBFM/SBFM aka UBFX/SBFX/UBFIZ/SBFIZ. */ - if ((sh == A64SH_LSR || sh == A64SH_ASR) && canfuse(as, irl)) { - if (irl->o == IR_BSHL && irref_isk(irl->op2)) { - int32_t shift2 = (IR(irl->op2)->i & shmask); - shift = ((shift - shift2) & shmask); - shmask -= shift2; - ir = irl; - } - } - - left = ra_alloc1(as, ir->op1, RSET_GPR); - switch (sh) { - case A64SH_LSL: - emit_dn(as, ai | A64F_IMMS(shmask-shift) | - A64F_IMMR((shmask-shift+1)&shmask), dest, left); - break; - case A64SH_LSR: case A64SH_ASR: - emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left); - break; - case A64SH_ROR: - emit_dnm(as, ai | A64F_IMMS(shift), dest, left, left); - break; - } - } else { /* Variable-length shifts. */ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_dnm(as, (shmask == 63 ? 
A64I_SHRx : A64I_SHRw) | A64F_BSH(sh), dest, left, right); - } -} - -#define asm_bshl(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSL) -#define asm_bshr(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR) -#define asm_bsar(as, ir) asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR) -#define asm_bror(as, ir) asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR) -#define asm_brol(as, ir) lua_assert(0) - -static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_dnm(as, A64I_CSELw|A64F_CC(cc), dest, left, right); - emit_nm(as, A64I_CMPw, left, right); -} - -static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc) -{ - Reg dest = (ra_dest(as, ir, RSET_FPR) & 31); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = ((left >> 8) & 31); left &= 31; - emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, left, right); - emit_nm(as, A64I_FCMPd, left, right); -} - -static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc) -{ - if (irt_isnum(ir->t)) - asm_fpmin_max(as, ir, fcc); - else - asm_intmin_max(as, ir, cc); -} - -#define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_HI) -#define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_LO) - -/* -- Comparisons --------------------------------------------------------- */ - -/* Map of comparisons to flags. ORDER IR. */ -static const uint8_t asm_compmap[IR_ABC+1] = { - /* op FP swp int cc FP cc */ - /* LT */ CC_GE + (CC_HS << 4), - /* GE x */ CC_LT + (CC_HI << 4), - /* LE */ CC_GT + (CC_HI << 4), - /* GT x */ CC_LE + (CC_HS << 4), - /* ULT x */ CC_HS + (CC_LS << 4), - /* UGE */ CC_LO + (CC_LO << 4), - /* ULE x */ CC_HI + (CC_LO << 4), - /* UGT */ CC_LS + (CC_LS << 4), - /* EQ */ CC_NE + (CC_NE << 4), - /* NE */ CC_EQ + (CC_EQ << 4), - /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. */ -}; - -/* FP comparisons. 
*/ -static void asm_fpcomp(ASMState *as, IRIns *ir) -{ - Reg left, right; - A64Ins ai; - int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1); - if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) { - left = (ra_alloc1(as, ir->op1, RSET_FPR) & 31); - right = 0; - ai = A64I_FCMPZd; - } else { - left = ra_alloc2(as, ir, RSET_FPR); - if (swp) { - right = (left & 31); left = ((left >> 8) & 31); - } else { - right = ((left >> 8) & 31); left &= 31; - } - ai = A64I_FCMPd; - } - asm_guardcc(as, (asm_compmap[ir->o] >> 4)); - emit_nm(as, ai, left, right); -} - -/* Integer comparisons. */ -static void asm_intcomp(ASMState *as, IRIns *ir) -{ - A64CC oldcc, cc = (asm_compmap[ir->o] & 15); - A64Ins ai = irt_is64(ir->t) ? A64I_CMPx : A64I_CMPw; - IRRef lref = ir->op1, rref = ir->op2; - Reg left; - uint32_t m; - int cmpprev0 = 0; - lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || - irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t)); - if (asm_swapops(as, lref, rref)) { - IRRef tmp = lref; lref = rref; rref = tmp; - if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ - else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */ - } - oldcc = cc; - if (irref_isk(rref) && get_k64val(IR(rref)) == 0) { - IRIns *irl = IR(lref); - if (cc == CC_GE) cc = CC_PL; - else if (cc == CC_LT) cc = CC_MI; - else if (cc > CC_NE) goto nocombine; /* Other conds don't work with tst. */ - cmpprev0 = (irl+1 == ir); - /* Combine and-cmp-bcc into tbz/tbnz or and-cmp into tst. */ - if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) { - IRRef blref = irl->op1, brref = irl->op2; - uint32_t m2 = 0; - Reg bleft; - if (asm_swapops(as, blref, brref)) { - Reg tmp = blref; blref = brref; brref = tmp; - } - if (irref_isk(brref)) { - uint64_t k = get_k64val(IR(brref)); - if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) { - asm_guardtnb(as, cc == CC_EQ ? 
A64I_TBZ : A64I_TBNZ, - ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k)); - return; - } - m2 = emit_isk13(k, irt_is64(irl->t)); - } - bleft = ra_alloc1(as, blref, RSET_GPR); - ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw); - if (!m2) - m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft)); - asm_guardcc(as, cc); - emit_n(as, ai^m2, bleft); - return; - } - if (cc == CC_EQ || cc == CC_NE) { - /* Combine cmp-bcc into cbz/cbnz. */ - ai = cc == CC_EQ ? A64I_CBZ : A64I_CBNZ; - if (irt_is64(ir->t)) ai |= A64I_X; - asm_guardcnb(as, ai, ra_alloc1(as, lref, RSET_GPR)); - return; - } - } -nocombine: - left = ra_alloc1(as, lref, RSET_GPR); - m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); - asm_guardcc(as, cc); - emit_n(as, ai^m, left); - /* Signed comparison with zero and referencing previous ins? */ - if (cmpprev0 && (oldcc <= CC_NE || oldcc >= CC_GE)) - as->flagmcp = as->mcp; /* Allow elimination of the compare. */ -} - -static void asm_comp(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) - asm_fpcomp(as, ir); - else - asm_intcomp(as, ir); -} - -#define asm_equal(as, ir) asm_comp(as, ir) - -/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ - -/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ -static void asm_hiop(ASMState *as, IRIns *ir) -{ - UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on 64 bit. */ -} - -/* -- Profiling ----------------------------------------------------------- */ - -static void asm_prof(ASMState *as, IRIns *ir) -{ - uint32_t k = emit_isk13(HOOK_PROFILE, 0); - lua_assert(k != 0); - UNUSED(ir); - asm_guardcc(as, CC_NE); - emit_n(as, A64I_TSTw^k, RID_TMP); - emit_lsptr(as, A64I_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask); -} - -/* -- Stack handling ------------------------------------------------------ */ - -/* Check Lua stack size for overflow. Use exit handler as fallback. 
*/ -static void asm_stack_check(ASMState *as, BCReg topslot, - IRIns *irp, RegSet allow, ExitNo exitno) -{ - Reg pbase; - uint32_t k; - if (irp) { - if (!ra_hasspill(irp->s)) { - pbase = irp->r; - lua_assert(ra_hasreg(pbase)); - } else if (allow) { - pbase = rset_pickbot(allow); - } else { - pbase = RID_RET; - emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */ - } - } else { - pbase = RID_BASE; - } - emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno)); - k = emit_isk12((8*topslot)); - lua_assert(k); - emit_n(as, A64I_CMPx^k, RID_TMP); - emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase); - emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP, - (int32_t)offsetof(lua_State, maxstack)); - if (irp) { /* Must not spill arbitrary registers in head of side trace. */ - if (ra_hasspill(irp->s)) - emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s)); - emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L)); - if (ra_hasspill(irp->s) && !allow) - emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */ - } else { - emit_getgl(as, RID_TMP, cur_L); - } -} - -/* Restore Lua stack from on-trace state. */ -static void asm_stack_restore(ASMState *as, SnapShot *snap) -{ - SnapEntry *map = &as->T->snapmap[snap->mapofs]; -#ifdef LUA_USE_ASSERT - SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; -#endif - MSize n, nent = snap->nent; - /* Store the value of all modified slots to the Lua stack. 
*/ - for (n = 0; n < nent; n++) { - SnapEntry sn = map[n]; - BCReg s = snap_slot(sn); - int32_t ofs = 8*((int32_t)s-1-LJ_FR2); - IRRef ref = snap_ref(sn); - IRIns *ir = IR(ref); - if ((sn & SNAP_NORESTORE)) - continue; - if (irt_isnum(ir->t)) { - Reg src = ra_alloc1(as, ref, RSET_FPR); - emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs); - } else { - asm_tvstore64(as, RID_BASE, ofs, ref); - } - checkmclim(as); - } - lua_assert(map + nent == flinks); -} - -/* -- GC handling --------------------------------------------------------- */ - -/* Check GC threshold and do one or more GC steps. */ -static void asm_gc_check(ASMState *as) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; - IRRef args[2]; - MCLabel l_end; - Reg tmp1, tmp2; - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ - asm_guardcnb(as, A64I_CBNZ, RID_RET); /* Assumes asm_snap_prep() is done. */ - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ASMREF_TMP2; /* MSize steps */ - asm_gencall(as, ci, args); - tmp1 = ra_releasetmp(as, ASMREF_TMP1); - tmp2 = ra_releasetmp(as, ASMREF_TMP2); - emit_loadi(as, tmp2, as->gcsteps); - /* Jump around GC step if GC total < GC threshold. */ - emit_cond_branch(as, CC_LS, l_end); - emit_nm(as, A64I_CMPx, RID_TMP, tmp2); - emit_lso(as, A64I_LDRx, tmp2, tmp1, - (int32_t)offsetof(global_State, gc.threshold)); - emit_lso(as, A64I_LDRx, RID_TMP, tmp1, - (int32_t)offsetof(global_State, gc.total)); - ra_allockreg(as, i64ptr(J2G(as->J)), tmp1); - as->gcsteps = 0; - checkmclim(as); -} - -/* -- Loop handling ------------------------------------------------------- */ - -/* Fixup the loop branch. */ -static void asm_loop_fixup(ASMState *as) -{ - MCode *p = as->mctop; - MCode *target = as->mcp; - if (as->loopinv) { /* Inverted loop branch? */ - uint32_t mask = (p[-2] & 0x7e000000) == 0x36000000 ? 
0x3fffu : 0x7ffffu; - ptrdiff_t delta = target - (p - 2); - /* asm_guard* already inverted the bcc/tnb/cnb and patched the final b. */ - p[-2] |= ((uint32_t)delta & mask) << 5; - } else { - ptrdiff_t delta = target - (p - 1); - p[-1] = A64I_B | ((uint32_t)(delta) & 0x03ffffffu); - } -} - -/* -- Head of trace ------------------------------------------------------- */ - -/* Reload L register from g->cur_L. */ -static void asm_head_lreg(ASMState *as) -{ - IRIns *ir = IR(ASMREF_L); - if (ra_used(ir)) { - Reg r = ra_dest(as, ir, RSET_GPR); - emit_getgl(as, r, cur_L); - ra_evictk(as); - } -} - -/* Coalesce BASE register for a root trace. */ -static void asm_head_root_base(ASMState *as) -{ - IRIns *ir; - asm_head_lreg(as); - ir = IR(REF_BASE); - if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) - ra_spill(as, ir); - ra_destreg(as, ir, RID_BASE); -} - -/* Coalesce BASE register for a side trace. */ -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) -{ - IRIns *ir; - asm_head_lreg(as); - ir = IR(REF_BASE); - if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) - ra_spill(as, ir); - if (ra_hasspill(irp->s)) { - rset_clear(allow, ra_dest(as, ir, allow)); - } else { - Reg r = irp->r; - lua_assert(ra_hasreg(r)); - rset_clear(allow, r); - if (r != ir->r && !rset_test(as->freeset, r)) - ra_restore(as, regcost_ref(as->cost[r])); - ra_destreg(as, ir, r); - } - return allow; -} - -/* -- Tail of trace ------------------------------------------------------- */ - -/* Fixup the tail code. */ -static void asm_tail_fixup(ASMState *as, TraceNo lnk) -{ - MCode *p = as->mctop; - MCode *target; - /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */ - int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED)); - if (spadj == 0) { - *--p = A64I_NOP; - as->mctop = p; - } else { - /* Patch stack adjustment. 
*/ - uint32_t k = emit_isk12(spadj); - lua_assert(k); - p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP); - } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - p[-1] = A64I_B | (((target-p)+1)&0x03ffffffu); -} - -/* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) -{ - MCode *p = as->mctop - 1; /* Leave room for exit branch. */ - if (as->loopref) { - as->invmcp = as->mcp = p; - } else { - as->mcp = p-1; /* Leave room for stack pointer adjustment. */ - as->invmcp = NULL; - } - *p = 0; /* Prevent load/store merging. */ -} - -/* -- Trace setup --------------------------------------------------------- */ - -/* Ensure there are enough stack slots for call arguments. */ -static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - IRRef args[CCI_NARGS_MAX*2]; - uint32_t i, nargs = CCI_XNARGS(ci); - int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; - asm_collectargs(as, ir, ci, args); - for (i = 0; i < nargs; i++) { - if (args[i] && irt_isfp(IR(args[i])->t)) { - if (nfpr > 0) nfpr--; else nslots += 2; - } else { - if (ngpr > 0) ngpr--; else nslots += 2; - } - } - if (nslots > as->evenspill) /* Leave room for args in stack slots. */ - as->evenspill = nslots; - return REGSP_HINT(RID_RET); -} - -static void asm_setup_target(ASMState *as) -{ - /* May need extra exit for asm_stack_check on side traces. */ - asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); -} - -/* -- Trace patching ------------------------------------------------------ */ - -/* Patch exit jumps of existing machine code to a new target. 
*/ -void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) -{ - MCode *p = T->mcode; - MCode *pe = (MCode *)((char *)p + T->szmcode); - MCode *cstart = NULL, *cend = p; - MCode *mcarea = lj_mcode_patch(J, p, 0); - MCode *px = exitstub_trace_addr(T, exitno); - for (; p < pe; p++) { - /* Look for exitstub branch, replace with branch to target. */ - uint32_t ins = *p; - if ((ins & 0xff000000u) == 0x54000000u && - ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { - /* Patch bcc exitstub. */ - *p = (ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u); - cend = p+1; - if (!cstart) cstart = p; - } else if ((ins & 0xfc000000u) == 0x14000000u && - ((ins ^ (px-p)) & 0x03ffffffu) == 0) { - /* Patch b exitstub. */ - *p = (ins & 0xfc000000u) | ((target-p) & 0x03ffffffu); - cend = p+1; - if (!cstart) cstart = p; - } else if ((ins & 0x7e000000u) == 0x34000000u && - ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { - /* Patch cbz/cbnz exitstub. */ - *p = (ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u); - cend = p+1; - if (!cstart) cstart = p; - } else if ((ins & 0x7e000000u) == 0x36000000u && - ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) { - /* Patch tbz/tbnz exitstub. */ - *p = (ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u); - cend = p+1; - if (!cstart) cstart = p; - } - } - lua_assert(cstart != NULL); - lj_mcode_sync(cstart, cend); - lj_mcode_patch(J, mcarea, 1); -} - diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h deleted file mode 100644 index affe7d8996..0000000000 --- a/src/lj_asm_mips.h +++ /dev/null @@ -1,2505 +0,0 @@ -/* -** MIPS IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -*/ - -/* -- Register allocator extensions --------------------------------------- */ - -/* Allocate a register with a hint. 
*/ -static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) -{ - Reg r = IR(ref)->r; - if (ra_noreg(r)) { - if (!ra_hashint(r) && !iscrossref(as, ref)) - ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ - r = ra_allocref(as, ref, allow); - } - ra_noweak(as, r); - return r; -} - -/* Allocate a register or RID_ZERO. */ -static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow) -{ - Reg r = IR(ref)->r; - if (ra_noreg(r)) { - if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(IR(ref)) == 0) - return RID_ZERO; - r = ra_allocref(as, ref, allow); - } else { - ra_noweak(as, r); - } - return r; -} - -/* Allocate two source registers for three-operand instructions. */ -static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) -{ - IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); - Reg left = irl->r, right = irr->r; - if (ra_hasreg(left)) { - ra_noweak(as, left); - if (ra_noreg(right)) - right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left)); - else - ra_noweak(as, right); - } else if (ra_hasreg(right)) { - ra_noweak(as, right); - left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right)); - } else if (ra_hashint(right)) { - right = ra_alloc1z(as, ir->op2, allow); - left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right)); - } else { - left = ra_alloc1z(as, ir->op1, allow); - right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left)); - } - return left | (right << 8); -} - -/* -- Guard handling ------------------------------------------------------ */ - -/* Need some spare long-range jump slots, for out-of-range branches. */ -#define MIPS_SPAREJUMP 4 - -/* Setup spare long-range jump slots per mcarea. */ -static void asm_sparejump_setup(ASMState *as) -{ - MCode *mxp = as->mcbot; - /* Assumes sizeof(MCLink) == 8. 
*/ - if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == 8) { - lua_assert(MIPSI_NOP == 0); - memset(mxp+2, 0, MIPS_SPAREJUMP*8); - mxp += MIPS_SPAREJUMP*2; - lua_assert(mxp < as->mctop); - lj_mcode_sync(as->mcbot, mxp); - lj_mcode_commitbot(as->J, mxp); - as->mcbot = mxp; - as->mclim = as->mcbot + MCLIM_REDZONE; - } -} - -/* Setup exit stub after the end of each trace. */ -static void asm_exitstub_setup(ASMState *as) -{ - MCode *mxp = as->mctop; - /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */ - *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno; - *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu); - lua_assert(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0); - *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0; - as->mctop = mxp; -} - -/* Keep this in-sync with exitstub_trace_addr(). */ -#define asm_exitstub_addr(as) ((as)->mctop) - -/* Emit conditional branch to exit for guard. */ -static void asm_guard(ASMState *as, MIPSIns mi, Reg rs, Reg rt) -{ - MCode *target = asm_exitstub_addr(as); - MCode *p = as->mcp; - if (LJ_UNLIKELY(p == as->invmcp)) { - as->invmcp = NULL; - as->loopinv = 1; - as->mcp = p+1; - mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u); /* Invert cond. */ - target = p; /* Patch target later in asm_loop_fixup. */ - } - emit_ti(as, MIPSI_LI, RID_TMP, as->snapno); - emit_branch(as, mi, rs, rt, target); -} - -/* -- Operand fusion ------------------------------------------------------ */ - -/* Limit linear search to this distance. Avoids O(n^2) behavior. */ -#define CONFLICT_SEARCH_LIM 31 - -/* Check if there's no conflicting instruction between curins and ref. */ -static int noconflict(ASMState *as, IRRef ref, IROp conflict) -{ - IRIns *ir = as->ir; - IRRef i = as->curins; - if (i > ref + CONFLICT_SEARCH_LIM) - return 0; /* Give up, ref is too far away. */ - while (--i > ref) - if (ir[i].o == conflict) - return 0; /* Conflict found. */ - return 1; /* Ok, no conflict. 
*/ -} - -/* Fuse the array base of colocated arrays. */ -static int32_t asm_fuseabase(ASMState *as, IRRef ref) -{ - IRIns *ir = IR(ref); - if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && - !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) - return (int32_t)sizeof(GCtab); - return 0; -} - -/* Fuse array/hash/upvalue reference into register+offset operand. */ -static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) -{ - IRIns *ir = IR(ref); - if (ra_noreg(ir->r)) { - if (ir->o == IR_AREF) { - if (mayfuse(as, ref)) { - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? tab : ir->op1; - ofs += 8*IR(ir->op2)->i; - if (checki16(ofs)) { - *ofsp = ofs; - return ra_alloc1(as, refa, allow); - } - } - } - } else if (ir->o == IR_HREFK) { - if (mayfuse(as, ref)) { - int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); - if (checki16(ofs)) { - *ofsp = ofs; - return ra_alloc1(as, ir->op1, allow); - } - } - } else if (ir->o == IR_UREFC) { - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - intptr_t ofs = (intptr_t)&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv; - intptr_t jgl = (intptr_t)J2G(as->J); - if ((uintptr_t)(ofs-jgl) < 65536) { - *ofsp = ofs-jgl-32768; - return RID_JGL; - } else { - *ofsp = (int16_t)ofs; - return ra_allock(as, ofs-(int16_t)ofs, allow); - } - } - } - } - *ofsp = 0; - return ra_alloc1(as, ref, allow); -} - -/* Fuse XLOAD/XSTORE reference into load/store operand. 
*/ -static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref, - RegSet allow, int32_t ofs) -{ - IRIns *ir = IR(ref); - Reg base; - if (ra_noreg(ir->r) && canfuse(as, ir)) { - if (ir->o == IR_ADD) { - intptr_t ofs2; - if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(IR(ir->op2)), - checki16(ofs2))) { - ref = ir->op1; - ofs = (int32_t)ofs2; - } - } else if (ir->o == IR_STRREF) { - intptr_t ofs2 = 65536; - lua_assert(ofs == 0); - ofs = (int32_t)sizeof(GCstr); - if (irref_isk(ir->op2)) { - ofs2 = ofs + get_kval(IR(ir->op2)); - ref = ir->op1; - } else if (irref_isk(ir->op1)) { - ofs2 = ofs + get_kval(IR(ir->op1)); - ref = ir->op2; - } - if (!checki16(ofs2)) { - /* NYI: Fuse ADD with constant. */ - Reg right, left = ra_alloc2(as, ir, allow); - right = (left >> 8); left &= 255; - emit_hsi(as, mi, rt, RID_TMP, ofs); - emit_dst(as, MIPSI_AADDU, RID_TMP, left, right); - return; - } - ofs = ofs2; - } - } - base = ra_alloc1(as, ref, allow); - emit_hsi(as, mi, rt, base, ofs); -} - -/* -- Calls --------------------------------------------------------------- */ - -/* Generate a call to a C function. */ -static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) -{ - uint32_t n, nargs = CCI_XNARGS(ci); - int32_t ofs = LJ_32 ? 16 : 0; -#if LJ_SOFTFP - Reg gpr = REGARG_FIRSTGPR; -#else - Reg gpr, fpr = REGARG_FIRSTFPR; -#endif - if ((void *)ci->func) - emit_call(as, (void *)ci->func, 1); -#if !LJ_SOFTFP - for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) - as->cost[gpr] = REGCOST(~0u, ASMREF_L); - gpr = REGARG_FIRSTGPR; -#endif - for (n = 0; n < nargs; n++) { /* Setup args. */ - IRRef ref = args[n]; - if (ref) { - IRIns *ir = IR(ref); -#if !LJ_SOFTFP - if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR && - !(ci->flags & CCI_VARARG)) { - lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ - ra_leftov(as, fpr, ref); - fpr += LJ_32 ? 2 : 1; - gpr += (LJ_32 && irt_isnum(ir->t)) ? 
2 : 1; - } else -#endif - { -#if LJ_32 && !LJ_SOFTFP - fpr = REGARG_LASTFPR+1; -#endif - if (LJ_32 && irt_isnum(ir->t)) gpr = (gpr+1) & ~1; - if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ -#if !LJ_SOFTFP - if (irt_isfp(ir->t)) { - RegSet of = as->freeset; - Reg r; - /* Workaround to protect argument GPRs from being used for remat. */ - as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1); - r = ra_alloc1(as, ref, RSET_FPR); - as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); - if (irt_isnum(ir->t)) { -#if LJ_32 - emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1); - emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r); - lua_assert(rset_test(as->freeset, gpr+1)); /* Already evicted. */ - gpr += 2; -#else - emit_tg(as, MIPSI_DMFC1, gpr, r); - gpr++; fpr++; -#endif - } else if (irt_isfloat(ir->t)) { - emit_tg(as, MIPSI_MFC1, gpr, r); - gpr++; -#if LJ_64 - fpr++; -#endif - } - } else -#endif - { - ra_leftov(as, gpr, ref); - gpr++; -#if LJ_64 - fpr++; -#endif - } - } else { - Reg r = ra_alloc1z(as, ref, !LJ_SOFTFP && irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); -#if LJ_32 - if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; - emit_spstore(as, ir, r, ofs); - ofs += irt_isnum(ir->t) ? 8 : 4; -#else - emit_spstore(as, ir, r, ofs + ((LJ_BE && (LJ_SOFTFP || r < RID_MAX_GPR) && !irt_is64(ir->t)) ? 4 : 0)); - ofs += 8; -#endif - } - } - } else { -#if !LJ_SOFTFP - fpr = REGARG_LASTFPR+1; -#endif - if (gpr <= REGARG_LASTGPR) { - gpr++; -#if LJ_64 - fpr++; -#endif - } else { - ofs += LJ_32 ? 4 : 8; - } - } - checkmclim(as); - } -} - -/* Setup result reg/sp for call. Evict scratch regs. 
*/ -static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - RegSet drop = RSET_SCRATCH; -#if LJ_32 - int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); -#endif -#if !LJ_SOFTFP - if ((ci->flags & CCI_NOFPRCLOBBER)) - drop &= ~RSET_FPR; -#endif - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ -#if LJ_32 - if (hiop && ra_hasreg((ir+1)->r)) - rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ -#endif - ra_evictset(as, drop); /* Evictions must be performed first. */ - if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); - if (!LJ_SOFTFP && irt_isfp(ir->t)) { - if ((ci->flags & CCI_CASTU64)) { - int32_t ofs = sps_scale(ir->s); - Reg dest = ir->r; - if (ra_hasreg(dest)) { - ra_free(as, dest); - ra_modified(as, dest); -#if LJ_32 - emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1); - emit_tg(as, MIPSI_MTC1, RID_RETLO, dest); -#else - emit_tg(as, MIPSI_DMTC1, RID_RET, dest); -#endif - } - if (ofs) { -#if LJ_32 - emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0)); - emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4)); -#else - emit_tsi(as, MIPSI_SD, RID_RET, RID_SP, ofs); -#endif - } - } else { - ra_destreg(as, ir, RID_FPRET); - } -#if LJ_32 - } else if (hiop) { - ra_destpair(as, ir); -#endif - } else { - ra_destreg(as, ir, RID_RET); - } - } -} - -static void asm_callx(ASMState *as, IRIns *ir) -{ - IRRef args[CCI_NARGS_MAX*2]; - CCallInfo ci; - IRRef func; - IRIns *irf; - ci.flags = asm_callx_flags(as, ir); - asm_collectargs(as, ir, &ci, args); - asm_setupresult(as, ir, &ci); - func = ir->op2; irf = IR(func); - if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } - if (irref_isk(func)) { /* Call to constant address. */ - ci.func = (ASMFunction)(void *)get_kval(irf); - } else { /* Need specific register for indirect calls. 
*/ - Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); - MCode *p = as->mcp; - if (r == RID_CFUNCADDR) - *--p = MIPSI_NOP; - else - *--p = MIPSI_MOVE | MIPSF_D(RID_CFUNCADDR) | MIPSF_S(r); - *--p = MIPSI_JALR | MIPSF_S(r); - as->mcp = p; - ci.func = (ASMFunction)(void *)0; - } - asm_gencall(as, &ci, args); -} - -#if !LJ_SOFTFP -static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) -{ - /* The modified regs must match with the *.dasc implementation. */ - RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)| - RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR); - if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); - ra_evictset(as, drop); - ra_destreg(as, ir, RID_FPRET); - emit_call(as, (void *)lj_ir_callinfo[id].func, 0); - ra_leftov(as, REGARG_FIRSTFPR, ir->op1); -} -#endif - -/* -- Returns ------------------------------------------------------------- */ - -/* Return to lower frame. Guard that it goes to the right spot. */ -static void asm_retf(ASMState *as, IRIns *ir) -{ - Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); - void *pc = ir_kptr(IR(ir->op2)); - int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); - as->topslot -= (BCReg)delta; - if ((int32_t)as->topslot < 0) as->topslot = 0; - irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. 
*/ - emit_setgl(as, base, jit_base); - emit_addptr(as, base, -8*delta); - asm_guard(as, MIPSI_BNE, RID_TMP, - ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base))); - emit_tsi(as, MIPSI_AL, RID_TMP, base, -8); -} - -/* -- Type conversions ---------------------------------------------------- */ - -#if !LJ_SOFTFP -static void asm_tointg(ASMState *as, IRIns *ir, Reg left) -{ - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - Reg dest = ra_dest(as, ir, RSET_GPR); - asm_guard(as, MIPSI_BC1F, 0, 0); - emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left); - emit_fg(as, MIPSI_CVT_D_W, tmp, tmp); - emit_tg(as, MIPSI_MFC1, dest, tmp); - emit_fg(as, MIPSI_CVT_W_D, tmp, left); -} - -static void asm_tobit(ASMState *as, IRIns *ir) -{ - RegSet allow = RSET_FPR; - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, allow); - Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); - Reg tmp = ra_scratch(as, rset_clear(allow, right)); - emit_tg(as, MIPSI_MFC1, dest, tmp); - emit_fgh(as, MIPSI_ADD_D, tmp, left, right); -} -#endif - -static void asm_conv(ASMState *as, IRIns *ir) -{ - IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); -#if !LJ_SOFTFP - int stfp = (st == IRT_NUM || st == IRT_FLOAT); -#endif -#if LJ_64 - int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); -#endif - IRRef lref = ir->op1; -#if LJ_32 - lua_assert(!(irt_isint64(ir->t) || - (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ -#endif -#if LJ_32 && LJ_SOFTFP - /* FP conversions are handled by SPLIT. */ - lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); - /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ -#else - lua_assert(irt_type(ir->t) != st); - if (irt_isfp(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - if (stfp) { /* FP to FP conversion. */ - emit_fg(as, st == IRT_NUM ? MIPSI_CVT_S_D : MIPSI_CVT_D_S, - dest, ra_alloc1(as, lref, RSET_FPR)); - } else if (st == IRT_U32) { /* U32 to FP conversion. 
*/ - /* y = (x ^ 0x8000000) + 2147483648.0 */ - Reg left = ra_alloc1(as, lref, RSET_GPR); - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); - if (irt_isfloat(ir->t)) - emit_fg(as, MIPSI_CVT_S_D, dest, dest); - /* Must perform arithmetic with doubles to keep the precision. */ - emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp); - emit_fg(as, MIPSI_CVT_D_W, dest, dest); - emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); - emit_tg(as, MIPSI_MTC1, RID_TMP, dest); - emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left); - emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); -#if LJ_64 - } else if(st == IRT_U64) { /* U64 to FP conversion. */ - /* if (x >= 1u<<63) y = (double)(int64_t)(x&(1u<<63)-1) + pow(2.0, 63) */ - Reg left = ra_alloc1(as, lref, RSET_GPR); - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); - MCLabel l_end = emit_label(as); - if (irt_isfloat(ir->t)) { - emit_fgh(as, MIPSI_ADD_S, dest, dest, tmp); - emit_lsptr(as, MIPSI_LWC1, (tmp & 31), (void *)&as->J->k32[LJ_K32_2P63], - rset_exclude(RSET_GPR, left)); - emit_fg(as, MIPSI_CVT_S_L, dest, dest); - } else { - emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp); - emit_lsptr(as, MIPSI_LDC1, (tmp & 31), (void *)&as->J->k64[LJ_K64_2P63], - rset_exclude(RSET_GPR, left)); - emit_fg(as, MIPSI_CVT_D_L, dest, dest); - } - emit_branch(as, MIPSI_BGEZ, left, RID_ZERO, l_end); - emit_tg(as, MIPSI_DMTC1, RID_TMP, dest); - emit_tsml(as, MIPSI_DEXTM, RID_TMP, left, 30, 0); -#endif - } else { /* Integer to FP conversion. */ - Reg left = ra_alloc1(as, lref, RSET_GPR); -#if LJ_32 - emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W, - dest, dest); - emit_tg(as, MIPSI_MTC1, left, dest); -#else - MIPSIns mi = irt_isfloat(ir->t) ? - (st64 ? MIPSI_CVT_S_L : MIPSI_CVT_S_W) : - (st64 ? MIPSI_CVT_D_L : MIPSI_CVT_D_W); - emit_fg(as, mi, dest, dest); - emit_tg(as, st64 ? MIPSI_DMTC1 : MIPSI_MTC1, left, dest); -#endif - } - } else if (stfp) { /* FP to integer conversion. 
*/ - if (irt_isguard(ir->t)) { - /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); - asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, lref, RSET_FPR); - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - if (irt_isu32(ir->t)) { /* FP to U32 conversion. */ - /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */ - emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP); - emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); - emit_tg(as, MIPSI_MFC1, dest, tmp); - emit_fg(as, st == IRT_FLOAT ? MIPSI_FLOOR_W_S : MIPSI_FLOOR_W_D, - tmp, tmp); - emit_fgh(as, st == IRT_FLOAT ? MIPSI_SUB_S : MIPSI_SUB_D, - tmp, left, tmp); - if (st == IRT_FLOAT) - emit_lsptr(as, MIPSI_LWC1, (tmp & 31), - (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); - else - emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); -#if LJ_64 - } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */ - MCLabel l_end; - emit_tg(as, MIPSI_DMFC1, dest, tmp); - l_end = emit_label(as); - /* For inputs >= 2^63 add -2^64 and convert again. */ - if (st == IRT_NUM) { - emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp); - emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp); - emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)&as->J->k64[LJ_K64_M2P64], - rset_exclude(RSET_GPR, dest)); - emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */ - emit_branch(as, MIPSI_BC1T, 0, 0, l_end); - emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); - emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)&as->J->k64[LJ_K64_2P63], - rset_exclude(RSET_GPR, dest)); - } else { - emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp); - emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp); - emit_lsptr(as, MIPSI_LWC1, (tmp & 31), - (void *)&as->J->k32[LJ_K32_M2P64], - rset_exclude(RSET_GPR, dest)); - emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. 
*/ - emit_branch(as, MIPSI_BC1T, 0, 0, l_end); - emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); - emit_lsptr(as, MIPSI_LWC1, (tmp & 31), - (void *)&as->J->k32[LJ_K32_2P63], - rset_exclude(RSET_GPR, dest)); - } -#endif - } else { -#if LJ_32 - emit_tg(as, MIPSI_MFC1, dest, tmp); - emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, - tmp, left); -#else - MIPSIns mi = irt_is64(ir->t) ? - (st == IRT_NUM ? MIPSI_TRUNC_L_D : MIPSI_TRUNC_L_S) : - (st == IRT_NUM ? MIPSI_TRUNC_W_D : MIPSI_TRUNC_W_S); - emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, left); - emit_fg(as, mi, left, left); -#endif - } - } - } else -#endif - { - Reg dest = ra_dest(as, ir, RSET_GPR); - if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); - if ((ir->op2 & IRCONV_SEXT)) { - if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { - emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left); - } else { - uint32_t shift = st == IRT_I8 ? 24 : 16; - emit_dta(as, MIPSI_SRA, dest, dest, shift); - emit_dta(as, MIPSI_SLL, dest, left, shift); - } - } else { - emit_tsi(as, MIPSI_ANDI, dest, left, - (int32_t)(st == IRT_U8 ? 0xff : 0xffff)); - } - } else { /* 32/64 bit integer conversions. */ -#if LJ_32 - /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */ - ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ -#else - if (irt_is64(ir->t)) { - if (st64) { - /* 64/64 bit no-op (cast)*/ - ra_leftov(as, dest, lref); - } else { - Reg left = ra_alloc1(as, lref, RSET_GPR); - if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */ - emit_dta(as, MIPSI_SLL, dest, left, 0); - } else { /* 32 to 64 bit zero extension. */ - emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0); - } - } - } else { - if (st64) { - /* This is either a 32 bit reg/reg mov which zeroes the hiword - ** or a load of the loword from a 64 bit address. 
- */ - Reg left = ra_alloc1(as, lref, RSET_GPR); - emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0); - } else { /* 32/32 bit no-op (cast). */ - /* Do nothing, but may need to move regs. */ - ra_leftov(as, dest, lref); - } - } -#endif - } - } -} - -static void asm_strto(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; - IRRef args[2]; - int32_t ofs = 0; -#if LJ_SOFTFP - ra_evictset(as, RSET_SCRATCH); - if (ra_used(ir)) { - if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && - (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) { - int i; - for (i = 0; i < 2; i++) { - Reg r = (ir+i)->r; - if (ra_hasreg(r)) { - ra_free(as, r); - ra_modified(as, r); - emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); - } - } - ofs = sps_scale(ir->s & ~1); - } else { - Reg rhi = ra_dest(as, ir+1, RSET_GPR); - Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); - emit_tsi(as, MIPSI_LW, rhi, RID_SP, ofs+(LJ_BE?0:4)); - emit_tsi(as, MIPSI_LW, rlo, RID_SP, ofs+(LJ_BE?4:0)); - } - } -#else - RegSet drop = RSET_SCRATCH; - if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ - ra_evictset(as, drop); - ofs = sps_scale(ir->s); -#endif - asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); /* Test return status. */ - args[0] = ir->op1; /* GCstr *str */ - args[1] = ASMREF_TMP1; /* TValue *n */ - asm_gencall(as, ci, args); - /* Store the result to the spill slot or temp slots. */ - emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), - RID_SP, ofs); -} - -/* -- Memory references --------------------------------------------------- */ - -#if LJ_64 -/* Store tagged value for ref at base+ofs. 
*/ -static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) -{ - RegSet allow = rset_exclude(RSET_GPR, base); - IRIns *ir = IR(ref); - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); - if (irref_isk(ref)) { - TValue k; - lj_ir_kvalue(as->J->L, &k, ir); - emit_tsi(as, MIPSI_SD, ra_allock(as, (int64_t)k.u64, allow), base, ofs); - } else { - Reg src = ra_alloc1(as, ref, allow); - Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, - rset_exclude(allow, src)); - emit_tsi(as, MIPSI_SD, RID_TMP, base, ofs); - if (irt_isinteger(ir->t)) { - emit_dst(as, MIPSI_DADDU, RID_TMP, RID_TMP, type); - emit_tsml(as, MIPSI_DEXT, RID_TMP, src, 31, 0); - } else { - emit_dst(as, MIPSI_DADDU, RID_TMP, src, type); - } - } -} -#endif - -/* Get pointer to TValue. */ -static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) -{ - IRIns *ir = IR(ref); - if (irt_isnum(ir->t)) { - if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ - ra_allockreg(as, igcptr(ir_knum(ir)), dest); - else /* Otherwise force a spill and use the spill slot. */ - emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir)); - } else { - /* Otherwise use g->tmptv to hold the TValue. 
*/ -#if LJ_32 - RegSet allow = rset_exclude(RSET_GPR, dest); - Reg type; - emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, (int32_t)(offsetof(global_State, tmptv)-32768)); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, allow); - emit_setgl(as, src, tmptv.gcr); - } - if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) - type = ra_alloc1(as, ref+1, allow); - else - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - emit_setgl(as, type, tmptv.it); -#else - asm_tvstore64(as, dest, 0, ref); - emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL, - (int32_t)(offsetof(global_State, tmptv)-32768)); -#endif - } -} - -static void asm_aref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx, base; - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? tab : ir->op1; - ofs += 8*IR(ir->op2)->i; - if (checki16(ofs)) { - base = ra_alloc1(as, refa, RSET_GPR); - emit_tsi(as, MIPSI_AADDIU, dest, base, ofs); - return; - } - } - base = ra_alloc1(as, ir->op1, RSET_GPR); - idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); - emit_dst(as, MIPSI_AADDU, dest, RID_TMP, base); - emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3); -} - -/* Inlined hash lookup. Specialized for key type and for const keys. 
-** The equivalent C code is: -** Node *n = hashkey(t, key); -** do { -** if (lj_obj_equal(&n->key, key)) return &n->val; -** } while ((n = nextnode(n))); -** return niltv(L); -*/ -static void asm_href(ASMState *as, IRIns *ir, IROp merge) -{ - RegSet allow = RSET_GPR; - int destused = ra_used(ir); - Reg dest = ra_dest(as, ir, allow); - Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); - Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2; - IRRef refkey = ir->op2; - IRIns *irkey = IR(refkey); - int isk = irref_isk(refkey); - IRType1 kt = irkey->t; - uint32_t khash; - MCLabel l_end, l_loop, l_next; - - rset_clear(allow, tab); -#if LJ_32 && LJ_SOFTFP - if (!isk) { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - if (irkey[1].o == IR_HIOP) { - if (ra_hasreg((irkey+1)->r)) { - type = tmpnum = (irkey+1)->r; - tmp1 = ra_scratch(as, allow); - rset_clear(allow, tmp1); - ra_noweak(as, tmpnum); - } else { - type = tmpnum = ra_allocref(as, refkey+1, allow); - } - rset_clear(allow, tmpnum); - } else { - type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow); - rset_clear(allow, type); - } - } -#else - if (irt_isnum(kt)) { - key = ra_alloc1(as, refkey, RSET_FPR); - tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); - } else if (!irt_ispri(kt)) { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); -#if LJ_32 - type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow); - rset_clear(allow, type); -#endif - } -#endif - tmp2 = ra_scratch(as, allow); - rset_clear(allow, tmp2); - - /* Key not found in chain: jump to exit (if merged) or load niltv. */ - l_end = emit_label(as); - as->invmcp = NULL; - if (merge == IR_NE) - asm_guard(as, MIPSI_B, RID_ZERO, RID_ZERO); - else if (destused) - emit_loada(as, dest, niltvg(J2G(as->J))); - /* Follow hash chain until the end. 
*/ - emit_move(as, dest, tmp1); - l_loop = --as->mcp; - emit_tsi(as, MIPSI_AL, tmp1, dest, (int32_t)offsetof(Node, next)); - l_next = emit_label(as); - - /* Type and value comparison. */ - if (merge == IR_EQ) { /* Must match asm_guard(). */ - emit_ti(as, MIPSI_LI, RID_TMP, as->snapno); - l_end = asm_exitstub_addr(as); - } - if (!LJ_SOFTFP && irt_isnum(kt)) { - emit_branch(as, MIPSI_BC1T, 0, 0, l_end); - emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key); - *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */ - emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next); - emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM); -#if LJ_32 - emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n)); - } else { - if (irt_ispri(kt)) { - emit_branch(as, MIPSI_BEQ, tmp1, type, l_end); - } else { - emit_branch(as, MIPSI_BEQ, tmp2, key, l_end); - emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); - emit_branch(as, MIPSI_BNE, tmp1, type, l_next); - } - } - emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it)); - *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); -#else - emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15); - emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum); - emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); - } else if (irt_isaddr(kt)) { - Reg refk = tmp2; - if (isk) { - int64_t k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; - refk = ra_allock(as, k, allow); - rset_clear(allow, refk); - } - emit_branch(as, MIPSI_BEQ, tmp1, refk, l_end); - emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key)); - } else { - Reg pri = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); - rset_clear(allow, pri); - lua_assert(irt_ispri(kt) && !irt_isnil(kt)); - emit_branch(as, MIPSI_BEQ, tmp1, pri, l_end); - emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key)); - } - *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); - if (!isk && irt_isaddr(kt)) { - type = ra_allock(as, 
(int64_t)irt_toitype(kt) << 47, allow); - emit_dst(as, MIPSI_DADDU, tmp2, key, type); - rset_clear(allow, type); - } -#endif - - /* Load main position relative to tab->node into dest. */ - khash = isk ? ir_khash(irkey) : 1; - if (khash == 0) { - emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node)); - } else { - Reg tmphash = tmp1; - if (isk) - tmphash = ra_allock(as, khash, allow); - emit_dst(as, MIPSI_AADDU, dest, dest, tmp1); - lua_assert(sizeof(Node) == 24); - emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1); - emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3); - emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5); - emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash); - emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node)); - emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); - if (isk) { - /* Nothing to do. */ - } else if (irt_isstr(kt)) { - emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash)); - } else { /* Must match with hash*() in lj_tab.c. */ - emit_dst(as, MIPSI_SUBU, tmp1, tmp1, tmp2); - emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31); - emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2); - emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31); - emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest); -#if LJ_32 - if (LJ_SOFTFP ? 
(irkey[1].o == IR_HIOP) : irt_isnum(kt)) { - emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); - if ((as->flags & JIT_F_MIPSXXR2)) { - emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); - } else { - emit_dst(as, MIPSI_OR, dest, dest, tmp1); - emit_dta(as, MIPSI_SLL, tmp1, tmp1, HASH_ROT1); - emit_dta(as, MIPSI_SRL, dest, tmp1, (-HASH_ROT1)&31); - } - emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); -#if LJ_SOFTFP - emit_ds(as, MIPSI_MOVE, tmp1, type); - emit_ds(as, MIPSI_MOVE, tmp2, key); -#else - emit_tg(as, MIPSI_MFC1, tmp2, key); - emit_tg(as, MIPSI_MFC1, tmp1, key+1); -#endif - } else { - emit_dst(as, MIPSI_XOR, tmp2, key, tmp1); - emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31); - emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow)); - } -#else - emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); - emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); - if (irt_isnum(kt)) { - emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); - emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0); - emit_dta(as, MIPSI_SLL, tmp2, LJ_SOFTFP ? key : tmp1, 0); -#if !LJ_SOFTFP - emit_tg(as, MIPSI_DMFC1, tmp1, key); -#endif - } else { - checkmclim(as); - emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0); - emit_dta(as, MIPSI_SLL, tmp2, key, 0); - emit_dst(as, MIPSI_DADDU, tmp1, key, type); - } -#endif - } - } -} - -static void asm_hrefk(ASMState *as, IRIns *ir) -{ - IRIns *kslot = IR(ir->op2); - IRIns *irkey = IR(kslot->op1); - int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); - int32_t kofs = ofs + (int32_t)offsetof(Node, key); - Reg dest = (ra_used(ir)||ofs > 32736) ? 
ra_dest(as, ir, RSET_GPR) : RID_NONE; - Reg node = ra_alloc1(as, ir->op1, RSET_GPR); - RegSet allow = rset_exclude(RSET_GPR, node); - Reg idx = node; -#if LJ_32 - Reg key = RID_NONE, type = RID_TMP; - int32_t lo, hi; -#else - Reg key = ra_scratch(as, allow); - int64_t k; -#endif - lua_assert(ofs % sizeof(Node) == 0); - if (ofs > 32736) { - idx = dest; - rset_clear(allow, dest); - kofs = (int32_t)offsetof(Node, key); - } else if (ra_hasreg(dest)) { - emit_tsi(as, MIPSI_AADDIU, dest, node, ofs); - } -#if LJ_32 - if (!irt_ispri(irkey->t)) { - key = ra_scratch(as, allow); - rset_clear(allow, key); - } - if (irt_isnum(irkey->t)) { - lo = (int32_t)ir_knum(irkey)->u32.lo; - hi = (int32_t)ir_knum(irkey)->u32.hi; - } else { - lo = irkey->i; - hi = irt_toitype(irkey->t); - if (!ra_hasreg(key)) - goto nolo; - } - asm_guard(as, MIPSI_BNE, key, lo ? ra_allock(as, lo, allow) : RID_ZERO); -nolo: - asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO); - if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0)); - emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4)); -#else - if (irt_ispri(irkey->t)) { - lua_assert(!irt_isnil(irkey->t)); - k = ~((int64_t)~irt_toitype(irkey->t) << 47); - } else if (irt_isnum(irkey->t)) { - k = (int64_t)ir_knum(irkey)->u64; - } else { - k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey); - } - asm_guard(as, MIPSI_BNE, key, ra_allock(as, k, allow)); - emit_tsi(as, MIPSI_LD, key, idx, kofs); -#endif - if (ofs > 32736) - emit_tsi(as, MIPSI_AADDU, dest, node, ra_allock(as, ofs, allow)); -} - -static void asm_uref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; - emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR); - } else { - Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->o == IR_UREFC) { - asm_guard(as, MIPSI_BEQ, RID_TMP, 
RID_ZERO); - emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv)); - emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); - } else { - emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v)); - } - emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) + - (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); - } -} - -static void asm_fref(ASMState *as, IRIns *ir) -{ - UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); -} - -static void asm_strref(ASMState *as, IRIns *ir) -{ -#if LJ_32 - Reg dest = ra_dest(as, ir, RSET_GPR); - IRRef ref = ir->op2, refk = ir->op1; - int32_t ofs = (int32_t)sizeof(GCstr); - Reg r; - if (irref_isk(ref)) { - IRRef tmp = refk; refk = ref; ref = tmp; - } else if (!irref_isk(refk)) { - Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); - IRIns *irr = IR(ir->op2); - if (ra_hasreg(irr->r)) { - ra_noweak(as, irr->r); - right = irr->r; - } else if (mayfuse(as, irr->op2) && - irr->o == IR_ADD && irref_isk(irr->op2) && - checki16(ofs + IR(irr->op2)->i)) { - ofs += IR(irr->op2)->i; - right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left)); - } else { - right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left)); - } - emit_tsi(as, MIPSI_ADDIU, dest, dest, ofs); - emit_dst(as, MIPSI_ADDU, dest, left, right); - return; - } - r = ra_alloc1(as, ref, RSET_GPR); - ofs += IR(refk)->i; - if (checki16(ofs)) - emit_tsi(as, MIPSI_ADDIU, dest, r, ofs); - else - emit_dst(as, MIPSI_ADDU, dest, r, - ra_allock(as, ofs, rset_exclude(RSET_GPR, r))); -#else - RegSet allow = RSET_GPR; - Reg dest = ra_dest(as, ir, allow); - Reg base = ra_alloc1(as, ir->op1, allow); - IRIns *irr = IR(ir->op2); - int32_t ofs = sizeof(GCstr); - rset_clear(allow, base); - if (irref_isk(ir->op2) && checki16(ofs + irr->i)) { - emit_tsi(as, MIPSI_DADDIU, dest, base, ofs + irr->i); - } else { - emit_tsi(as, MIPSI_DADDIU, dest, dest, ofs); - emit_dst(as, MIPSI_DADDU, dest, base, ra_alloc1(as, ir->op2, allow)); - } -#endif -} - 
-/* -- Loads and stores ---------------------------------------------------- */ - -static MIPSIns asm_fxloadins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: return MIPSI_LB; - case IRT_U8: return MIPSI_LBU; - case IRT_I16: return MIPSI_LH; - case IRT_U16: return MIPSI_LHU; - case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_LDC1; - case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1; - default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW; - } -} - -static MIPSIns asm_fxstoreins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: case IRT_U8: return MIPSI_SB; - case IRT_I16: case IRT_U16: return MIPSI_SH; - case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_SDC1; - case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1; - default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW; - } -} - -static void asm_fload(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - MIPSIns mi = asm_fxloadins(ir); - Reg idx; - int32_t ofs; - if (ir->op1 == REF_NIL) { - idx = RID_JGL; - ofs = (ir->op2 << 2) - 32768 - GG_OFS(g); - } else { - idx = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->op2 == IRFL_TAB_ARRAY) { - ofs = asm_fuseabase(as, ir->op1); - if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ - emit_tsi(as, MIPSI_AADDIU, dest, idx, ofs); - return; - } - } - ofs = field_ofs[ir->op2]; - } - lua_assert(!irt_isfp(ir->t)); - emit_tsi(as, mi, dest, idx, ofs); -} - -static void asm_fstore(ASMState *as, IRIns *ir) -{ - if (ir->r != RID_SINK) { - Reg src = ra_alloc1z(as, ir->op2, RSET_GPR); - IRIns *irf = IR(ir->op1); - Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); - int32_t ofs = field_ofs[irf->op2]; - MIPSIns mi = asm_fxstoreins(ir); - lua_assert(!irt_isfp(ir->t)); - emit_tsi(as, mi, src, idx, ofs); - } -} - -static void asm_xload(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, - (!LJ_SOFTFP && irt_isfp(ir->t)) ? 
RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); -} - -static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) -{ - if (ir->r != RID_SINK) { - Reg src = ra_alloc1z(as, ir->op2, - (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, - rset_exclude(RSET_GPR, src), ofs); - } -} - -#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) - -static void asm_ahuvload(ASMState *as, IRIns *ir) -{ - int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP); - Reg dest = RID_NONE, type = RID_TMP, idx; - RegSet allow = RSET_GPR; - int32_t ofs = 0; - IRType1 t = ir->t; - if (hiop) { - t.irt = IRT_NUM; - if (ra_used(ir+1)) { - type = ra_dest(as, ir+1, allow); - rset_clear(allow, type); - } - } - if (ra_used(ir)) { - lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); - dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); - rset_clear(allow, dest); -#if LJ_64 - if (irt_isaddr(t)) - emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0); - else if (irt_isint(t)) - emit_dta(as, MIPSI_SLL, dest, dest, 0); -#endif - } - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - rset_clear(allow, idx); - if (irt_isnum(t)) { - asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); - } else { - asm_guard(as, MIPSI_BNE, type, - ra_allock(as, (int32_t)irt_toitype(t), allow)); - } -#if LJ_32 - if (ra_hasreg(dest)) { - if (!LJ_SOFTFP && irt_isnum(t)) - emit_hsi(as, MIPSI_LDC1, dest, idx, ofs); - else - emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0)); - } - emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4)); -#else - if (ra_hasreg(dest)) { - if (!LJ_SOFTFP && irt_isnum(t)) { - emit_hsi(as, MIPSI_LDC1, dest, idx, ofs); - dest = type; - } - } else { - dest = type; - } - emit_dta(as, MIPSI_DSRA32, type, dest, 15); - emit_tsi(as, MIPSI_LD, dest, idx, ofs); 
-#endif -} - -static void asm_ahustore(ASMState *as, IRIns *ir) -{ - RegSet allow = RSET_GPR; - Reg idx, src = RID_NONE, type = RID_NONE; - int32_t ofs = 0; - if (ir->r == RID_SINK) - return; - if (!LJ_SOFTFP && irt_isnum(ir->t)) { - src = ra_alloc1(as, ir->op2, RSET_FPR); - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - emit_hsi(as, MIPSI_SDC1, src, idx, ofs); - } else { -#if LJ_32 - if (!irt_ispri(ir->t)) { - src = ra_alloc1(as, ir->op2, allow); - rset_clear(allow, src); - } - if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) - type = ra_alloc1(as, (ir+1)->op2, allow); - else - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - rset_clear(allow, type); - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - if (ra_hasreg(src)) - emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0)); - emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4)); -#else - Reg tmp = RID_TMP; - if (irt_ispri(ir->t)) { - tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); - rset_clear(allow, tmp); - } else { - src = ra_alloc1(as, ir->op2, allow); - rset_clear(allow, src); - type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); - rset_clear(allow, type); - } - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - emit_tsi(as, MIPSI_SD, tmp, idx, ofs); - if (ra_hasreg(src)) { - if (irt_isinteger(ir->t)) { - emit_dst(as, MIPSI_DADDU, tmp, tmp, type); - emit_tsml(as, MIPSI_DEXT, tmp, src, 31, 0); - } else { - emit_dst(as, MIPSI_DADDU, tmp, src, type); - } - } -#endif - } -} - -static void asm_sload(ASMState *as, IRIns *ir) -{ - Reg dest = RID_NONE, type = RID_NONE, base; - RegSet allow = RSET_GPR; - IRType1 t = ir->t; -#if LJ_32 - int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); - int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP); - if (hiop) - t.irt = IRT_NUM; -#else - int32_t ofs = 8*((int32_t)ir->op1-2); -#endif - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). 
*/ - lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); -#if LJ_32 && LJ_SOFTFP - lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ - if (hiop && ra_used(ir+1)) { - type = ra_dest(as, ir+1, allow); - rset_clear(allow, type); - } -#else - if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { - dest = ra_scratch(as, RSET_FPR); - asm_tointg(as, ir, dest); - t.irt = IRT_NUM; /* Continue with a regular number type check. */ - } else -#endif - if (ra_used(ir)) { - lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); - dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); - rset_clear(allow, dest); - base = ra_alloc1(as, REF_BASE, allow); - rset_clear(allow, base); - if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) { - if (irt_isint(t)) { - Reg tmp = ra_scratch(as, RSET_FPR); - emit_tg(as, MIPSI_MFC1, dest, tmp); - emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp); - dest = tmp; - t.irt = IRT_NUM; /* Check for original type. */ - } else { - Reg tmp = ra_scratch(as, RSET_GPR); - emit_fg(as, MIPSI_CVT_D_W, dest, dest); - emit_tg(as, MIPSI_MTC1, tmp, dest); - dest = tmp; - t.irt = IRT_INT; /* Check for original type. */ - } - } -#if LJ_64 - else if (irt_isaddr(t)) { - /* Clear type from pointers. */ - emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0); - } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { - /* Sign-extend integers. 
*/ - emit_dta(as, MIPSI_SLL, dest, dest, 0); - } -#endif - goto dotypecheck; - } - base = ra_alloc1(as, REF_BASE, allow); - rset_clear(allow, base); -dotypecheck: -#if LJ_32 - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - if (ra_noreg(type)) - type = RID_TMP; - if (irt_isnum(t)) { - asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); - } else { - Reg ktype = ra_allock(as, irt_toitype(t), allow); - asm_guard(as, MIPSI_BNE, type, ktype); - } - } - if (ra_hasreg(dest)) { - if (!LJ_SOFTFP && irt_isnum(t)) - emit_hsi(as, MIPSI_LDC1, dest, base, ofs); - else - emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0)); - } - if (ra_hasreg(type)) - emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4)); -#else - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - type = dest < RID_MAX_GPR ? dest : RID_TMP; - if (irt_ispri(t)) { - asm_guard(as, MIPSI_BNE, type, - ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow)); - } else { - if (irt_isnum(t)) { - asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM); - if (ra_hasreg(dest)) - emit_hsi(as, MIPSI_LDC1, dest, base, ofs); - } else { - asm_guard(as, MIPSI_BNE, RID_TMP, - ra_allock(as, (int32_t)irt_toitype(t), allow)); - } - emit_dta(as, MIPSI_DSRA32, RID_TMP, type, 15); - } - emit_tsi(as, MIPSI_LD, type, base, ofs); - } else if (ra_hasreg(dest)) { - if (irt_isnum(t)) - emit_hsi(as, MIPSI_LDC1, dest, base, ofs); - else - emit_tsi(as, irt_isint(t) ? MIPSI_LW : MIPSI_LD, dest, base, - ofs ^ ((LJ_BE && irt_isint(t)) ? 
4 : 0)); - } -#endif -} - -/* -- Allocations --------------------------------------------------------- */ - -#if LJ_HASFFI -static void asm_cnew(ASMState *as, IRIns *ir) -{ - CTState *cts = ctype_ctsG(J2G(as->J)); - CTypeID id = (CTypeID)IR(ir->op1)->i; - CTSize sz; - CTInfo info = lj_ctype_info(cts, id, &sz); - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; - IRRef args[4]; - RegSet drop = RSET_SCRATCH; - lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); - - as->gcsteps++; - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - ra_evictset(as, drop); - if (ra_used(ir)) - ra_destreg(as, ir, RID_RET); /* GCcdata * */ - - /* Initialize immutable cdata object. */ - if (ir->o == IR_CNEWI) { - RegSet allow = (RSET_GPR & ~RSET_SCRATCH); -#if LJ_32 - int32_t ofs = sizeof(GCcdata); - if (sz == 8) { - ofs += 4; - lua_assert((ir+1)->o == IR_HIOP); - if (LJ_LE) ir++; - } - for (;;) { - Reg r = ra_alloc1z(as, ir->op2, allow); - emit_tsi(as, MIPSI_SW, r, RID_RET, ofs); - rset_clear(allow, r); - if (ofs == sizeof(GCcdata)) break; - ofs -= 4; if (LJ_BE) ir++; else ir--; - } -#else - emit_tsi(as, MIPSI_SD, ra_alloc1(as, ir->op2, allow), - RID_RET, sizeof(GCcdata)); -#endif - lua_assert(sz == 4 || sz == 8); - } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ - ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ir->op1; /* CTypeID id */ - args[2] = ir->op2; /* CTSize sz */ - args[3] = ASMREF_TMP1; /* CTSize align */ - asm_gencall(as, ci, args); - emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); - return; - } - - /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. 
*/ - emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); - emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); - emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA); - emit_ti(as, MIPSI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */ - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ASMREF_TMP1; /* MSize size */ - asm_gencall(as, ci, args); - ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), - ra_releasetmp(as, ASMREF_TMP1)); -} -#else -#define asm_cnew(as, ir) ((void)0) -#endif - -/* -- Write barriers ------------------------------------------------------ */ - -static void asm_tbar(ASMState *as, IRIns *ir) -{ - Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); - Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); - Reg link = RID_TMP; - MCLabel l_end = emit_label(as); - emit_tsi(as, MIPSI_AS, link, tab, (int32_t)offsetof(GCtab, gclist)); - emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked)); - emit_setgl(as, tab, gc.grayagain); - emit_getgl(as, link, gc.grayagain); - emit_dst(as, MIPSI_XOR, mark, mark, RID_TMP); /* Clear black bit. */ - emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end); - emit_tsi(as, MIPSI_ANDI, RID_TMP, mark, LJ_GC_BLACK); - emit_tsi(as, MIPSI_LBU, mark, tab, (int32_t)offsetof(GCtab, marked)); -} - -static void asm_obar(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; - IRRef args[2]; - MCLabel l_end; - Reg obj, val, tmp; - /* No need for other object barriers (yet). 
*/ - lua_assert(IR(ir->op1)->o == IR_UREFC); - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ir->op1; /* TValue *tv */ - asm_gencall(as, ci, args); - emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); - obj = IR(ir->op1)->r; - tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); - emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end); - emit_tsi(as, MIPSI_ANDI, tmp, tmp, LJ_GC_BLACK); - emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end); - emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, LJ_GC_WHITES); - val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); - emit_tsi(as, MIPSI_LBU, tmp, obj, - (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); - emit_tsi(as, MIPSI_LBU, RID_TMP, val, (int32_t)offsetof(GChead, marked)); -} - -/* -- Arithmetic and logic operations ------------------------------------- */ - -#if !LJ_SOFTFP -static void asm_fparith(ASMState *as, IRIns *ir, MIPSIns mi) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - emit_fgh(as, mi, dest, left, right); -} - -static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); - emit_fg(as, mi, dest, left); -} - -static void asm_fpmath(ASMState *as, IRIns *ir) -{ - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - return; - if (ir->op2 <= IRFPM_TRUNC) - asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2); - else if (ir->op2 == IRFPM_SQRT) - asm_fpunary(as, ir, MIPSI_SQRT_D); - else - asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); -} -#endif - -static void asm_add(ASMState *as, IRIns *ir) -{ - IRType1 t = ir->t; -#if !LJ_SOFTFP - if (irt_isnum(t)) { - asm_fparith(as, ir, MIPSI_ADD_D); - } else -#endif - { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - if 
(irref_isk(ir->op2)) { - intptr_t k = get_kval(IR(ir->op2)); - if (checki16(k)) { - emit_tsi(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDIU : MIPSI_ADDIU, dest, - left, k); - return; - } - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_dst(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDU : MIPSI_ADDU, dest, - left, right); - } -} - -static void asm_sub(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - asm_fparith(as, ir, MIPSI_SUB_D); - } else -#endif - { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest, - left, right); - } -} - -static void asm_mul(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - asm_fparith(as, ir, MIPSI_MUL_D); - } else -#endif - { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - if (LJ_64 && irt_is64(ir->t)) { - emit_dst(as, MIPSI_MFLO, dest, 0, 0); - emit_dst(as, MIPSI_DMULT, 0, left, right); - } else { - emit_dst(as, MIPSI_MUL, dest, left, right); - } - } -} - -static void asm_mod(ASMState *as, IRIns *ir) -{ -#if LJ_64 && LJ_HASFFI - if (!irt_isint(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : - IRCALL_lj_carith_modu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_modi); -} - -#if !LJ_SOFTFP -static void asm_pow(ASMState *as, IRIns *ir) -{ -#if LJ_64 && LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : - IRCALL_lj_carith_powu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_powi); -} - -static void asm_div(ASMState *as, IRIns *ir) -{ -#if LJ_64 && LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? 
IRCALL_lj_carith_divi64 : - IRCALL_lj_carith_divu64); - else -#endif - asm_fparith(as, ir, MIPSI_DIV_D); -} -#endif - -static void asm_neg(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - asm_fpunary(as, ir, MIPSI_NEG_D); - } else -#endif - { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest, - RID_ZERO, left); - } -} - -#define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D) -#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) -#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) - -static void asm_arithov(ASMState *as, IRIns *ir) -{ - Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); - lua_assert(!irt_is64(ir->t)); - if (irref_isk(ir->op2)) { - int k = IR(ir->op2)->i; - if (ir->o == IR_SUBOV) k = -k; - if (checki16(k)) { /* (dest < left) == (k >= 0 ? 1 : 0) */ - left = ra_alloc1(as, ir->op1, RSET_GPR); - asm_guard(as, k >= 0 ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_dst(as, MIPSI_SLT, RID_TMP, dest, dest == left ? RID_TMP : left); - emit_tsi(as, MIPSI_ADDIU, dest, left, k); - if (dest == left) emit_move(as, RID_TMP, left); - return; - } - } - left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left), - right), dest)); - asm_guard(as, MIPSI_BLTZ, RID_TMP, 0); - emit_dst(as, MIPSI_AND, RID_TMP, RID_TMP, tmp); - if (ir->o == IR_ADDOV) { /* ((dest^left) & (dest^right)) < 0 */ - emit_dst(as, MIPSI_XOR, RID_TMP, dest, dest == right ? RID_TMP : right); - } else { /* ((dest^left) & (dest^~right)) < 0 */ - emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, dest); - emit_dst(as, MIPSI_NOR, RID_TMP, dest == right ? RID_TMP : right, RID_ZERO); - } - emit_dst(as, MIPSI_XOR, tmp, dest, dest == left ? RID_TMP : left); - emit_dst(as, ir->o == IR_ADDOV ? 
MIPSI_ADDU : MIPSI_SUBU, dest, left, right); - if (dest == left || dest == right) - emit_move(as, RID_TMP, dest == left ? left : right); -} - -#define asm_addov(as, ir) asm_arithov(as, ir) -#define asm_subov(as, ir) asm_arithov(as, ir) - -static void asm_mulov(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left), - right), dest)); - asm_guard(as, MIPSI_BNE, RID_TMP, tmp); - emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31); - emit_dst(as, MIPSI_MFHI, tmp, 0, 0); - emit_dst(as, MIPSI_MFLO, dest, 0, 0); - emit_dst(as, MIPSI_MULT, 0, left, right); -} - -#if LJ_32 && LJ_HASFFI -static void asm_add64(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - if (k == 0) { - emit_dst(as, MIPSI_ADDU, dest, left, RID_TMP); - goto loarith; - } else if (checki16(k)) { - emit_dst(as, MIPSI_ADDU, dest, dest, RID_TMP); - emit_tsi(as, MIPSI_ADDIU, dest, left, k); - goto loarith; - } - } - emit_dst(as, MIPSI_ADDU, dest, dest, RID_TMP); - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_dst(as, MIPSI_ADDU, dest, left, right); -loarith: - ir--; - dest = ra_dest(as, ir, RSET_GPR); - left = ra_alloc1(as, ir->op1, RSET_GPR); - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - if (k == 0) { - if (dest != left) - emit_move(as, dest, left); - return; - } else if (checki16(k)) { - if (dest == left) { - Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, left)); - emit_move(as, dest, tmp); - dest = tmp; - } - emit_dst(as, MIPSI_SLTU, RID_TMP, dest, left); - emit_tsi(as, MIPSI_ADDIU, dest, left, k); - return; - } - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - if (dest == left && dest == right) { - Reg tmp = ra_scratch(as, 
rset_exclude(rset_exclude(RSET_GPR, left), right)); - emit_move(as, dest, tmp); - dest = tmp; - } - emit_dst(as, MIPSI_SLTU, RID_TMP, dest, dest == left ? right : left); - emit_dst(as, MIPSI_ADDU, dest, left, right); -} - -static void asm_sub64(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - emit_dst(as, MIPSI_SUBU, dest, dest, RID_TMP); - emit_dst(as, MIPSI_SUBU, dest, left, right); - ir--; - dest = ra_dest(as, ir, RSET_GPR); - left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - if (dest == left) { - Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), right)); - emit_move(as, dest, tmp); - dest = tmp; - } - emit_dst(as, MIPSI_SLTU, RID_TMP, left, dest); - emit_dst(as, MIPSI_SUBU, dest, left, right); -} - -static void asm_neg64(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - emit_dst(as, MIPSI_SUBU, dest, dest, RID_TMP); - emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left); - ir--; - dest = ra_dest(as, ir, RSET_GPR); - left = ra_alloc1(as, ir->op1, RSET_GPR); - emit_dst(as, MIPSI_SLTU, RID_TMP, RID_ZERO, dest); - emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left); -} -#endif - -static void asm_bnot(ASMState *as, IRIns *ir) -{ - Reg left, right, dest = ra_dest(as, ir, RSET_GPR); - IRIns *irl = IR(ir->op1); - if (mayfuse(as, ir->op1) && irl->o == IR_BOR) { - left = ra_alloc2(as, irl, RSET_GPR); - right = (left >> 8); left &= 255; - } else { - left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - right = RID_ZERO; - } - emit_dst(as, MIPSI_NOR, dest, left, right); -} - -static void asm_bswap(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); -#if LJ_32 - if ((as->flags & JIT_F_MIPSXXR2)) { - emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); - emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); - } else { - Reg tmp = 
ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), dest)); - emit_dst(as, MIPSI_OR, dest, dest, tmp); - emit_dst(as, MIPSI_OR, dest, dest, RID_TMP); - emit_tsi(as, MIPSI_ANDI, dest, dest, 0xff00); - emit_dta(as, MIPSI_SLL, RID_TMP, RID_TMP, 8); - emit_dta(as, MIPSI_SRL, dest, left, 8); - emit_tsi(as, MIPSI_ANDI, RID_TMP, left, 0xff00); - emit_dst(as, MIPSI_OR, tmp, tmp, RID_TMP); - emit_dta(as, MIPSI_SRL, tmp, left, 24); - emit_dta(as, MIPSI_SLL, RID_TMP, left, 24); - } -#else - if (irt_is64(ir->t)) { - emit_dst(as, MIPSI_DSHD, dest, 0, RID_TMP); - emit_dst(as, MIPSI_DSBH, RID_TMP, 0, left); - } else { - emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); - emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); - } -#endif -} - -static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - if (irref_isk(ir->op2)) { - intptr_t k = get_kval(IR(ir->op2)); - if (checku16(k)) { - emit_tsi(as, mik, dest, left, k); - return; - } - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_dst(as, mi, dest, left, right); -} - -#define asm_band(as, ir) asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI) -#define asm_bor(as, ir) asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI) -#define asm_bxor(as, ir) asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI) - -static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op2)) { /* Constant shifts. */ - uint32_t shift = (uint32_t)IR(ir->op2)->i; - if (LJ_64 && irt_is64(ir->t)) mik |= (shift & 32) ? MIPSI_D32 : MIPSI_D; - emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), - (shift & 31)); - } else { - Reg right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - if (LJ_64 && irt_is64(ir->t)) mi |= MIPSI_DV; - emit_dst(as, mi, dest, right, left); /* Shift amount is in rs. 
*/ - } -} - -#define asm_bshl(as, ir) asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL) -#define asm_bshr(as, ir) asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL) -#define asm_bsar(as, ir) asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA) -#define asm_brol(as, ir) lua_assert(0) - -static void asm_bror(ASMState *as, IRIns *ir) -{ - if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { - asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op2)) { /* Constant shifts. */ - uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - emit_rotr(as, dest, left, RID_TMP, shift); - } else { - Reg right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - emit_dst(as, MIPSI_OR, dest, dest, RID_TMP); - emit_dst(as, MIPSI_SRLV, dest, right, left); - emit_dst(as, MIPSI_SLLV, RID_TMP, RID_TMP, left); - emit_dst(as, MIPSI_SUBU, RID_TMP, ra_allock(as, 32, RSET_GPR), right); - } - } -} - -#if LJ_32 && LJ_SOFTFP -static void asm_sfpmin_max(ASMState *as, IRIns *ir) -{ - CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax]; - IRRef args[4]; - args[0^LJ_BE] = ir->op1; - args[1^LJ_BE] = (ir+1)->op1; - args[2^LJ_BE] = ir->op2; - args[3^LJ_BE] = (ir+1)->op2; - asm_setupresult(as, ir, &ci); - emit_call(as, (void *)ci.func, 0); - ci.func = NULL; - asm_gencall(as, &ci, args); -} -#endif - -static void asm_min_max(ASMState *as, IRIns *ir, int ismax) -{ - if (!LJ_SOFTFP && irt_isnum(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - if (dest == left) { - emit_fg(as, MIPSI_MOVT_D, dest, right); - } else { - emit_fg(as, MIPSI_MOVF_D, dest, left); - if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right); - } - emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? 
right : left); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - if (dest == left) { - emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP); - } else { - emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP); - if (dest != right) emit_move(as, dest, right); - } - emit_dst(as, MIPSI_SLT, RID_TMP, - ismax ? left : right, ismax ? right : left); - } -} - -#define asm_min(as, ir) asm_min_max(as, ir, 0) -#define asm_max(as, ir) asm_min_max(as, ir, 1) - -/* -- Comparisons --------------------------------------------------------- */ - -#if LJ_32 && LJ_SOFTFP -/* SFP comparisons. */ -static void asm_sfpcomp(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; - RegSet drop = RSET_SCRATCH; - Reg r; - IRRef args[4]; - args[LJ_LE ? 0 : 1] = ir->op1; args[LJ_LE ? 1 : 0] = (ir+1)->op1; - args[LJ_LE ? 2 : 3] = ir->op2; args[LJ_LE ? 3 : 2] = (ir+1)->op2; - - for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) { - if (!rset_test(as->freeset, r) && - regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) - rset_clear(drop, r); - } - ra_evictset(as, drop); - - asm_setupresult(as, ir, ci); - - switch ((IROp)ir->o) { - case IR_LT: - asm_guard(as, MIPSI_BGEZ, RID_RET, 0); - break; - case IR_ULT: - asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); - emit_loadi(as, RID_TMP, 1); - asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); - break; - case IR_GE: - asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); - emit_loadi(as, RID_TMP, 2); - asm_guard(as, MIPSI_BLTZ, RID_RET, 0); - break; - case IR_LE: - asm_guard(as, MIPSI_BGTZ, RID_RET, 0); - break; - case IR_GT: - asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); - emit_loadi(as, RID_TMP, 2); - asm_guard(as, MIPSI_BLEZ, RID_RET, 0); - break; - case IR_UGE: - asm_guard(as, MIPSI_BLTZ, RID_RET, 0); - break; - case IR_ULE: - asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); - emit_loadi(as, RID_TMP, 1); - break; - case IR_UGT: case IR_ABC: - asm_guard(as, MIPSI_BLEZ, 
RID_RET, 0); - break; - case IR_EQ: case IR_NE: - asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, RID_RET, RID_ZERO); - default: - break; - } - asm_gencall(as, ci, args); -} -#endif - -static void asm_comp(ASMState *as, IRIns *ir) -{ - /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ - IROp op = ir->o; - if (!LJ_SOFTFP && irt_isnum(ir->t)) { - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); - emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right); - } else { - Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); - if (op == IR_ABC) op = IR_UGT; - if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(IR(ir->op2)) == 0) { - MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) : - ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ); - asm_guard(as, mi, left, 0); - } else { - if (irref_isk(ir->op2)) { - intptr_t k = get_kval(IR(ir->op2)); - if ((op&2)) k++; - if (checki16(k)) { - asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_tsi(as, (op&4) ? MIPSI_SLTIU : MIPSI_SLTI, - RID_TMP, left, k); - return; - } - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - asm_guard(as, ((op^(op>>1))&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_dst(as, (op&4) ? MIPSI_SLTU : MIPSI_SLT, - RID_TMP, (op&2) ? right : left, (op&2) ? left : right); - } - } -} - -static void asm_equal(ASMState *as, IRIns *ir) -{ - Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ? - RSET_FPR : RSET_GPR); - right = (left >> 8); left &= 255; - if (!LJ_SOFTFP && irt_isnum(ir->t)) { - asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); - emit_fgh(as, MIPSI_C_EQ_D, 0, left, right); - } else { - asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right); - } -} - -#if LJ_32 && LJ_HASFFI -/* 64 bit integer comparisons. */ -static void asm_comp64(ASMState *as, IRIns *ir) -{ - /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. 
*/ - IROp op = (ir-1)->o; - MCLabel l_end; - Reg rightlo, leftlo, righthi, lefthi = ra_alloc2(as, ir, RSET_GPR); - righthi = (lefthi >> 8); lefthi &= 255; - leftlo = ra_alloc2(as, ir-1, - rset_exclude(rset_exclude(RSET_GPR, lefthi), righthi)); - rightlo = (leftlo >> 8); leftlo &= 255; - asm_guard(as, ((op^(op>>1))&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); - l_end = emit_label(as); - if (lefthi != righthi) - emit_dst(as, (op&4) ? MIPSI_SLTU : MIPSI_SLT, RID_TMP, - (op&2) ? righthi : lefthi, (op&2) ? lefthi : righthi); - emit_dst(as, MIPSI_SLTU, RID_TMP, - (op&2) ? rightlo : leftlo, (op&2) ? leftlo : rightlo); - if (lefthi != righthi) - emit_branch(as, MIPSI_BEQ, lefthi, righthi, l_end); -} - -static void asm_comp64eq(ASMState *as, IRIns *ir) -{ - Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - asm_guard(as, ((ir-1)->o & 1) ? MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO); - tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), right)); - emit_dst(as, MIPSI_OR, RID_TMP, RID_TMP, tmp); - emit_dst(as, MIPSI_XOR, tmp, left, right); - left = ra_alloc2(as, ir-1, RSET_GPR); - right = (left >> 8); left &= 255; - emit_dst(as, MIPSI_XOR, RID_TMP, left, right); -} -#endif - -/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ - -/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ -static void asm_hiop(ASMState *as, IRIns *ir) -{ -#if LJ_32 && (LJ_HASFFI || LJ_SOFTFP) - /* HIOP is marked as a store because it needs its own DCE logic. */ - int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ - if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; - if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ - as->curins--; /* Always skip the CONV. */ -#if LJ_HASFFI && !LJ_SOFTFP - if (usehi || uselo) - asm_conv64(as, ir); - return; -#endif - } else if ((ir-1)->o < IR_EQ) { /* 64 bit integer comparisons. ORDER IR. 
*/ - as->curins--; /* Always skip the loword comparison. */ -#if LJ_SOFTFP - if (!irt_isint(ir->t)) { - asm_sfpcomp(as, ir-1); - return; - } -#endif -#if LJ_HASFFI - asm_comp64(as, ir); -#endif - return; - } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ - as->curins--; /* Always skip the loword comparison. */ -#if LJ_SOFTFP - if (!irt_isint(ir->t)) { - asm_sfpcomp(as, ir-1); - return; - } -#endif -#if LJ_HASFFI - asm_comp64eq(as, ir); -#endif - return; -#if LJ_SOFTFP - } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { - as->curins--; /* Always skip the loword min/max. */ - if (uselo || usehi) - asm_sfpmin_max(as, ir-1); - return; -#endif - } else if ((ir-1)->o == IR_XSTORE) { - as->curins--; /* Handle both stores here. */ - if ((ir-1)->r != RID_SINK) { - asm_xstore_(as, ir, LJ_LE ? 4 : 0); - asm_xstore_(as, ir-1, LJ_LE ? 0 : 4); - } - return; - } - if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ - switch ((ir-1)->o) { -#if LJ_HASFFI - case IR_ADD: as->curins--; asm_add64(as, ir); break; - case IR_SUB: as->curins--; asm_sub64(as, ir); break; - case IR_NEG: as->curins--; asm_neg64(as, ir); break; -#endif -#if LJ_SOFTFP - case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: - case IR_STRTO: - if (!uselo) - ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ - break; -#endif - case IR_CALLN: - case IR_CALLS: - case IR_CALLXS: - if (!uselo) - ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ - break; -#if LJ_SOFTFP - case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: -#endif - case IR_CNEWI: - /* Nothing to do here. Handled by lo op itself. */ - break; - default: lua_assert(0); break; - } -#else - UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. 
*/ -#endif -} - -/* -- Profiling ----------------------------------------------------------- */ - -static void asm_prof(ASMState *as, IRIns *ir) -{ - UNUSED(ir); - asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO); - emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE); - emit_lsglptr(as, MIPSI_LBU, RID_TMP, - (int32_t)offsetof(global_State, hookmask)); -} - -/* -- Stack handling ------------------------------------------------------ */ - -/* Check Lua stack size for overflow. Use exit handler as fallback. */ -static void asm_stack_check(ASMState *as, BCReg topslot, - IRIns *irp, RegSet allow, ExitNo exitno) -{ - /* Try to get an unused temp. register, otherwise spill/restore RID_RET*. */ - Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; - ExitNo oldsnap = as->snapno; - rset_clear(allow, pbase); -#if LJ_32 - tmp = allow ? rset_pickbot(allow) : - (pbase == RID_RETHI ? RID_RETLO : RID_RETHI); -#else - tmp = allow ? rset_pickbot(allow) : RID_RET; -#endif - as->snapno = exitno; - asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO); - as->snapno = oldsnap; - if (allow == RSET_EMPTY) /* Restore temp. register. */ - emit_tsi(as, MIPSI_AL, tmp, RID_SP, 0); - else - ra_modified(as, tmp); - emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot)); - emit_dst(as, MIPSI_ASUBU, RID_TMP, tmp, pbase); - emit_tsi(as, MIPSI_AL, tmp, tmp, offsetof(lua_State, maxstack)); - if (pbase == RID_TMP) - emit_getgl(as, RID_TMP, jit_base); - emit_getgl(as, tmp, cur_L); - if (allow == RSET_EMPTY) /* Spill temp. register. */ - emit_tsi(as, MIPSI_AS, tmp, RID_SP, 0); -} - -/* Restore Lua stack from on-trace state. */ -static void asm_stack_restore(ASMState *as, SnapShot *snap) -{ - SnapEntry *map = &as->T->snapmap[snap->mapofs]; -#if LJ_32 || defined(LUA_USE_ASSERT) - SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; -#endif - MSize n, nent = snap->nent; - /* Store the value of all modified slots to the Lua stack. 
*/ - for (n = 0; n < nent; n++) { - SnapEntry sn = map[n]; - BCReg s = snap_slot(sn); - int32_t ofs = 8*((int32_t)s-1-LJ_FR2); - IRRef ref = snap_ref(sn); - IRIns *ir = IR(ref); - if ((sn & SNAP_NORESTORE)) - continue; - if (irt_isnum(ir->t)) { -#if LJ_SOFTFP - Reg tmp; - RegSet allow = rset_exclude(RSET_GPR, RID_BASE); - lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ - tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow); - emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?4:0)); - if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); - tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); - emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); -#else - Reg src = ra_alloc1(as, ref, RSET_FPR); - emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs); -#endif - } else { -#if LJ_32 - RegSet allow = rset_exclude(RSET_GPR, RID_BASE); - Reg type; - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, allow); - rset_clear(allow, src); - emit_tsi(as, MIPSI_SW, src, RID_BASE, ofs+(LJ_BE?4:0)); - } - if ((sn & (SNAP_CONT|SNAP_FRAME))) { - if (s == 0) continue; /* Do not overwrite link to previous frame. */ - type = ra_allock(as, (int32_t)(*flinks--), allow); -#if LJ_SOFTFP - } else if ((sn & SNAP_SOFTFPNUM)) { - type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); -#endif - } else { - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - } - emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4)); -#else - asm_tvstore64(as, RID_BASE, ofs, ref); -#endif - } - checkmclim(as); - } - lua_assert(map + nent == flinks); -} - -/* -- GC handling --------------------------------------------------------- */ - -/* Check GC threshold and do one or more GC steps. 
*/ -static void asm_gc_check(ASMState *as) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; - IRRef args[2]; - MCLabel l_end; - Reg tmp; - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ - /* Assumes asm_snap_prep() already done. */ - asm_guard(as, MIPSI_BNE, RID_RET, RID_ZERO); - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ASMREF_TMP2; /* MSize steps */ - asm_gencall(as, ci, args); - emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); - tmp = ra_releasetmp(as, ASMREF_TMP2); - emit_loadi(as, tmp, as->gcsteps); - /* Jump around GC step if GC total < GC threshold. */ - emit_branch(as, MIPSI_BNE, RID_TMP, RID_ZERO, l_end); - emit_dst(as, MIPSI_SLTU, RID_TMP, RID_TMP, tmp); - emit_getgl(as, tmp, gc.threshold); - emit_getgl(as, RID_TMP, gc.total); - as->gcsteps = 0; - checkmclim(as); -} - -/* -- Loop handling ------------------------------------------------------- */ - -/* Fixup the loop branch. */ -static void asm_loop_fixup(ASMState *as) -{ - MCode *p = as->mctop; - MCode *target = as->mcp; - p[-1] = MIPSI_NOP; - if (as->loopinv) { /* Inverted loop branch? */ - /* asm_guard already inverted the cond branch. Only patch the target. */ - p[-3] |= ((target-p+2) & 0x0000ffffu); - } else { - p[-2] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); - } -} - -/* -- Head of trace ------------------------------------------------------- */ - -/* Coalesce BASE register for a root trace. */ -static void asm_head_root_base(ASMState *as) -{ - IRIns *ir = IR(REF_BASE); - Reg r = ir->r; - if (as->loopinv) as->mctop--; - if (ra_hasreg(r)) { - ra_free(as, r); - if (rset_test(as->modset, r) || irt_ismarked(ir->t)) - ir->r = RID_INIT; /* No inheritance for modified BASE register. */ - if (r != RID_BASE) - emit_move(as, r, RID_BASE); - } -} - -/* Coalesce BASE register for a side trace. 
*/ -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) -{ - IRIns *ir = IR(REF_BASE); - Reg r = ir->r; - if (as->loopinv) as->mctop--; - if (ra_hasreg(r)) { - ra_free(as, r); - if (rset_test(as->modset, r) || irt_ismarked(ir->t)) - ir->r = RID_INIT; /* No inheritance for modified BASE register. */ - if (irp->r == r) { - rset_clear(allow, r); /* Mark same BASE register as coalesced. */ - } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { - rset_clear(allow, irp->r); - emit_move(as, r, irp->r); /* Move from coalesced parent reg. */ - } else { - emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ - } - } - return allow; -} - -/* -- Tail of trace ------------------------------------------------------- */ - -/* Fixup the tail code. */ -static void asm_tail_fixup(ASMState *as, TraceNo lnk) -{ - MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; - int32_t spadj = as->T->spadjust; - MCode *p = as->mctop-1; - *p = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; - p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); -} - -/* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) -{ - as->mcp = as->mctop-2; /* Leave room for branch plus nop or stack adj. */ - as->invmcp = as->loopref ? as->mcp : NULL; -} - -/* -- Trace setup --------------------------------------------------------- */ - -/* Ensure there are enough stack slots for call arguments. */ -static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - IRRef args[CCI_NARGS_MAX*2]; - uint32_t i, nargs = CCI_XNARGS(ci); -#if LJ_32 - int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; -#else - int nslots = 0, ngpr = REGARG_NUMGPR; -#endif - asm_collectargs(as, ir, ci, args); - for (i = 0; i < nargs; i++) { -#if LJ_32 - if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t) && - nfpr > 0 && !(ci->flags & CCI_VARARG)) { - nfpr--; - ngpr -= irt_isnum(IR(args[i])->t) ? 
2 : 1; - } else if (!LJ_SOFTFP && args[i] && irt_isnum(IR(args[i])->t)) { - nfpr = 0; - ngpr = ngpr & ~1; - if (ngpr > 0) ngpr -= 2; else nslots = (nslots+3) & ~1; - } else { - nfpr = 0; - if (ngpr > 0) ngpr--; else nslots++; - } -#else - if (ngpr > 0) ngpr--; else nslots += 2; -#endif - } - if (nslots > as->evenspill) /* Leave room for args in stack slots. */ - as->evenspill = nslots; - return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); -} - -static void asm_setup_target(ASMState *as) -{ - asm_sparejump_setup(as); - asm_exitstub_setup(as); -} - -/* -- Trace patching ------------------------------------------------------ */ - -/* Patch exit jumps of existing machine code to a new target. */ -void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) -{ - MCode *p = T->mcode; - MCode *pe = (MCode *)((char *)p + T->szmcode); - MCode *px = exitstub_trace_addr(T, exitno); - MCode *cstart = NULL, *cstop = NULL; - MCode *mcarea = lj_mcode_patch(J, p, 0); - MCode exitload = MIPSI_LI | MIPSF_T(RID_TMP) | exitno; - MCode tjump = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); - for (p++; p < pe; p++) { - if (*p == exitload) { /* Look for load of exit number. */ - if (((p[-1] ^ (px-p)) & 0xffffu) == 0) { /* Look for exitstub branch. */ - ptrdiff_t delta = target - p; - if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */ - patchbranch: - p[-1] = (p[-1] & 0xffff0000u) | (delta & 0xffffu); - *p = MIPSI_NOP; /* Replace the load of the exit number. */ - cstop = p; - if (!cstart) cstart = p-1; - } else { /* Branch out of range. Use spare jump slot in mcarea. */ - int i; - for (i = 2; i < 2+MIPS_SPAREJUMP*2; i += 2) { - if (mcarea[i] == tjump) { - delta = mcarea+i - p; - goto patchbranch; - } else if (mcarea[i] == MIPSI_NOP) { - mcarea[i] = tjump; - cstart = mcarea+i; - delta = mcarea+i - p; - goto patchbranch; - } - } - /* Ignore jump slot overflow. Child trace is simply not attached. 
*/ - } - } else if (p+1 == pe) { - /* Patch NOP after code for inverted loop branch. Use of J is ok. */ - lua_assert(p[1] == MIPSI_NOP); - p[1] = tjump; - *p = MIPSI_NOP; /* Replace the load of the exit number. */ - cstop = p+2; - if (!cstart) cstart = p+1; - } - } - } - if (cstart) lj_mcode_sync(cstart, cstop); - lj_mcode_patch(J, mcarea, 1); -} - diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h deleted file mode 100644 index 6daa861b91..0000000000 --- a/src/lj_asm_ppc.h +++ /dev/null @@ -1,2016 +0,0 @@ -/* -** PPC IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -*/ - -/* -- Register allocator extensions --------------------------------------- */ - -/* Allocate a register with a hint. */ -static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) -{ - Reg r = IR(ref)->r; - if (ra_noreg(r)) { - if (!ra_hashint(r) && !iscrossref(as, ref)) - ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ - r = ra_allocref(as, ref, allow); - } - ra_noweak(as, r); - return r; -} - -/* Allocate two source registers for three-operand instructions. */ -static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) -{ - IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); - Reg left = irl->r, right = irr->r; - if (ra_hasreg(left)) { - ra_noweak(as, left); - if (ra_noreg(right)) - right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); - else - ra_noweak(as, right); - } else if (ra_hasreg(right)) { - ra_noweak(as, right); - left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); - } else if (ra_hashint(right)) { - right = ra_allocref(as, ir->op2, allow); - left = ra_alloc1(as, ir->op1, rset_exclude(allow, right)); - } else { - left = ra_allocref(as, ir->op1, allow); - right = ra_alloc1(as, ir->op2, rset_exclude(allow, left)); - } - return left | (right << 8); -} - -/* -- Guard handling ------------------------------------------------------ */ - -/* Setup exit stubs after the end of each trace. 
*/ -static void asm_exitstub_setup(ASMState *as, ExitNo nexits) -{ - ExitNo i; - MCode *mxp = as->mctop; - if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) - asm_mclimit(as); - /* 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; bl <1; bl <1; ... */ - for (i = nexits-1; (int32_t)i >= 0; i--) - *--mxp = PPCI_BL|(((-3-i)&0x00ffffffu)<<2); - *--mxp = PPCI_LI|PPCF_T(RID_TMP)|as->T->traceno; /* Read by exit handler. */ - mxp--; - *mxp = PPCI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)&0x00ffffffu)<<2); - *--mxp = PPCI_MFLR|PPCF_T(RID_TMP); - as->mctop = mxp; -} - -static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) -{ - /* Keep this in-sync with exitstub_trace_addr(). */ - return as->mctop + exitno + 3; -} - -/* Emit conditional branch to exit for guard. */ -static void asm_guardcc(ASMState *as, PPCCC cc) -{ - MCode *target = asm_exitstub_addr(as, as->snapno); - MCode *p = as->mcp; - if (LJ_UNLIKELY(p == as->invmcp)) { - as->loopinv = 1; - *p = PPCI_B | (((target-p) & 0x00ffffffu) << 2); - emit_condbranch(as, PPCI_BC, cc^4, p); - return; - } - emit_condbranch(as, PPCI_BC, cc, target); -} - -/* -- Operand fusion ------------------------------------------------------ */ - -/* Limit linear search to this distance. Avoids O(n^2) behavior. */ -#define CONFLICT_SEARCH_LIM 31 - -/* Check if there's no conflicting instruction between curins and ref. */ -static int noconflict(ASMState *as, IRRef ref, IROp conflict) -{ - IRIns *ir = as->ir; - IRRef i = as->curins; - if (i > ref + CONFLICT_SEARCH_LIM) - return 0; /* Give up, ref is too far away. */ - while (--i > ref) - if (ir[i].o == conflict) - return 0; /* Conflict found. */ - return 1; /* Ok, no conflict. */ -} - -/* Fuse the array base of colocated arrays. 
*/ -static int32_t asm_fuseabase(ASMState *as, IRRef ref) -{ - IRIns *ir = IR(ref); - if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && - !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) - return (int32_t)sizeof(GCtab); - return 0; -} - -/* Indicates load/store indexed is ok. */ -#define AHUREF_LSX ((int32_t)0x80000000) - -/* Fuse array/hash/upvalue reference into register+offset operand. */ -static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) -{ - IRIns *ir = IR(ref); - if (ra_noreg(ir->r)) { - if (ir->o == IR_AREF) { - if (mayfuse(as, ref)) { - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? tab : ir->op1; - ofs += 8*IR(ir->op2)->i; - if (checki16(ofs)) { - *ofsp = ofs; - return ra_alloc1(as, refa, allow); - } - } - if (*ofsp == AHUREF_LSX) { - Reg base = ra_alloc1(as, ir->op1, allow); - Reg idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); - return base | (idx << 8); - } - } - } else if (ir->o == IR_HREFK) { - if (mayfuse(as, ref)) { - int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); - if (checki16(ofs)) { - *ofsp = ofs; - return ra_alloc1(as, ir->op1, allow); - } - } - } else if (ir->o == IR_UREFC) { - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv); - int32_t jgl = (intptr_t)J2G(as->J); - if ((uint32_t)(ofs-jgl) < 65536) { - *ofsp = ofs-jgl-32768; - return RID_JGL; - } else { - *ofsp = (int16_t)ofs; - return ra_allock(as, ofs-(int16_t)ofs, allow); - } - } - } - } - *ofsp = 0; - return ra_alloc1(as, ref, allow); -} - -/* Fuse XLOAD/XSTORE reference into load/store operand. 
*/ -static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref, - RegSet allow, int32_t ofs) -{ - IRIns *ir = IR(ref); - Reg base; - if (ra_noreg(ir->r) && canfuse(as, ir)) { - if (ir->o == IR_ADD) { - int32_t ofs2; - if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) { - ofs = ofs2; - ref = ir->op1; - } else if (ofs == 0) { - Reg right, left = ra_alloc2(as, ir, allow); - right = (left >> 8); left &= 255; - emit_fab(as, PPCI_LWZX | ((pi >> 20) & 0x780), rt, left, right); - return; - } - } else if (ir->o == IR_STRREF) { - lua_assert(ofs == 0); - ofs = (int32_t)sizeof(GCstr); - if (irref_isk(ir->op2)) { - ofs += IR(ir->op2)->i; - ref = ir->op1; - } else if (irref_isk(ir->op1)) { - ofs += IR(ir->op1)->i; - ref = ir->op2; - } else { - /* NYI: Fuse ADD with constant. */ - Reg tmp, right, left = ra_alloc2(as, ir, allow); - right = (left >> 8); left &= 255; - tmp = ra_scratch(as, rset_exclude(rset_exclude(allow, left), right)); - emit_fai(as, pi, rt, tmp, ofs); - emit_tab(as, PPCI_ADD, tmp, left, right); - return; - } - if (!checki16(ofs)) { - Reg left = ra_alloc1(as, ref, allow); - Reg right = ra_allock(as, ofs, rset_exclude(allow, left)); - emit_fab(as, PPCI_LWZX | ((pi >> 20) & 0x780), rt, left, right); - return; - } - } - } - base = ra_alloc1(as, ref, allow); - emit_fai(as, pi, rt, base, ofs); -} - -/* Fuse XLOAD/XSTORE reference into indexed-only load/store operand. */ -static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref, - RegSet allow) -{ - IRIns *ira = IR(ref); - Reg right, left; - if (canfuse(as, ira) && ira->o == IR_ADD && ra_noreg(ira->r)) { - left = ra_alloc2(as, ira, allow); - right = (left >> 8); left &= 255; - } else { - right = ra_alloc1(as, ref, allow); - left = RID_R0; - } - emit_tab(as, pi, rt, left, right); -} - -/* Fuse to multiply-add/sub instruction. 
*/ -static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) -{ - IRRef lref = ir->op1, rref = ir->op2; - IRIns *irm; - if (lref != rref && - ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && - ra_noreg(irm->r)) || - (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && - (rref = lref, pi = pir, ra_noreg(irm->r))))) { - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg add = ra_alloc1(as, rref, RSET_FPR); - Reg right, left = ra_alloc2(as, irm, rset_exclude(RSET_FPR, add)); - right = (left >> 8); left &= 255; - emit_facb(as, pi, dest, left, right, add); - return 1; - } - return 0; -} - -/* -- Calls --------------------------------------------------------------- */ - -/* Generate a call to a C function. */ -static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) -{ - uint32_t n, nargs = CCI_XNARGS(ci); - int32_t ofs = 8; - Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR; - if ((void *)ci->func) - emit_call(as, (void *)ci->func); - for (n = 0; n < nargs; n++) { /* Setup args. */ - IRRef ref = args[n]; - if (ref) { - IRIns *ir = IR(ref); - if (irt_isfp(ir->t)) { - if (fpr <= REGARG_LASTFPR) { - lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ - ra_leftov(as, fpr, ref); - fpr++; - } else { - Reg r = ra_alloc1(as, ref, RSET_FPR); - if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; - emit_spstore(as, ir, r, ofs); - ofs += irt_isnum(ir->t) ? 8 : 4; - } - } else { - if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ - ra_leftov(as, gpr, ref); - gpr++; - } else { - Reg r = ra_alloc1(as, ref, RSET_GPR); - emit_spstore(as, ir, r, ofs); - ofs += 4; - } - } - } else { - if (gpr <= REGARG_LASTGPR) - gpr++; - else - ofs += 4; - } - checkmclim(as); - } - if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */ - emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6); -} - -/* Setup result reg/sp for call. Evict scratch regs. 
*/ -static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - RegSet drop = RSET_SCRATCH; - int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); - if ((ci->flags & CCI_NOFPRCLOBBER)) - drop &= ~RSET_FPR; - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - if (hiop && ra_hasreg((ir+1)->r)) - rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ - ra_evictset(as, drop); /* Evictions must be performed first. */ - if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); - if (irt_isfp(ir->t)) { - if ((ci->flags & CCI_CASTU64)) { - /* Use spill slot or temp slots. */ - int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; - Reg dest = ir->r; - if (ra_hasreg(dest)) { - ra_free(as, dest); - ra_modified(as, dest); - emit_fai(as, PPCI_LFD, dest, RID_SP, ofs); - } - emit_tai(as, PPCI_STW, RID_RETHI, RID_SP, ofs); - emit_tai(as, PPCI_STW, RID_RETLO, RID_SP, ofs+4); - } else { - ra_destreg(as, ir, RID_FPRET); - } -#if LJ_32 - } else if (hiop) { - ra_destpair(as, ir); -#endif - } else { - ra_destreg(as, ir, RID_RET); - } - } -} - -static void asm_callx(ASMState *as, IRIns *ir) -{ - IRRef args[CCI_NARGS_MAX*2]; - CCallInfo ci; - IRRef func; - IRIns *irf; - ci.flags = asm_callx_flags(as, ir); - asm_collectargs(as, ir, &ci, args); - asm_setupresult(as, ir, &ci); - func = ir->op2; irf = IR(func); - if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } - if (irref_isk(func)) { /* Call to constant address. */ - ci.func = (ASMFunction)(void *)(intptr_t)(irf->i); - } else { /* Need a non-argument register for indirect calls. */ - RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1); - Reg freg = ra_alloc1(as, func, allow); - *--as->mcp = PPCI_BCTRL; - *--as->mcp = PPCI_MTCTR | PPCF_T(freg); - ci.func = (ASMFunction)(void *)0; - } - asm_gencall(as, &ci, args); -} - -/* -- Returns ------------------------------------------------------------- */ - -/* Return to lower frame. Guard that it goes to the right spot. 
*/ -static void asm_retf(ASMState *as, IRIns *ir) -{ - Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); - void *pc = ir_kptr(IR(ir->op2)); - int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); - as->topslot -= (BCReg)delta; - if ((int32_t)as->topslot < 0) as->topslot = 0; - irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ - emit_setgl(as, base, jit_base); - emit_addptr(as, base, -8*delta); - asm_guardcc(as, CC_NE); - emit_ab(as, PPCI_CMPW, RID_TMP, - ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base))); - emit_tai(as, PPCI_LWZ, RID_TMP, base, -8); -} - -/* -- Type conversions ---------------------------------------------------- */ - -static void asm_tointg(ASMState *as, IRIns *ir, Reg left) -{ - RegSet allow = RSET_FPR; - Reg tmp = ra_scratch(as, rset_clear(allow, left)); - Reg fbias = ra_scratch(as, rset_clear(allow, tmp)); - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg hibias = ra_allock(as, 0x43300000, rset_exclude(RSET_GPR, dest)); - asm_guardcc(as, CC_NE); - emit_fab(as, PPCI_FCMPU, 0, tmp, left); - emit_fab(as, PPCI_FSUB, tmp, tmp, fbias); - emit_fai(as, PPCI_LFD, tmp, RID_SP, SPOFS_TMP); - emit_tai(as, PPCI_STW, RID_TMP, RID_SP, SPOFS_TMPLO); - emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); - emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000); - emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); - emit_lsptr(as, PPCI_LFS, (fbias & 31), - (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR); - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_fb(as, PPCI_FCTIWZ, tmp, left); -} - -static void asm_tobit(ASMState *as, IRIns *ir) -{ - RegSet allow = RSET_FPR; - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, allow); - Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); - Reg tmp = ra_scratch(as, rset_clear(allow, right)); - emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_fab(as, PPCI_FADD, tmp, left, right); -} - -static void 
asm_conv(ASMState *as, IRIns *ir) -{ - IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); - int stfp = (st == IRT_NUM || st == IRT_FLOAT); - IRRef lref = ir->op1; - lua_assert(irt_type(ir->t) != st); - lua_assert(!(irt_isint64(ir->t) || - (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ - if (irt_isfp(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - if (stfp) { /* FP to FP conversion. */ - if (st == IRT_NUM) /* double -> float conversion. */ - emit_fb(as, PPCI_FRSP, dest, ra_alloc1(as, lref, RSET_FPR)); - else /* float -> double conversion is a no-op on PPC. */ - ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ - } else { /* Integer to FP conversion. */ - /* IRT_INT: Flip hibit, bias with 2^52, subtract 2^52+2^31. */ - /* IRT_U32: Bias with 2^52, subtract 2^52. */ - RegSet allow = RSET_GPR; - Reg left = ra_alloc1(as, lref, allow); - Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left)); - Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); - if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest); - emit_fab(as, PPCI_FSUB, dest, dest, fbias); - emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); - emit_lsptr(as, PPCI_LFS, (fbias & 31), - &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31], - rset_clear(allow, hibias)); - emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP, - RID_SP, SPOFS_TMPLO); - emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); - if (st != IRT_U32) emit_asi(as, PPCI_XORIS, RID_TMP, left, 0x8000); - } - } else if (stfp) { /* FP to integer conversion. */ - if (irt_isguard(ir->t)) { - /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); - asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, lref, RSET_FPR); - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - if (irt_isu32(ir->t)) { - /* Convert both x and x-2^31 to int and merge results. 
*/ - Reg tmpi = ra_scratch(as, rset_exclude(RSET_GPR, dest)); - emit_asb(as, PPCI_OR, dest, dest, tmpi); /* Select with mask idiom. */ - emit_asb(as, PPCI_AND, tmpi, tmpi, RID_TMP); - emit_asb(as, PPCI_ANDC, dest, dest, RID_TMP); - emit_tai(as, PPCI_LWZ, tmpi, RID_SP, SPOFS_TMPLO); /* tmp = (int)(x) */ - emit_tai(as, PPCI_ADDIS, dest, dest, 0x8000); /* dest += 2^31 */ - emit_asb(as, PPCI_SRAWI, RID_TMP, dest, 31); /* mask = -(dest < 0) */ - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_tai(as, PPCI_LWZ, dest, - RID_SP, SPOFS_TMPLO); /* dest = (int)(x-2^31) */ - emit_fb(as, PPCI_FCTIWZ, tmp, left); - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_fb(as, PPCI_FCTIWZ, tmp, tmp); - emit_fab(as, PPCI_FSUB, tmp, left, tmp); - emit_lsptr(as, PPCI_LFS, (tmp & 31), - (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); - } else { - emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_fb(as, PPCI_FCTIWZ, tmp, left); - } - } - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); - if ((ir->op2 & IRCONV_SEXT)) - emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left); - else - emit_rot(as, PPCI_RLWINM, dest, left, 0, st == IRT_U8 ? 24 : 16, 31); - } else { /* 32/64 bit integer conversions. */ - /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */ - ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ - } - } -} - -static void asm_strto(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; - IRRef args[2]; - int32_t ofs; - RegSet drop = RSET_SCRATCH; - if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ - ra_evictset(as, drop); - asm_guardcc(as, CC_EQ); - emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. 
*/ - args[0] = ir->op1; /* GCstr *str */ - args[1] = ASMREF_TMP1; /* TValue *n */ - asm_gencall(as, ci, args); - /* Store the result to the spill slot or temp slots. */ - ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; - emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); -} - -/* -- Memory references --------------------------------------------------- */ - -/* Get pointer to TValue. */ -static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) -{ - IRIns *ir = IR(ref); - if (irt_isnum(ir->t)) { - if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ - ra_allockreg(as, i32ptr(ir_knum(ir)), dest); - else /* Otherwise force a spill and use the spill slot. */ - emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir)); - } else { - /* Otherwise use g->tmptv to hold the TValue. */ - RegSet allow = rset_exclude(RSET_GPR, dest); - Reg type; - emit_tai(as, PPCI_ADDI, dest, RID_JGL, (int32_t)offsetof(global_State, tmptv)-32768); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, allow); - emit_setgl(as, src, tmptv.gcr); - } - type = ra_allock(as, irt_toitype(ir->t), allow); - emit_setgl(as, type, tmptv.it); - } -} - -static void asm_aref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx, base; - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? tab : ir->op1; - ofs += 8*IR(ir->op2)->i; - if (checki16(ofs)) { - base = ra_alloc1(as, refa, RSET_GPR); - emit_tai(as, PPCI_ADDI, dest, base, ofs); - return; - } - } - base = ra_alloc1(as, ir->op1, RSET_GPR); - idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); - emit_tab(as, PPCI_ADD, dest, RID_TMP, base); - emit_slwi(as, RID_TMP, idx, 3); -} - -/* Inlined hash lookup. Specialized for key type and for const keys. 
-** The equivalent C code is: -** Node *n = hashkey(t, key); -** do { -** if (lj_obj_equal(&n->key, key)) return &n->val; -** } while ((n = nextnode(n))); -** return niltv(L); -*/ -static void asm_href(ASMState *as, IRIns *ir, IROp merge) -{ - RegSet allow = RSET_GPR; - int destused = ra_used(ir); - Reg dest = ra_dest(as, ir, allow); - Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); - Reg key = RID_NONE, tmp1 = RID_TMP, tmp2; - Reg tisnum = RID_NONE, tmpnum = RID_NONE; - IRRef refkey = ir->op2; - IRIns *irkey = IR(refkey); - IRType1 kt = irkey->t; - uint32_t khash; - MCLabel l_end, l_loop, l_next; - - rset_clear(allow, tab); - if (irt_isnum(kt)) { - key = ra_alloc1(as, refkey, RSET_FPR); - tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); - tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); - rset_clear(allow, tisnum); - } else if (!irt_ispri(kt)) { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - } - tmp2 = ra_scratch(as, allow); - rset_clear(allow, tmp2); - - /* Key not found in chain: jump to exit (if merged) or load niltv. */ - l_end = emit_label(as); - as->invmcp = NULL; - if (merge == IR_NE) - asm_guardcc(as, CC_EQ); - else if (destused) - emit_loada(as, dest, niltvg(J2G(as->J))); - - /* Follow hash chain until the end. */ - l_loop = --as->mcp; - emit_ai(as, PPCI_CMPWI, dest, 0); - emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(Node, next)); - l_next = emit_label(as); - - /* Type and value comparison. 
*/ - if (merge == IR_EQ) - asm_guardcc(as, CC_EQ); - else - emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); - if (irt_isnum(kt)) { - emit_fab(as, PPCI_FCMPU, 0, tmpnum, key); - emit_condbranch(as, PPCI_BC, CC_GE, l_next); - emit_ab(as, PPCI_CMPLW, tmp1, tisnum); - emit_fai(as, PPCI_LFD, tmpnum, dest, (int32_t)offsetof(Node, key.n)); - } else { - if (!irt_ispri(kt)) { - emit_ab(as, PPCI_CMPW, tmp2, key); - emit_condbranch(as, PPCI_BC, CC_NE, l_next); - } - emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); - if (!irt_ispri(kt)) - emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); - } - emit_tai(as, PPCI_LWZ, tmp1, dest, (int32_t)offsetof(Node, key.it)); - *l_loop = PPCI_BC | PPCF_Y | PPCF_CC(CC_NE) | - (((char *)as->mcp-(char *)l_loop) & 0xffffu); - - /* Load main position relative to tab->node into dest. */ - khash = irref_isk(refkey) ? ir_khash(irkey) : 1; - if (khash == 0) { - emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); - } else { - Reg tmphash = tmp1; - if (irref_isk(refkey)) - tmphash = ra_allock(as, khash, allow); - emit_tab(as, PPCI_ADD, dest, dest, tmp1); - emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node)); - emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash); - emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); - emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); - if (irref_isk(refkey)) { - /* Nothing to do. */ - } else if (irt_isstr(kt)) { - emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash)); - } else { /* Must match with hash*() in lj_tab.c. 
*/ - emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1); - emit_rotlwi(as, tmp2, tmp2, HASH_ROT3); - emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2); - emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31); - emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2); - if (irt_isnum(kt)) { - int32_t ofs = ra_spill(as, irkey); - emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1); - emit_rotlwi(as, dest, tmp1, HASH_ROT1); - emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1); - emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4); - emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs); - } else { - emit_asb(as, PPCI_XOR, tmp2, key, tmp1); - emit_rotlwi(as, dest, tmp1, HASH_ROT1); - emit_tai(as, PPCI_ADDI, tmp1, tmp2, HASH_BIAS); - emit_tai(as, PPCI_ADDIS, tmp2, key, (HASH_BIAS + 32768)>>16); - } - } - } -} - -static void asm_hrefk(ASMState *as, IRIns *ir) -{ - IRIns *kslot = IR(ir->op2); - IRIns *irkey = IR(kslot->op1); - int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); - int32_t kofs = ofs + (int32_t)offsetof(Node, key); - Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; - Reg node = ra_alloc1(as, ir->op1, RSET_GPR); - Reg key = RID_NONE, type = RID_TMP, idx = node; - RegSet allow = rset_exclude(RSET_GPR, node); - lua_assert(ofs % sizeof(Node) == 0); - if (ofs > 32736) { - idx = dest; - rset_clear(allow, dest); - kofs = (int32_t)offsetof(Node, key); - } else if (ra_hasreg(dest)) { - emit_tai(as, PPCI_ADDI, dest, node, ofs); - } - asm_guardcc(as, CC_NE); - if (!irt_ispri(irkey->t)) { - key = ra_scratch(as, allow); - rset_clear(allow, key); - } - rset_clear(allow, type); - if (irt_isnum(irkey->t)) { - emit_cmpi(as, key, (int32_t)ir_knum(irkey)->u32.lo); - asm_guardcc(as, CC_NE); - emit_cmpi(as, type, (int32_t)ir_knum(irkey)->u32.hi); - } else { - if (ra_hasreg(key)) { - emit_cmpi(as, key, irkey->i); /* May use RID_TMP, i.e. type. 
*/ - asm_guardcc(as, CC_NE); - } - emit_ai(as, PPCI_CMPWI, type, irt_toitype(irkey->t)); - } - if (ra_hasreg(key)) emit_tai(as, PPCI_LWZ, key, idx, kofs+4); - emit_tai(as, PPCI_LWZ, type, idx, kofs); - if (ofs > 32736) { - emit_tai(as, PPCI_ADDIS, dest, dest, (ofs + 32768) >> 16); - emit_tai(as, PPCI_ADDI, dest, node, ofs); - } -} - -static void asm_uref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; - emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR); - } else { - Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->o == IR_UREFC) { - asm_guardcc(as, CC_NE); - emit_ai(as, PPCI_CMPWI, RID_TMP, 1); - emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv)); - emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); - } else { - emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v)); - } - emit_tai(as, PPCI_LWZ, uv, func, - (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); - } -} - -static void asm_fref(ASMState *as, IRIns *ir) -{ - UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); -} - -static void asm_strref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - IRRef ref = ir->op2, refk = ir->op1; - int32_t ofs = (int32_t)sizeof(GCstr); - Reg r; - if (irref_isk(ref)) { - IRRef tmp = refk; refk = ref; ref = tmp; - } else if (!irref_isk(refk)) { - Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); - IRIns *irr = IR(ir->op2); - if (ra_hasreg(irr->r)) { - ra_noweak(as, irr->r); - right = irr->r; - } else if (mayfuse(as, irr->op2) && - irr->o == IR_ADD && irref_isk(irr->op2) && - checki16(ofs + IR(irr->op2)->i)) { - ofs += IR(irr->op2)->i; - right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left)); - } else { - right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left)); - } - emit_tai(as, PPCI_ADDI, dest, dest, ofs); - 
emit_tab(as, PPCI_ADD, dest, left, right); - return; - } - r = ra_alloc1(as, ref, RSET_GPR); - ofs += IR(refk)->i; - if (checki16(ofs)) - emit_tai(as, PPCI_ADDI, dest, r, ofs); - else - emit_tab(as, PPCI_ADD, dest, r, - ra_allock(as, ofs, rset_exclude(RSET_GPR, r))); -} - -/* -- Loads and stores ---------------------------------------------------- */ - -static PPCIns asm_fxloadins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: return PPCI_LBZ; /* Needs sign-extension. */ - case IRT_U8: return PPCI_LBZ; - case IRT_I16: return PPCI_LHA; - case IRT_U16: return PPCI_LHZ; - case IRT_NUM: return PPCI_LFD; - case IRT_FLOAT: return PPCI_LFS; - default: return PPCI_LWZ; - } -} - -static PPCIns asm_fxstoreins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: case IRT_U8: return PPCI_STB; - case IRT_I16: case IRT_U16: return PPCI_STH; - case IRT_NUM: return PPCI_STFD; - case IRT_FLOAT: return PPCI_STFS; - default: return PPCI_STW; - } -} - -static void asm_fload(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - PPCIns pi = asm_fxloadins(ir); - Reg idx; - int32_t ofs; - if (ir->op1 == REF_NIL) { - idx = RID_JGL; - ofs = (ir->op2 << 2) - 32768; - } else { - idx = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->op2 == IRFL_TAB_ARRAY) { - ofs = asm_fuseabase(as, ir->op1); - if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ - emit_tai(as, PPCI_ADDI, dest, idx, ofs); - return; - } - } - ofs = field_ofs[ir->op2]; - } - lua_assert(!irt_isi8(ir->t)); - emit_tai(as, pi, dest, idx, ofs); -} - -static void asm_fstore(ASMState *as, IRIns *ir) -{ - if (ir->r != RID_SINK) { - Reg src = ra_alloc1(as, ir->op2, RSET_GPR); - IRIns *irf = IR(ir->op1); - Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); - int32_t ofs = field_ofs[irf->op2]; - PPCIns pi = asm_fxstoreins(ir); - emit_tai(as, pi, src, idx, ofs); - } -} - -static void asm_xload(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? 
RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - if (irt_isi8(ir->t)) - emit_as(as, PPCI_EXTSB, dest, dest); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); -} - -static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) -{ - IRIns *irb; - if (ir->r == RID_SINK) - return; - if (ofs == 0 && mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP && - ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) { - /* Fuse BSWAP with XSTORE to stwbrx. */ - Reg src = ra_alloc1(as, irb->op1, RSET_GPR); - asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); - } else { - Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, - rset_exclude(RSET_GPR, src), ofs); - } -} - -#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) - -static void asm_ahuvload(ASMState *as, IRIns *ir) -{ - IRType1 t = ir->t; - Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx; - RegSet allow = RSET_GPR; - int32_t ofs = AHUREF_LSX; - if (ra_used(ir)) { - lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); - if (!irt_isnum(t)) ofs = 0; - dest = ra_dest(as, ir, irt_isnum(t) ? 
RSET_FPR : RSET_GPR); - rset_clear(allow, dest); - } - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - if (irt_isnum(t)) { - Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx)); - asm_guardcc(as, CC_GE); - emit_ab(as, PPCI_CMPLW, type, tisnum); - if (ra_hasreg(dest)) { - if (ofs == AHUREF_LSX) { - tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, - (idx&255)), (idx>>8))); - emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp); - } else { - emit_fai(as, PPCI_LFD, dest, idx, ofs); - } - } - } else { - asm_guardcc(as, CC_NE); - emit_ai(as, PPCI_CMPWI, type, irt_toitype(t)); - if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, idx, ofs+4); - } - if (ofs == AHUREF_LSX) { - emit_tab(as, PPCI_LWZX, type, (idx&255), tmp); - emit_slwi(as, tmp, (idx>>8), 3); - } else { - emit_tai(as, PPCI_LWZ, type, idx, ofs); - } -} - -static void asm_ahustore(ASMState *as, IRIns *ir) -{ - RegSet allow = RSET_GPR; - Reg idx, src = RID_NONE, type = RID_NONE; - int32_t ofs = AHUREF_LSX; - if (ir->r == RID_SINK) - return; - if (irt_isnum(ir->t)) { - src = ra_alloc1(as, ir->op2, RSET_FPR); - } else { - if (!irt_ispri(ir->t)) { - src = ra_alloc1(as, ir->op2, allow); - rset_clear(allow, src); - ofs = 0; - } - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - rset_clear(allow, type); - } - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - if (irt_isnum(ir->t)) { - if (ofs == AHUREF_LSX) { - emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP); - emit_slwi(as, RID_TMP, (idx>>8), 3); - } else { - emit_fai(as, PPCI_STFD, src, idx, ofs); - } - } else { - if (ra_hasreg(src)) - emit_tai(as, PPCI_STW, src, idx, ofs+4); - if (ofs == AHUREF_LSX) { - emit_tab(as, PPCI_STWX, type, (idx&255), RID_TMP); - emit_slwi(as, RID_TMP, (idx>>8), 3); - } else { - emit_tai(as, PPCI_STW, type, idx, ofs); - } - } -} - -static void asm_sload(ASMState *as, IRIns *ir) -{ - int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 
0 : 4); - IRType1 t = ir->t; - Reg dest = RID_NONE, type = RID_NONE, base; - RegSet allow = RSET_GPR; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); - lua_assert(LJ_DUALNUM || - !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); - if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { - dest = ra_scratch(as, RSET_FPR); - asm_tointg(as, ir, dest); - t.irt = IRT_NUM; /* Continue with a regular number type check. */ - } else if (ra_used(ir)) { - lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); - dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); - rset_clear(allow, dest); - base = ra_alloc1(as, REF_BASE, allow); - rset_clear(allow, base); - if ((ir->op2 & IRSLOAD_CONVERT)) { - if (irt_isint(t)) { - emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); - dest = ra_scratch(as, RSET_FPR); - emit_fai(as, PPCI_STFD, dest, RID_SP, SPOFS_TMP); - emit_fb(as, PPCI_FCTIWZ, dest, dest); - t.irt = IRT_NUM; /* Check for original type. */ - } else { - Reg tmp = ra_scratch(as, allow); - Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, tmp)); - Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); - emit_fab(as, PPCI_FSUB, dest, dest, fbias); - emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); - emit_lsptr(as, PPCI_LFS, (fbias & 31), - (void *)&as->J->k32[LJ_K32_2P52_2P31], - rset_clear(allow, hibias)); - emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO); - emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); - emit_asi(as, PPCI_XORIS, tmp, tmp, 0x8000); - dest = tmp; - t.irt = IRT_INT; /* Check for original type. 
*/ - } - } - goto dotypecheck; - } - base = ra_alloc1(as, REF_BASE, allow); - rset_clear(allow, base); -dotypecheck: - if (irt_isnum(t)) { - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); - asm_guardcc(as, CC_GE); - emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum); - type = RID_TMP; - } - if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4); - } else { - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - asm_guardcc(as, CC_NE); - emit_ai(as, PPCI_CMPWI, RID_TMP, irt_toitype(t)); - type = RID_TMP; - } - if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, base, ofs); - } - if (ra_hasreg(type)) emit_tai(as, PPCI_LWZ, type, base, ofs-4); -} - -/* -- Allocations --------------------------------------------------------- */ - -#if LJ_HASFFI -static void asm_cnew(ASMState *as, IRIns *ir) -{ - CTState *cts = ctype_ctsG(J2G(as->J)); - CTypeID id = (CTypeID)IR(ir->op1)->i; - CTSize sz; - CTInfo info = lj_ctype_info(cts, id, &sz); - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; - IRRef args[4]; - RegSet drop = RSET_SCRATCH; - lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); - - as->gcsteps++; - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - ra_evictset(as, drop); - if (ra_used(ir)) - ra_destreg(as, ir, RID_RET); /* GCcdata * */ - - /* Initialize immutable cdata object. */ - if (ir->o == IR_CNEWI) { - RegSet allow = (RSET_GPR & ~RSET_SCRATCH); - int32_t ofs = sizeof(GCcdata); - lua_assert(sz == 4 || sz == 8); - if (sz == 8) { - ofs += 4; - lua_assert((ir+1)->o == IR_HIOP); - } - for (;;) { - Reg r = ra_alloc1(as, ir->op2, allow); - emit_tai(as, PPCI_STW, r, RID_RET, ofs); - rset_clear(allow, r); - if (ofs == sizeof(GCcdata)) break; - ofs -= 4; ir++; - } - } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. 
*/ - ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ir->op1; /* CTypeID id */ - args[2] = ir->op2; /* CTSize sz */ - args[3] = ASMREF_TMP1; /* CTSize align */ - asm_gencall(as, ci, args); - emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); - return; - } - - /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ - emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); - emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); - emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA); - emit_ti(as, PPCI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */ - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ASMREF_TMP1; /* MSize size */ - asm_gencall(as, ci, args); - ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), - ra_releasetmp(as, ASMREF_TMP1)); -} -#else -#define asm_cnew(as, ir) ((void)0) -#endif - -/* -- Write barriers ------------------------------------------------------ */ - -static void asm_tbar(ASMState *as, IRIns *ir) -{ - Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); - Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); - Reg link = RID_TMP; - MCLabel l_end = emit_label(as); - emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist)); - emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked)); - emit_setgl(as, tab, gc.grayagain); - lua_assert(LJ_GC_BLACK == 0x04); - emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28); /* Clear black bit. */ - emit_getgl(as, link, gc.grayagain); - emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); - emit_asi(as, PPCI_ANDIDOT, RID_TMP, mark, LJ_GC_BLACK); - emit_tai(as, PPCI_LBZ, mark, tab, (int32_t)offsetof(GCtab, marked)); -} - -static void asm_obar(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; - IRRef args[2]; - MCLabel l_end; - Reg obj, val, tmp; - /* No need for other object barriers (yet). 
*/ - lua_assert(IR(ir->op1)->o == IR_UREFC); - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ir->op1; /* TValue *tv */ - asm_gencall(as, ci, args); - emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); - obj = IR(ir->op1)->r; - tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); - emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); - emit_asi(as, PPCI_ANDIDOT, tmp, tmp, LJ_GC_BLACK); - emit_condbranch(as, PPCI_BC, CC_EQ, l_end); - emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, LJ_GC_WHITES); - val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); - emit_tai(as, PPCI_LBZ, tmp, obj, - (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); - emit_tai(as, PPCI_LBZ, RID_TMP, val, (int32_t)offsetof(GChead, marked)); -} - -/* -- Arithmetic and logic operations ------------------------------------- */ - -static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - if (pi == PPCI_FMUL) - emit_fac(as, pi, dest, left, right); - else - emit_fab(as, pi, dest, left, right); -} - -static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); - emit_fb(as, pi, dest, left); -} - -static void asm_fpmath(ASMState *as, IRIns *ir) -{ - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - return; - if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT)) - asm_fpunary(as, ir, PPCI_FSQRT); - else - asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); -} - -static void asm_add(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD)) - asm_fparith(as, ir, PPCI_FADD); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - PPCIns pi; - if (irref_isk(ir->op2)) 
{ - int32_t k = IR(ir->op2)->i; - if (checki16(k)) { - pi = PPCI_ADDI; - /* May fail due to spills/restores above, but simplifies the logic. */ - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi = PPCI_ADDICDOT; - } - emit_tai(as, pi, dest, left, k); - return; - } else if ((k & 0xffff) == 0) { - emit_tai(as, PPCI_ADDIS, dest, left, (k >> 16)); - return; - } else if (!as->sectref) { - emit_tai(as, PPCI_ADDIS, dest, dest, (k + 32768) >> 16); - emit_tai(as, PPCI_ADDI, dest, left, k); - return; - } - } - pi = PPCI_ADD; - /* May fail due to spills/restores above, but simplifies the logic. */ - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi |= PPCF_DOT; - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_tab(as, pi, dest, left, right); - } -} - -static void asm_sub(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB)) - asm_fparith(as, ir, PPCI_FSUB); - } else { - PPCIns pi = PPCI_SUBF; - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left, right; - if (irref_isk(ir->op1)) { - int32_t k = IR(ir->op1)->i; - if (checki16(k)) { - right = ra_alloc1(as, ir->op2, RSET_GPR); - emit_tai(as, PPCI_SUBFIC, dest, right, k); - return; - } - } - /* May fail due to spills/restores above, but simplifies the logic. */ - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi |= PPCF_DOT; - } - left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_tab(as, pi, dest, right, left); /* Subtract right _from_ left. 
*/ - } -} - -static void asm_mul(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - asm_fparith(as, ir, PPCI_FMUL); - } else { - PPCIns pi = PPCI_MULLW; - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - if (checki16(k)) { - emit_tai(as, PPCI_MULLI, dest, left, k); - return; - } - } - /* May fail due to spills/restores above, but simplifies the logic. */ - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi |= PPCF_DOT; - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_tab(as, pi, dest, left, right); - } -} - -#define asm_div(as, ir) asm_fparith(as, ir, PPCI_FDIV) -#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) -#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) - -static void asm_neg(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - asm_fpunary(as, ir, PPCI_FNEG); - } else { - Reg dest, left; - PPCIns pi = PPCI_NEG; - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi |= PPCF_DOT; - } - dest = ra_dest(as, ir, RSET_GPR); - left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - emit_tab(as, pi, dest, left, 0); - } -} - -#define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS) -#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) -#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) - -static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) -{ - Reg dest, left, right; - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - } - asm_guardcc(as, CC_SO); - dest = ra_dest(as, ir, RSET_GPR); - left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - if (pi == PPCI_SUBFO) { Reg tmp = left; left = right; right = tmp; } - emit_tab(as, pi|PPCF_DOT, dest, left, right); -} - -#define asm_addov(as, ir) asm_arithov(as, ir, PPCI_ADDO) -#define asm_subov(as, ir) asm_arithov(as, ir, PPCI_SUBFO) -#define asm_mulov(as, ir) asm_arithov(as, 
ir, PPCI_MULLWO) - -#if LJ_HASFFI -static void asm_add64(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); - PPCIns pi = PPCI_ADDE; - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - if (k == 0) - pi = PPCI_ADDZE; - else if (k == -1) - pi = PPCI_ADDME; - else - goto needright; - right = 0; - } else { - needright: - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - } - emit_tab(as, pi, dest, left, right); - ir--; - dest = ra_dest(as, ir, RSET_GPR); - left = ra_alloc1(as, ir->op1, RSET_GPR); - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - if (checki16(k)) { - emit_tai(as, PPCI_ADDIC, dest, left, k); - return; - } - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_tab(as, PPCI_ADDC, dest, left, right); -} - -static void asm_sub64(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left, right = ra_alloc1(as, ir->op2, RSET_GPR); - PPCIns pi = PPCI_SUBFE; - if (irref_isk(ir->op1)) { - int32_t k = IR(ir->op1)->i; - if (k == 0) - pi = PPCI_SUBFZE; - else if (k == -1) - pi = PPCI_SUBFME; - else - goto needleft; - left = 0; - } else { - needleft: - left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, right)); - } - emit_tab(as, pi, dest, right, left); /* Subtract right _from_ left. 
*/ - ir--; - dest = ra_dest(as, ir, RSET_GPR); - right = ra_alloc1(as, ir->op2, RSET_GPR); - if (irref_isk(ir->op1)) { - int32_t k = IR(ir->op1)->i; - if (checki16(k)) { - emit_tai(as, PPCI_SUBFIC, dest, right, k); - return; - } - } - left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, right)); - emit_tab(as, PPCI_SUBFC, dest, right, left); -} - -static void asm_neg64(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - emit_tab(as, PPCI_SUBFZE, dest, left, 0); - ir--; - dest = ra_dest(as, ir, RSET_GPR); - left = ra_alloc1(as, ir->op1, RSET_GPR); - emit_tai(as, PPCI_SUBFIC, dest, left, 0); -} -#endif - -static void asm_bnot(ASMState *as, IRIns *ir) -{ - Reg dest, left, right; - PPCIns pi = PPCI_NOR; - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi |= PPCF_DOT; - } - dest = ra_dest(as, ir, RSET_GPR); - if (mayfuse(as, ir->op1)) { - IRIns *irl = IR(ir->op1); - if (irl->o == IR_BAND) - pi ^= (PPCI_NOR ^ PPCI_NAND); - else if (irl->o == IR_BXOR) - pi ^= (PPCI_NOR ^ PPCI_EQV); - else if (irl->o != IR_BOR) - goto nofuse; - left = ra_hintalloc(as, irl->op1, dest, RSET_GPR); - right = ra_alloc1(as, irl->op2, rset_exclude(RSET_GPR, left)); - } else { -nofuse: - left = right = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - } - emit_asb(as, pi, dest, left, right); -} - -static void asm_bswap(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - IRIns *irx; - if (mayfuse(as, ir->op1) && (irx = IR(ir->op1))->o == IR_XLOAD && - ra_noreg(irx->r) && (irt_isint(irx->t) || irt_isu32(irx->t))) { - /* Fuse BSWAP with XLOAD to lwbrx. 
*/ - asm_fusexrefx(as, PPCI_LWBRX, dest, irx->op1, RSET_GPR); - } else { - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - Reg tmp = dest; - if (tmp == left) { - tmp = RID_TMP; - emit_mr(as, dest, RID_TMP); - } - emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 16, 23); - emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 0, 7); - emit_rotlwi(as, tmp, left, 8); - } -} - -/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */ -static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) -{ - IRIns *ir; - Reg left; - if (mayfuse(as, ref) && (ir = IR(ref), ra_noreg(ir->r)) && - irref_isk(ir->op2) && ir->o >= IR_BSHL && ir->o <= IR_BROR) { - int32_t sh = (IR(ir->op2)->i & 31); - switch (ir->o) { - case IR_BSHL: - if ((mask & ((1u<<sh)-1))) goto nofuse; - break; - case IR_BSHR: - if ((mask & ~((~0u)>>sh))) goto nofuse; - sh = ((32-sh)&31); - break; - case IR_BROL: - break; - default: - goto nofuse; - } - left = ra_alloc1(as, ir->op1, RSET_GPR); - *--as->mcp = pi | PPCF_T(left) | PPCF_B(sh); - return; - } -nofuse: - left = ra_alloc1(as, ref, RSET_GPR); - *--as->mcp = pi | PPCF_T(left); -} - -static void asm_band(ASMState *as, IRIns *ir) -{ - Reg dest, left, right; - IRRef lref = ir->op1; - PPCIns dot = 0; - IRRef op2; - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - dot = PPCF_DOT; - } - dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - if (k) { - /* First check for a contiguous bitmask as used by rlwinm. 
*/ - uint32_t s1 = lj_ffs((uint32_t)k); - uint32_t k1 = ((uint32_t)k >> s1); - if ((k1 & (k1+1)) == 0) { - asm_fuseandsh(as, PPCI_RLWINM|dot | PPCF_A(dest) | - PPCF_MB(31-lj_fls((uint32_t)k)) | PPCF_ME(31-s1), - k, lref); - return; - } - if (~(uint32_t)k) { - uint32_t s2 = lj_ffs(~(uint32_t)k); - uint32_t k2 = (~(uint32_t)k >> s2); - if ((k2 & (k2+1)) == 0) { - asm_fuseandsh(as, PPCI_RLWINM|dot | PPCF_A(dest) | - PPCF_MB(32-s2) | PPCF_ME(30-lj_fls(~(uint32_t)k)), - k, lref); - return; - } - } - } - if (checku16(k)) { - left = ra_alloc1(as, lref, RSET_GPR); - emit_asi(as, PPCI_ANDIDOT, dest, left, k); - return; - } else if ((k & 0xffff) == 0) { - left = ra_alloc1(as, lref, RSET_GPR); - emit_asi(as, PPCI_ANDISDOT, dest, left, (k >> 16)); - return; - } - } - op2 = ir->op2; - if (mayfuse(as, op2) && IR(op2)->o == IR_BNOT && ra_noreg(IR(op2)->r)) { - dot ^= (PPCI_AND ^ PPCI_ANDC); - op2 = IR(op2)->op1; - } - left = ra_hintalloc(as, lref, dest, RSET_GPR); - right = ra_alloc1(as, op2, rset_exclude(RSET_GPR, left)); - emit_asb(as, PPCI_AND ^ dot, dest, left, right); -} - -static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - Reg tmp = left; - if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) { - if (!checku16(k)) { - emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16)); - if ((k & 0xffff) == 0) return; - } - emit_asi(as, pik, dest, left, k); - return; - } - } - /* May fail due to spills/restores above, but simplifies the logic. 
*/ - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi |= PPCF_DOT; - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_asb(as, pi, dest, left, right); -} - -#define asm_bor(as, ir) asm_bitop(as, ir, PPCI_OR, PPCI_ORI) -#define asm_bxor(as, ir) asm_bitop(as, ir, PPCI_XOR, PPCI_XORI) - -static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) -{ - Reg dest, left; - Reg dot = 0; - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - dot = PPCF_DOT; - } - dest = ra_dest(as, ir, RSET_GPR); - left = ra_alloc1(as, ir->op1, RSET_GPR); - if (irref_isk(ir->op2)) { /* Constant shifts. */ - int32_t shift = (IR(ir->op2)->i & 31); - if (pik == 0) /* SLWI */ - emit_rot(as, PPCI_RLWINM|dot, dest, left, shift, 0, 31-shift); - else if (pik == 1) /* SRWI */ - emit_rot(as, PPCI_RLWINM|dot, dest, left, (32-shift)&31, shift, 31); - else - emit_asb(as, pik|dot, dest, left, shift); - } else { - Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_asb(as, pi|dot, dest, left, right); - } -} - -#define asm_bshl(as, ir) asm_bitshift(as, ir, PPCI_SLW, 0) -#define asm_bshr(as, ir) asm_bitshift(as, ir, PPCI_SRW, 1) -#define asm_bsar(as, ir) asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI) -#define asm_brol(as, ir) \ - asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \ - PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)) -#define asm_bror(as, ir) lua_assert(0) - -static void asm_min_max(ASMState *as, IRIns *ir, int ismax) -{ - if (irt_isnum(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg tmp = dest; - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - if (tmp == left || tmp == right) - tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR, - dest), left), right)); - emit_facb(as, PPCI_FSEL, dest, tmp, - ismax ? left : right, ismax ? 
right : left); - emit_fab(as, PPCI_FSUB, tmp, left, right); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg tmp1 = RID_TMP, tmp2 = dest; - Reg right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - if (tmp2 == left || tmp2 == right) - tmp2 = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, - dest), left), right)); - emit_tab(as, PPCI_ADD, dest, tmp2, right); - emit_asb(as, ismax ? PPCI_ANDC : PPCI_AND, tmp2, tmp2, tmp1); - emit_tab(as, PPCI_SUBFE, tmp1, tmp1, tmp1); - emit_tab(as, PPCI_SUBFC, tmp2, tmp2, tmp1); - emit_asi(as, PPCI_XORIS, tmp2, right, 0x8000); - emit_asi(as, PPCI_XORIS, tmp1, left, 0x8000); - } -} - -#define asm_min(as, ir) asm_min_max(as, ir, 0) -#define asm_max(as, ir) asm_min_max(as, ir, 1) - -/* -- Comparisons --------------------------------------------------------- */ - -#define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */ -#define CC_TWO 0x80 /* Check two flags for FP comparison. */ - -/* Map of comparisons to flags. ORDER IR. */ -static const uint8_t asm_compmap[IR_ABC+1] = { - /* op int cc FP cc */ - /* LT */ CC_GE + (CC_GE<<4), - /* GE */ CC_LT + (CC_LE<<4) + CC_TWO, - /* LE */ CC_GT + (CC_GE<<4) + CC_TWO, - /* GT */ CC_LE + (CC_LE<<4), - /* ULT */ CC_GE + CC_UNSIGNED + (CC_GT<<4) + CC_TWO, - /* UGE */ CC_LT + CC_UNSIGNED + (CC_LT<<4), - /* ULE */ CC_GT + CC_UNSIGNED + (CC_GT<<4), - /* UGT */ CC_LE + CC_UNSIGNED + (CC_LT<<4) + CC_TWO, - /* EQ */ CC_NE + (CC_NE<<4), - /* NE */ CC_EQ + (CC_EQ<<4), - /* ABC */ CC_LE + CC_UNSIGNED + (CC_LT<<4) + CC_TWO /* Same as UGT. */ -}; - -static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc) -{ - Reg right, left = ra_alloc1(as, lref, RSET_GPR); - if (irref_isk(rref)) { - int32_t k = IR(rref)->i; - if ((cc & CC_UNSIGNED) == 0) { /* Signed comparison with constant. */ - if (checki16(k)) { - emit_tai(as, PPCI_CMPWI, cr, left, k); - /* Signed comparison with zero and referencing previous ins? 
*/ - if (k == 0 && lref == as->curins-1) - as->flagmcp = as->mcp; /* Allow elimination of the compare. */ - return; - } else if ((cc & 3) == (CC_EQ & 3)) { /* Use CMPLWI for EQ or NE. */ - if (checku16(k)) { - emit_tai(as, PPCI_CMPLWI, cr, left, k); - return; - } else if (!as->sectref && ra_noreg(IR(rref)->r)) { - emit_tai(as, PPCI_CMPLWI, cr, RID_TMP, k); - emit_asi(as, PPCI_XORIS, RID_TMP, left, (k >> 16)); - return; - } - } - } else { /* Unsigned comparison with constant. */ - if (checku16(k)) { - emit_tai(as, PPCI_CMPLWI, cr, left, k); - return; - } - } - } - right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left)); - emit_tab(as, (cc & CC_UNSIGNED) ? PPCI_CMPLW : PPCI_CMPW, cr, left, right); -} - -static void asm_comp(ASMState *as, IRIns *ir) -{ - PPCCC cc = asm_compmap[ir->o]; - if (irt_isnum(ir->t)) { - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - asm_guardcc(as, (cc >> 4)); - if ((cc & CC_TWO)) - emit_tab(as, PPCI_CROR, ((cc>>4)&3), ((cc>>4)&3), (CC_EQ&3)); - emit_fab(as, PPCI_FCMPU, 0, left, right); - } else { - IRRef lref = ir->op1, rref = ir->op2; - if (irref_isk(lref) && !irref_isk(rref)) { - /* Swap constants to the right (only for ABC). */ - IRRef tmp = lref; lref = rref; rref = tmp; - if ((cc & 2) == 0) cc ^= 1; /* LT <-> GT, LE <-> GE */ - } - asm_guardcc(as, cc); - asm_intcomp_(as, lref, rref, 0, cc); - } -} - -#define asm_equal(as, ir) asm_comp(as, ir) - -#if LJ_HASFFI -/* 64 bit integer comparisons. */ -static void asm_comp64(ASMState *as, IRIns *ir) -{ - PPCCC cc = asm_compmap[(ir-1)->o]; - if ((cc&3) == (CC_EQ&3)) { - asm_guardcc(as, cc); - emit_tab(as, (cc&4) ? PPCI_CRAND : PPCI_CROR, - (CC_EQ&3), (CC_EQ&3), 4+(CC_EQ&3)); - } else { - asm_guardcc(as, CC_EQ); - emit_tab(as, PPCI_CROR, (CC_EQ&3), (CC_EQ&3), ((cc^~(cc>>2))&1)); - emit_tab(as, (cc&4) ? PPCI_CRAND : PPCI_CRANDC, - (CC_EQ&3), (CC_EQ&3), 4+(cc&3)); - } - /* Loword comparison sets cr1 and is unsigned, except for equality. 
*/ - asm_intcomp_(as, (ir-1)->op1, (ir-1)->op2, 4, - cc | ((cc&3) == (CC_EQ&3) ? 0 : CC_UNSIGNED)); - /* Hiword comparison sets cr0. */ - asm_intcomp_(as, ir->op1, ir->op2, 0, cc); - as->flagmcp = NULL; /* Doesn't work here. */ -} -#endif - -/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ - -/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ -static void asm_hiop(ASMState *as, IRIns *ir) -{ -#if LJ_HASFFI - /* HIOP is marked as a store because it needs its own DCE logic. */ - int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ - if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; - if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ - as->curins--; /* Always skip the CONV. */ - if (usehi || uselo) - asm_conv64(as, ir); - return; - } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ - as->curins--; /* Always skip the loword comparison. */ - asm_comp64(as, ir); - return; - } else if ((ir-1)->o == IR_XSTORE) { - as->curins--; /* Handle both stores here. */ - if ((ir-1)->r != RID_SINK) { - asm_xstore_(as, ir, 0); - asm_xstore_(as, ir-1, 4); - } - return; - } - if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ - switch ((ir-1)->o) { - case IR_ADD: as->curins--; asm_add64(as, ir); break; - case IR_SUB: as->curins--; asm_sub64(as, ir); break; - case IR_NEG: as->curins--; asm_neg64(as, ir); break; - case IR_CALLN: - case IR_CALLXS: - if (!uselo) - ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ - break; - case IR_CNEWI: - /* Nothing to do here. Handled by lo op itself. */ - break; - default: lua_assert(0); break; - } -#else - UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. 
*/ -#endif -} - -/* -- Profiling ----------------------------------------------------------- */ - -static void asm_prof(ASMState *as, IRIns *ir) -{ - UNUSED(ir); - asm_guardcc(as, CC_NE); - emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE); - emit_lsglptr(as, PPCI_LBZ, RID_TMP, - (int32_t)offsetof(global_State, hookmask)); -} - -/* -- Stack handling ------------------------------------------------------ */ - -/* Check Lua stack size for overflow. Use exit handler as fallback. */ -static void asm_stack_check(ASMState *as, BCReg topslot, - IRIns *irp, RegSet allow, ExitNo exitno) -{ - /* Try to get an unused temp. register, otherwise spill/restore RID_RET*. */ - Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; - rset_clear(allow, pbase); - tmp = allow ? rset_pickbot(allow) : - (pbase == RID_RETHI ? RID_RETLO : RID_RETHI); - emit_condbranch(as, PPCI_BC, CC_LT, asm_exitstub_addr(as, exitno)); - if (allow == RSET_EMPTY) /* Restore temp. register. */ - emit_tai(as, PPCI_LWZ, tmp, RID_SP, SPOFS_TMPW); - else - ra_modified(as, tmp); - emit_ai(as, PPCI_CMPLWI, RID_TMP, (int32_t)(8*topslot)); - emit_tab(as, PPCI_SUBF, RID_TMP, pbase, tmp); - emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack)); - if (pbase == RID_TMP) - emit_getgl(as, RID_TMP, jit_base); - emit_getgl(as, tmp, cur_L); - if (allow == RSET_EMPTY) /* Spill temp. register. */ - emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW); -} - -/* Restore Lua stack from on-trace state. */ -static void asm_stack_restore(ASMState *as, SnapShot *snap) -{ - SnapEntry *map = &as->T->snapmap[snap->mapofs]; - SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; - MSize n, nent = snap->nent; - /* Store the value of all modified slots to the Lua stack. 
*/ - for (n = 0; n < nent; n++) { - SnapEntry sn = map[n]; - BCReg s = snap_slot(sn); - int32_t ofs = 8*((int32_t)s-1); - IRRef ref = snap_ref(sn); - IRIns *ir = IR(ref); - if ((sn & SNAP_NORESTORE)) - continue; - if (irt_isnum(ir->t)) { - Reg src = ra_alloc1(as, ref, RSET_FPR); - emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); - } else { - Reg type; - RegSet allow = rset_exclude(RSET_GPR, RID_BASE); - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, allow); - rset_clear(allow, src); - emit_tai(as, PPCI_STW, src, RID_BASE, ofs+4); - } - if ((sn & (SNAP_CONT|SNAP_FRAME))) { - if (s == 0) continue; /* Do not overwrite link to previous frame. */ - type = ra_allock(as, (int32_t)(*flinks--), allow); - } else { - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - } - emit_tai(as, PPCI_STW, type, RID_BASE, ofs); - } - checkmclim(as); - } - lua_assert(map + nent == flinks); -} - -/* -- GC handling --------------------------------------------------------- */ - -/* Check GC threshold and do one or more GC steps. */ -static void asm_gc_check(ASMState *as) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; - IRRef args[2]; - MCLabel l_end; - Reg tmp; - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ - asm_guardcc(as, CC_NE); /* Assumes asm_snap_prep() already done. */ - emit_ai(as, PPCI_CMPWI, RID_RET, 0); - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ASMREF_TMP2; /* MSize steps */ - asm_gencall(as, ci, args); - emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); - tmp = ra_releasetmp(as, ASMREF_TMP2); - emit_loadi(as, tmp, as->gcsteps); - /* Jump around GC step if GC total < GC threshold. 
*/ - emit_condbranch(as, PPCI_BC|PPCF_Y, CC_LT, l_end); - emit_ab(as, PPCI_CMPLW, RID_TMP, tmp); - emit_getgl(as, tmp, gc.threshold); - emit_getgl(as, RID_TMP, gc.total); - as->gcsteps = 0; - checkmclim(as); -} - -/* -- Loop handling ------------------------------------------------------- */ - -/* Fixup the loop branch. */ -static void asm_loop_fixup(ASMState *as) -{ - MCode *p = as->mctop; - MCode *target = as->mcp; - if (as->loopinv) { /* Inverted loop branch? */ - /* asm_guardcc already inverted the cond branch and patched the final b. */ - p[-2] = (p[-2] & (0xffff0000u & ~PPCF_Y)) | (((target-p+2) & 0x3fffu) << 2); - } else { - p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2); - } -} - -/* -- Head of trace ------------------------------------------------------- */ - -/* Coalesce BASE register for a root trace. */ -static void asm_head_root_base(ASMState *as) -{ - IRIns *ir = IR(REF_BASE); - Reg r = ir->r; - if (ra_hasreg(r)) { - ra_free(as, r); - if (rset_test(as->modset, r) || irt_ismarked(ir->t)) - ir->r = RID_INIT; /* No inheritance for modified BASE register. */ - if (r != RID_BASE) - emit_mr(as, r, RID_BASE); - } -} - -/* Coalesce BASE register for a side trace. */ -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) -{ - IRIns *ir = IR(REF_BASE); - Reg r = ir->r; - if (ra_hasreg(r)) { - ra_free(as, r); - if (rset_test(as->modset, r) || irt_ismarked(ir->t)) - ir->r = RID_INIT; /* No inheritance for modified BASE register. */ - if (irp->r == r) { - rset_clear(allow, r); /* Mark same BASE register as coalesced. */ - } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { - rset_clear(allow, irp->r); - emit_mr(as, r, irp->r); /* Move from coalesced parent reg. */ - } else { - emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ - } - } - return allow; -} - -/* -- Tail of trace ------------------------------------------------------- */ - -/* Fixup the tail code. 
*/ -static void asm_tail_fixup(ASMState *as, TraceNo lnk) -{ - MCode *p = as->mctop; - MCode *target; - int32_t spadj = as->T->spadjust; - if (spadj == 0) { - *--p = PPCI_NOP; - *--p = PPCI_NOP; - as->mctop = p; - } else { - /* Patch stack adjustment. */ - lua_assert(checki16(CFRAME_SIZE+spadj)); - p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); - p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; - } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2); -} - -/* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) -{ - MCode *p = as->mctop - 1; /* Leave room for exit branch. */ - if (as->loopref) { - as->invmcp = as->mcp = p; - } else { - as->mcp = p-2; /* Leave room for stack pointer adjustment. */ - as->invmcp = NULL; - } -} - -/* -- Trace setup --------------------------------------------------------- */ - -/* Ensure there are enough stack slots for call arguments. */ -static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - IRRef args[CCI_NARGS_MAX*2]; - uint32_t i, nargs = CCI_XNARGS(ci); - int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; - asm_collectargs(as, ir, ci, args); - for (i = 0; i < nargs; i++) - if (args[i] && irt_isfp(IR(args[i])->t)) { - if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; - } else { - if (ngpr > 0) ngpr--; else nslots++; - } - if (nslots > as->evenspill) /* Leave room for args in stack slots. */ - as->evenspill = nslots; - return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); -} - -static void asm_setup_target(ASMState *as) -{ - asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); -} - -/* -- Trace patching ------------------------------------------------------ */ - -/* Patch exit jumps of existing machine code to a new target. 
*/ -void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) -{ - MCode *p = T->mcode; - MCode *pe = (MCode *)((char *)p + T->szmcode); - MCode *px = exitstub_trace_addr(T, exitno); - MCode *cstart = NULL; - MCode *mcarea = lj_mcode_patch(J, p, 0); - int clearso = 0; - for (; p < pe; p++) { - /* Look for exitstub branch, try to replace with branch to target. */ - uint32_t ins = *p; - if ((ins & 0xfc000000u) == 0x40000000u && - ((ins ^ ((char *)px-(char *)p)) & 0xffffu) == 0) { - ptrdiff_t delta = (char *)target - (char *)p; - if (((ins >> 16) & 3) == (CC_SO&3)) { - clearso = sizeof(MCode); - delta -= sizeof(MCode); - } - /* Many, but not all short-range branches can be patched directly. */ - if (((delta + 0x8000) >> 16) == 0) { - *p = (ins & 0xffdf0000u) | ((uint32_t)delta & 0xffffu) | - ((delta & 0x8000) * (PPCF_Y/0x8000)); - if (!cstart) cstart = p; - } - } else if ((ins & 0xfc000000u) == PPCI_B && - ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) { - ptrdiff_t delta = (char *)target - (char *)p; - lua_assert(((delta + 0x02000000) >> 26) == 0); - *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu); - if (!cstart) cstart = p; - } - } - { /* Always patch long-range branch in exit stub itself. */ - ptrdiff_t delta = (char *)target - (char *)px - clearso; - lua_assert(((delta + 0x02000000) >> 26) == 0); - *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu); - } - if (!cstart) cstart = px; - lj_mcode_sync(cstart, px+1); - if (clearso) { /* Extend the current trace. Ugly workaround. */ - MCode *pp = J->cur.mcode; - J->cur.szmcode += sizeof(MCode); - *--pp = PPCI_MCRXR; /* Clear SO flag. */ - J->cur.mcode = pp; - lj_mcode_sync(pp, pp+1); - } - lj_mcode_patch(J, mcarea, 1); -} - diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h deleted file mode 100644 index dee8bdccd1..0000000000 --- a/src/lj_emit_arm.h +++ /dev/null @@ -1,357 +0,0 @@ -/* -** ARM instruction emitter. -** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h -*/ - -/* -- Constant encoding --------------------------------------------------- */ - -static uint8_t emit_invai[16] = { - /* AND */ (ARMI_AND^ARMI_BIC) >> 21, - /* EOR */ 0, - /* SUB */ (ARMI_SUB^ARMI_ADD) >> 21, - /* RSB */ 0, - /* ADD */ (ARMI_ADD^ARMI_SUB) >> 21, - /* ADC */ (ARMI_ADC^ARMI_SBC) >> 21, - /* SBC */ (ARMI_SBC^ARMI_ADC) >> 21, - /* RSC */ 0, - /* TST */ 0, - /* TEQ */ 0, - /* CMP */ (ARMI_CMP^ARMI_CMN) >> 21, - /* CMN */ (ARMI_CMN^ARMI_CMP) >> 21, - /* ORR */ 0, - /* MOV */ (ARMI_MOV^ARMI_MVN) >> 21, - /* BIC */ (ARMI_BIC^ARMI_AND) >> 21, - /* MVN */ (ARMI_MVN^ARMI_MOV) >> 21 -}; - -/* Encode constant in K12 format for data processing instructions. */ -static uint32_t emit_isk12(ARMIns ai, int32_t n) -{ - uint32_t invai, i, m = (uint32_t)n; - /* K12: unsigned 8 bit value, rotated in steps of two bits. */ - for (i = 0; i < 4096; i += 256, m = lj_rol(m, 2)) - if (m <= 255) return ARMI_K12|m|i; - /* Otherwise try negation/complement with the inverse instruction. */ - invai = emit_invai[((ai >> 21) & 15)]; - if (!invai) return 0; /* Failed. No inverse instruction. */ - m = ~(uint32_t)n; - if (invai == ((ARMI_SUB^ARMI_ADD) >> 21) || - invai == (ARMI_CMP^ARMI_CMN) >> 21) m++; - for (i = 0; i < 4096; i += 256, m = lj_rol(m, 2)) - if (m <= 255) return ARMI_K12|(invai<<21)|m|i; - return 0; /* Failed. 
*/ -} - -/* -- Emit basic instructions --------------------------------------------- */ - -static void emit_dnm(ASMState *as, ARMIns ai, Reg rd, Reg rn, Reg rm) -{ - *--as->mcp = ai | ARMF_D(rd) | ARMF_N(rn) | ARMF_M(rm); -} - -static void emit_dm(ASMState *as, ARMIns ai, Reg rd, Reg rm) -{ - *--as->mcp = ai | ARMF_D(rd) | ARMF_M(rm); -} - -static void emit_dn(ASMState *as, ARMIns ai, Reg rd, Reg rn) -{ - *--as->mcp = ai | ARMF_D(rd) | ARMF_N(rn); -} - -static void emit_nm(ASMState *as, ARMIns ai, Reg rn, Reg rm) -{ - *--as->mcp = ai | ARMF_N(rn) | ARMF_M(rm); -} - -static void emit_d(ASMState *as, ARMIns ai, Reg rd) -{ - *--as->mcp = ai | ARMF_D(rd); -} - -static void emit_n(ASMState *as, ARMIns ai, Reg rn) -{ - *--as->mcp = ai | ARMF_N(rn); -} - -static void emit_m(ASMState *as, ARMIns ai, Reg rm) -{ - *--as->mcp = ai | ARMF_M(rm); -} - -static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) -{ - lua_assert(ofs >= -255 && ofs <= 255); - if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; - *--as->mcp = ai | ARMI_LS_P | ARMI_LSX_I | ARMF_D(rd) | ARMF_N(rn) | - ((ofs & 0xf0) << 4) | (ofs & 0x0f); -} - -static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) -{ - lua_assert(ofs >= -4095 && ofs <= 4095); - /* Combine LDR/STR pairs to LDRD/STRD. */ - if (*as->mcp == (ai|ARMI_LS_P|ARMI_LS_U|ARMF_D(rd^1)|ARMF_N(rn)|(ofs^4)) && - (ai & ~(ARMI_LDR^ARMI_STR)) == ARMI_STR && rd != rn && - (uint32_t)ofs <= 252 && !(ofs & 3) && !((rd ^ (ofs >>2)) & 1) && - as->mcp != as->mcloop) { - as->mcp++; - emit_lsox(as, ai == ARMI_LDR ? 
ARMI_LDRD : ARMI_STRD, rd&~1, rn, ofs&~4); - return; - } - if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; - *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd) | ARMF_N(rn) | ofs; -} - -#if !LJ_SOFTFP -static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) -{ - lua_assert(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0); - if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; - *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd & 15) | ARMF_N(rn) | (ofs >> 2); -} -#endif - -/* -- Emit loads/stores --------------------------------------------------- */ - -/* Prefer spills of BASE/L. */ -#define emit_canremat(ref) ((ref) < ASMREF_L) - -/* Try to find a one step delta relative to another constant. */ -static int emit_kdelta1(ASMState *as, Reg d, int32_t i) -{ - RegSet work = ~as->freeset & RSET_GPR; - while (work) { - Reg r = rset_picktop(work); - IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != d); - if (emit_canremat(ref)) { - int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); - uint32_t k = emit_isk12(ARMI_ADD, delta); - if (k) { - if (k == ARMI_K12) - emit_dm(as, ARMI_MOV, d, r); - else - emit_dn(as, ARMI_ADD^k, d, r); - return 1; - } - } - rset_clear(work, r); - } - return 0; /* Failed. */ -} - -/* Try to find a two step delta relative to another constant. */ -static int emit_kdelta2(ASMState *as, Reg d, int32_t i) -{ - RegSet work = ~as->freeset & RSET_GPR; - while (work) { - Reg r = rset_picktop(work); - IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != d); - if (emit_canremat(ref)) { - int32_t other = ra_iskref(ref) ? 
ra_krefk(as, ref) : IR(ref)->i; - if (other) { - int32_t delta = i - other; - uint32_t sh, inv = 0, k2, k; - if (delta < 0) { delta = -delta; inv = ARMI_ADD^ARMI_SUB; } - sh = lj_ffs(delta) & ~1; - k2 = emit_isk12(0, delta & (255 << sh)); - k = emit_isk12(0, delta & ~(255 << sh)); - if (k) { - emit_dn(as, ARMI_ADD^k2^inv, d, d); - emit_dn(as, ARMI_ADD^k^inv, d, r); - return 1; - } - } - } - rset_clear(work, r); - } - return 0; /* Failed. */ -} - -/* Load a 32 bit constant into a GPR. */ -static void emit_loadi(ASMState *as, Reg r, int32_t i) -{ - uint32_t k = emit_isk12(ARMI_MOV, i); - lua_assert(rset_test(as->freeset, r) || r == RID_TMP); - if (k) { - /* Standard K12 constant. */ - emit_d(as, ARMI_MOV^k, r); - } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) { - /* 16 bit loword constant for ARMv6T2. */ - emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r); - } else if (emit_kdelta1(as, r, i)) { - /* One step delta relative to another constant. */ - } else if ((as->flags & JIT_F_ARMV6T2)) { - /* 32 bit hiword/loword constant for ARMv6T2. */ - emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), r); - emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r); - } else if (emit_kdelta2(as, r, i)) { - /* Two step delta relative to another constant. */ - } else { - /* Otherwise construct the constant with up to 4 instructions. */ - /* NYI: use mvn+bic, use pc-relative loads. */ - for (;;) { - uint32_t sh = lj_ffs(i) & ~1; - int32_t m = i & (255 << sh); - i &= ~(255 << sh); - if (i == 0) { - emit_d(as, ARMI_MOV ^ emit_isk12(0, m), r); - break; - } - emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), r, r); - } - } -} - -#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) - -static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); - -/* Get/set from constant pointer. 
*/ -static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) -{ - int32_t i = i32ptr(p); - emit_lso(as, ai, r, ra_allock(as, (i & ~4095), rset_exclude(RSET_GPR, r)), - (i & 4095)); -} - -#if !LJ_SOFTFP -/* Load a number constant into an FPR. */ -static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) -{ - cTValue *tv = ir_knum(ir); - int32_t i; - if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) { - uint32_t hi = tv->u32.hi; - uint32_t b = ((hi >> 22) & 0x1ff); - if (!(hi & 0xffff) && (b == 0x100 || b == 0x0ff)) { - *--as->mcp = ARMI_VMOVI_D | ARMF_D(r & 15) | - ((tv->u32.hi >> 12) & 0x00080000) | - ((tv->u32.hi >> 4) & 0x00070000) | - ((tv->u32.hi >> 16) & 0x0000000f); - return; - } - } - i = i32ptr(tv); - emit_vlso(as, ARMI_VLDR_D, r, - ra_allock(as, (i & ~1020), RSET_GPR), (i & 1020)); -} -#endif - -/* Get/set global_State fields. */ -#define emit_getgl(as, r, field) \ - emit_lsptr(as, ARMI_LDR, (r), (void *)&J2G(as->J)->field) -#define emit_setgl(as, r, field) \ - emit_lsptr(as, ARMI_STR, (r), (void *)&J2G(as->J)->field) - -/* Trace number is determined from pc of exit instruction. */ -#define emit_setvmstate(as, i) UNUSED(i) - -/* -- Emit control-flow instructions -------------------------------------- */ - -/* Label for internal jumps. */ -typedef MCode *MCLabel; - -/* Return label pointing to current PC. 
*/ -#define emit_label(as) ((as)->mcp) - -static void emit_branch(ASMState *as, ARMIns ai, MCode *target) -{ - MCode *p = as->mcp; - ptrdiff_t delta = (target - p) - 1; - lua_assert(((delta + 0x00800000) >> 24) == 0); - *--p = ai | ((uint32_t)delta & 0x00ffffffu); - as->mcp = p; -} - -#define emit_jmp(as, target) emit_branch(as, ARMI_B, (target)) - -static void emit_call(ASMState *as, void *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = ((char *)target - (char *)p) - 8; - if ((((delta>>2) + 0x00800000) >> 24) == 0) { - if ((delta & 1)) - *p = ARMI_BLX | ((uint32_t)(delta>>2) & 0x00ffffffu) | ((delta&2) << 23); - else - *p = ARMI_BL | ((uint32_t)(delta>>2) & 0x00ffffffu); - } else { /* Target out of range: need indirect call. But don't use R0-R3. */ - Reg r = ra_allock(as, i32ptr(target), RSET_RANGE(RID_R4, RID_R12+1)); - *p = ARMI_BLXr | ARMF_M(r); - } -} - -/* -- Emit generic operations --------------------------------------------- */ - -/* Generic move between two regs. */ -static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) -{ -#if LJ_SOFTFP - lua_assert(!irt_isnum(ir->t)); UNUSED(ir); -#else - if (dst >= RID_MAX_GPR) { - emit_dm(as, irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S, - (dst & 15), (src & 15)); - return; - } -#endif - if (as->mcp != as->mcloop) { /* Swap early registers for loads/stores. */ - MCode ins = *as->mcp, swp = (src^dst); - if ((ins & 0x0c000000) == 0x04000000 && (ins & 0x02000010) != 0x02000010) { - if (!((ins ^ (dst << 16)) & 0x000f0000)) - *as->mcp = ins ^ (swp << 16); /* Swap N in load/store. */ - if (!(ins & 0x00100000) && !((ins ^ (dst << 12)) & 0x0000f000)) - *as->mcp = ins ^ (swp << 12); /* Swap D in store. */ - } - } - emit_dm(as, ARMI_MOV, dst, src); -} - -/* Generic load of register with base and (small) offset address. 
*/ -static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ -#if LJ_SOFTFP - lua_assert(!irt_isnum(ir->t)); UNUSED(ir); -#else - if (r >= RID_MAX_GPR) - emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs); - else -#endif - emit_lso(as, ARMI_LDR, r, base, ofs); -} - -/* Generic store of register with base and (small) offset address. */ -static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ -#if LJ_SOFTFP - lua_assert(!irt_isnum(ir->t)); UNUSED(ir); -#else - if (r >= RID_MAX_GPR) - emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs); - else -#endif - emit_lso(as, ARMI_STR, r, base, ofs); -} - -/* Emit an arithmetic/logic operation with a constant operand. */ -static void emit_opk(ASMState *as, ARMIns ai, Reg dest, Reg src, - int32_t i, RegSet allow) -{ - uint32_t k = emit_isk12(ai, i); - if (k) - emit_dn(as, ai^k, dest, src); - else - emit_dnm(as, ai, dest, src, ra_allock(as, i, allow)); -} - -/* Add offset to pointer. */ -static void emit_addptr(ASMState *as, Reg r, int32_t ofs) -{ - if (ofs) - emit_opk(as, ARMI_ADD, r, r, ofs, rset_exclude(RSET_GPR, r)); -} - -#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) - diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h deleted file mode 100644 index cfa18c83c2..0000000000 --- a/src/lj_emit_arm64.h +++ /dev/null @@ -1,419 +0,0 @@ -/* -** ARM64 instruction emitter. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -** -** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. -** Sponsored by Cisco Systems, Inc. 
-*/ - -/* -- Constant encoding --------------------------------------------------- */ - -static uint64_t get_k64val(IRIns *ir) -{ - if (ir->o == IR_KINT64) { - return ir_kint64(ir)->u64; - } else if (ir->o == IR_KGC) { - return (uint64_t)ir_kgc(ir); - } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { - return (uint64_t)ir_kptr(ir); - } else { - lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); - return ir->i; /* Sign-extended. */ - } -} - -/* Encode constant in K12 format for data processing instructions. */ -static uint32_t emit_isk12(int64_t n) -{ - uint64_t k = (n < 0) ? -n : n; - uint32_t m = (n < 0) ? 0x40000000 : 0; - if (k < 0x1000) { - return A64I_K12|m|A64F_U12(k); - } else if ((k & 0xfff000) == k) { - return A64I_K12|m|0x400000|A64F_U12(k>>12); - } - return 0; -} - -#define emit_clz64(n) __builtin_clzll(n) -#define emit_ctz64(n) __builtin_ctzll(n) - -/* Encode constant in K13 format for logical data processing instructions. */ -static uint32_t emit_isk13(uint64_t n, int is64) -{ - int inv = 0, w = 128, lz, tz; - if (n & 1) { n = ~n; w = 64; inv = 1; } /* Avoid wrap-around of ones. */ - if (!n) return 0; /* Neither all-zero nor all-ones are allowed. */ - do { /* Find the repeat width. */ - if (is64 && (uint32_t)(n^(n>>32))) break; - n = (uint32_t)n; - if (!n) return 0; /* Ditto when passing n=0xffffffff and is64=0. */ - w = 32; if ((n^(n>>16)) & 0xffff) break; - n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break; - n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break; - n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break; - n = n & 0x3; w = 2; - } while (0); - lz = emit_clz64(n); - tz = emit_ctz64(n); - if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0; /* Non-contiguous? 
*/ - if (inv) - return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10); - else - return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10); -} - -static uint32_t emit_isfpk64(uint64_t n) -{ - uint64_t etop9 = ((n >> 54) & 0x1ff); - if ((n << 16) == 0 && (etop9 == 0x100 || etop9 == 0x0ff)) { - return (uint32_t)(((n >> 48) & 0x7f) | ((n >> 56) & 0x80)); - } - return ~0u; -} - -/* -- Emit basic instructions --------------------------------------------- */ - -static void emit_dnma(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm, Reg ra) -{ - *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm) | A64F_A(ra); -} - -static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm) -{ - *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm); -} - -static void emit_dm(ASMState *as, A64Ins ai, Reg rd, Reg rm) -{ - *--as->mcp = ai | A64F_D(rd) | A64F_M(rm); -} - -static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn) -{ - *--as->mcp = ai | A64F_D(rd) | A64F_N(rn); -} - -static void emit_nm(ASMState *as, A64Ins ai, Reg rn, Reg rm) -{ - *--as->mcp = ai | A64F_N(rn) | A64F_M(rm); -} - -static void emit_d(ASMState *as, A64Ins ai, Reg rd) -{ - *--as->mcp = ai | A64F_D(rd); -} - -static void emit_n(ASMState *as, A64Ins ai, Reg rn) -{ - *--as->mcp = ai | A64F_N(rn); -} - -static int emit_checkofs(A64Ins ai, int64_t ofs) -{ - int scale = (ai >> 30) & 3; - if (ofs < 0 || (ofs & ((1<= -256 && ofs <= 255) ? -1 : 0; - } else { - return (ofs < (4096<> 30) & 3; - lua_assert(ot); - /* Combine LDR/STR pairs to LDP/STP. 
*/ - if ((sc == 2 || sc == 3) && - (!(ai & 0x400000) || rd != rn) && - as->mcp != as->mcloop) { - uint32_t prev = *as->mcp & ~A64F_D(31); - int ofsm = ofs - (1<>sc)) || - prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) { - aip = (A64F_A(rd) | A64F_D(*as->mcp & 31)); - } else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) || - prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) { - aip = (A64F_D(rd) | A64F_A(*as->mcp & 31)); - ofsm = ofs; - } else { - goto nopair; - } - if (ofsm >= (-64<mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) | - (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000)); - return; - } - } -nopair: - if (ot == 1) - *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_U12(ofs >> sc); - else - *--as->mcp = (ai^A64I_LS_U) | A64F_D(rd) | A64F_N(rn) | A64F_S9(ofs & 0x1ff); -} - -/* -- Emit loads/stores --------------------------------------------------- */ - -/* Prefer rematerialization of BASE/L from global_State over spills. */ -#define emit_canremat(ref) ((ref) <= ASMREF_L) - -/* Try to find an N-step delta relative to other consts with N < lim. */ -static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) -{ - RegSet work = ~as->freeset & RSET_GPR; - if (lim <= 1) return 0; /* Can't beat that. */ - while (work) { - Reg r = rset_picktop(work); - IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != rd); - if (ref < REF_TRUE) { - uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) : - get_k64val(IR(ref)); - int64_t delta = (int64_t)(k - kx); - if (delta == 0) { - emit_dm(as, A64I_MOVx, rd, r); - return 1; - } else { - uint32_t k12 = emit_isk12(delta < 0 ? -delta : delta); - if (k12) { - emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r); - return 1; - } - /* Do other ops or multi-step deltas pay off? Probably not. - ** E.g. XOR rarely helps with pointer consts. - */ - } - } - rset_clear(work, r); - } - return 0; /* Failed. 
*/ -} - -static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64) -{ - uint32_t k13 = emit_isk13(u64, is64); - if (k13) { /* Can the constant be represented as a bitmask immediate? */ - emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO); - } else { - int i, zeros = 0, ones = 0, neg; - if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */ - /* Count homogeneous 16 bit fragments. */ - for (i = 0; i < 4; i++) { - uint64_t frag = (u64 >> i*16) & 0xffff; - zeros += (frag == 0); - ones += (frag == 0xffff); - } - neg = ones > zeros; /* Use MOVN if it pays off. */ - if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) { - int shift = 0, lshift = 0; - uint64_t n64 = neg ? ~u64 : u64; - if (n64 != 0) { - /* Find first/last fragment to be filled. */ - shift = (63-emit_clz64(n64)) & ~15; - lshift = emit_ctz64(n64) & ~15; - } - /* MOVK requires the original value (u64). */ - while (shift > lshift) { - uint32_t u16 = (u64 >> shift) & 0xffff; - /* Skip fragments that are correctly filled by MOVN/MOVZ. */ - if (u16 != (neg ? 0xffff : 0)) - emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd); - shift -= 16; - } - /* But MOVN needs an inverted value (n64). */ - emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) | - A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd); - } - } -} - -/* Load a 32 bit constant into a GPR. */ -#define emit_loadi(as, rd, i) emit_loadk(as, rd, i, 0) - -/* Load a 64 bit constant into a GPR. */ -#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X) - -#define emit_loada(as, r, addr) emit_loadu64(as, (r), (uintptr_t)(addr)) - -#define glofs(as, k) \ - ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g)) -#define mcpofs(as, k) \ - ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1))) -#define checkmcpofs(as, k) \ - ((((mcpofs(as, k)>>2) + 0x00040000) >> 19) == 0) - -static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); - -/* Get/set from constant pointer. 
*/ -static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p) -{ - /* First, check if ip + offset is in range. */ - if ((ai & 0x00400000) && checkmcpofs(as, p)) { - emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r); - } else { - Reg base = RID_GL; /* Next, try GL + offset. */ - int64_t ofs = glofs(as, p); - if (!emit_checkofs(ai, ofs)) { /* Else split up into base reg + offset. */ - int64_t i64 = i64ptr(p); - base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r)); - ofs = i64 & 0x7fffull; - } - emit_lso(as, ai, r, base, ofs); - } -} - -/* Load 64 bit IR constant into register. */ -static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) -{ - const uint64_t *k = &ir_k64(ir)->u64; - int64_t ofs; - if (r >= RID_MAX_GPR) { - uint32_t fpk = emit_isfpk64(*k); - if (fpk != ~0u) { - emit_d(as, A64I_FMOV_DI | A64F_FP8(fpk), (r & 31)); - return; - } - } - ofs = glofs(as, k); - if (emit_checkofs(A64I_LDRx, ofs)) { - emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx, - (r & 31), RID_GL, ofs); - } else { - if (r >= RID_MAX_GPR) { - emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP); - r = RID_TMP; - } - if (checkmcpofs(as, k)) - emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, k)>>2), r); - else - emit_loadu64(as, r, *k); - } -} - -/* Get/set global_State fields. */ -#define emit_getgl(as, r, field) \ - emit_lsptr(as, A64I_LDRx, (r), (void *)&J2G(as->J)->field) -#define emit_setgl(as, r, field) \ - emit_lsptr(as, A64I_STRx, (r), (void *)&J2G(as->J)->field) - -/* Trace number is determined from pc of exit instruction. */ -#define emit_setvmstate(as, i) UNUSED(i) - -/* -- Emit control-flow instructions -------------------------------------- */ - -/* Label for internal jumps. */ -typedef MCode *MCLabel; - -/* Return label pointing to current PC. 
*/ -#define emit_label(as) ((as)->mcp) - -static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = target - p; - lua_assert(((delta + 0x40000) >> 19) == 0); - *p = A64I_BCC | A64F_S19(delta) | cond; -} - -static void emit_branch(ASMState *as, A64Ins ai, MCode *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = target - p; - lua_assert(((delta + 0x02000000) >> 26) == 0); - *p = ai | ((uint32_t)delta & 0x03ffffffu); -} - -static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = target - p; - lua_assert(bit < 63 && ((delta + 0x2000) >> 14) == 0); - if (bit > 31) ai |= A64I_X; - *p = ai | A64F_BIT(bit & 31) | A64F_S14((uint32_t)delta & 0x3fffu) | r; -} - -static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = target - p; - lua_assert(((delta + 0x40000) >> 19) == 0); - *p = ai | A64F_S19(delta) | r; -} - -#define emit_jmp(as, target) emit_branch(as, A64I_B, (target)) - -static void emit_call(ASMState *as, void *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = (char *)target - (char *)p; - if ((((delta>>2) + 0x02000000) >> 26) == 0) { - *p = A64I_BL | ((uint32_t)(delta>>2) & 0x03ffffffu); - } else { /* Target out of range: need indirect call. But don't use R0-R7. */ - Reg r = ra_allock(as, i64ptr(target), - RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); - *p = A64I_BLR | A64F_N(r); - } -} - -/* -- Emit generic operations --------------------------------------------- */ - -/* Generic move between two regs. */ -static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) -{ - if (dst >= RID_MAX_GPR) { - emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D : A64I_FMOV_S, - (dst & 31), (src & 31)); - return; - } - if (as->mcp != as->mcloop) { /* Swap early registers for loads/stores. 
*/ - MCode ins = *as->mcp, swp = (src^dst); - if ((ins & 0xbf800000) == 0xb9000000) { - if (!((ins ^ (dst << 5)) & 0x000003e0)) - *as->mcp = ins ^ (swp << 5); /* Swap N in load/store. */ - if (!(ins & 0x00400000) && !((ins ^ dst) & 0x0000001f)) - *as->mcp = ins ^ swp; /* Swap D in store. */ - } - } - emit_dm(as, A64I_MOVx, dst, src); -} - -/* Generic load of register with base and (small) offset address. */ -static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ - if (r >= RID_MAX_GPR) - emit_lso(as, irt_isnum(ir->t) ? A64I_LDRd : A64I_LDRs, (r & 31), base, ofs); - else - emit_lso(as, irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw, r, base, ofs); -} - -/* Generic store of register with base and (small) offset address. */ -static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ - if (r >= RID_MAX_GPR) - emit_lso(as, irt_isnum(ir->t) ? A64I_STRd : A64I_STRs, (r & 31), base, ofs); - else - emit_lso(as, irt_is64(ir->t) ? A64I_STRx : A64I_STRw, r, base, ofs); -} - -/* Emit an arithmetic operation with a constant operand. */ -static void emit_opk(ASMState *as, A64Ins ai, Reg dest, Reg src, - int32_t i, RegSet allow) -{ - uint32_t k = emit_isk12(i); - if (k) - emit_dn(as, ai^k, dest, src); - else - emit_dnm(as, ai, dest, src, ra_allock(as, i, allow)); -} - -/* Add offset to pointer. */ -static void emit_addptr(ASMState *as, Reg r, int32_t ofs) -{ - if (ofs) - emit_opk(as, ofs < 0 ? A64I_SUBx : A64I_ADDx, r, r, - ofs < 0 ? -ofs : ofs, rset_exclude(RSET_GPR, r)); -} - -#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) - diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h deleted file mode 100644 index 8a9ee24dce..0000000000 --- a/src/lj_emit_mips.h +++ /dev/null @@ -1,293 +0,0 @@ -/* -** MIPS instruction emitter. -** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h -*/ - -#if LJ_64 -static intptr_t get_k64val(IRIns *ir) -{ - if (ir->o == IR_KINT64) { - return (intptr_t)ir_kint64(ir)->u64; - } else if (ir->o == IR_KGC) { - return (intptr_t)ir_kgc(ir); - } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { - return (intptr_t)ir_kptr(ir); - } else { - lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); - return ir->i; /* Sign-extended. */ - } -} -#endif - -#if LJ_64 -#define get_kval(ir) get_k64val(ir) -#else -#define get_kval(ir) ((ir)->i) -#endif - -/* -- Emit basic instructions --------------------------------------------- */ - -static void emit_dst(ASMState *as, MIPSIns mi, Reg rd, Reg rs, Reg rt) -{ - *--as->mcp = mi | MIPSF_D(rd) | MIPSF_S(rs) | MIPSF_T(rt); -} - -static void emit_dta(ASMState *as, MIPSIns mi, Reg rd, Reg rt, uint32_t a) -{ - *--as->mcp = mi | MIPSF_D(rd) | MIPSF_T(rt) | MIPSF_A(a); -} - -#define emit_ds(as, mi, rd, rs) emit_dst(as, (mi), (rd), (rs), 0) -#define emit_tg(as, mi, rt, rg) emit_dst(as, (mi), (rg)&31, 0, (rt)) - -static void emit_tsi(ASMState *as, MIPSIns mi, Reg rt, Reg rs, int32_t i) -{ - *--as->mcp = mi | MIPSF_T(rt) | MIPSF_S(rs) | (i & 0xffff); -} - -#define emit_ti(as, mi, rt, i) emit_tsi(as, (mi), (rt), 0, (i)) -#define emit_hsi(as, mi, rh, rs, i) emit_tsi(as, (mi), (rh) & 31, (rs), (i)) - -static void emit_fgh(ASMState *as, MIPSIns mi, Reg rf, Reg rg, Reg rh) -{ - *--as->mcp = mi | MIPSF_F(rf&31) | MIPSF_G(rg&31) | MIPSF_H(rh&31); -} - -#define emit_fg(as, mi, rf, rg) emit_fgh(as, (mi), (rf), (rg), 0) - -static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) -{ - if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { - emit_dta(as, MIPSI_ROTR, dest, src, shift); - } else { - emit_dst(as, MIPSI_OR, dest, dest, tmp); - emit_dta(as, MIPSI_SLL, dest, src, (-shift)&31); - emit_dta(as, MIPSI_SRL, tmp, src, shift); - } -} - -#if LJ_64 -static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb, - uint32_t lsb) -{ - *--as->mcp = mi 
| MIPSF_T(rt) | MIPSF_S(rs) | MIPSF_M(msb) | MIPSF_L(lsb); -} -#endif - -/* -- Emit loads/stores --------------------------------------------------- */ - -/* Prefer rematerialization of BASE/L from global_State over spills. */ -#define emit_canremat(ref) ((ref) <= REF_BASE) - -/* Try to find a one step delta relative to another constant. */ -static int emit_kdelta1(ASMState *as, Reg t, intptr_t i) -{ - RegSet work = ~as->freeset & RSET_GPR; - while (work) { - Reg r = rset_picktop(work); - IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != t); - if (ref < ASMREF_L) { - intptr_t delta = (intptr_t)((uintptr_t)i - - (uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(IR(ref)))); - if (checki16(delta)) { - emit_tsi(as, MIPSI_AADDIU, t, r, delta); - return 1; - } - } - rset_clear(work, r); - } - return 0; /* Failed. */ -} - -/* Load a 32 bit constant into a GPR. */ -static void emit_loadi(ASMState *as, Reg r, int32_t i) -{ - if (checki16(i)) { - emit_ti(as, MIPSI_LI, r, i); - } else { - if ((i & 0xffff)) { - intptr_t jgl = (intptr_t)(void *)J2G(as->J); - if ((uintptr_t)(i-jgl) < 65536) { - emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768); - return; - } else if (emit_kdelta1(as, r, i)) { - return; - } else if ((i >> 16) == 0) { - emit_tsi(as, MIPSI_ORI, r, RID_ZERO, i); - return; - } - emit_tsi(as, MIPSI_ORI, r, r, i); - } - emit_ti(as, MIPSI_LUI, r, (i >> 16)); - } -} - -#if LJ_64 -/* Load a 64 bit constant into a GPR. 
*/ -static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) -{ - if (checki32((int64_t)u64)) { - emit_loadi(as, r, (int32_t)u64); - } else { - uint64_t delta = u64 - (uint64_t)(void *)J2G(as->J); - if (delta < 65536) { - emit_tsi(as, MIPSI_DADDIU, r, RID_JGL, (int32_t)(delta-32768)); - } else if (emit_kdelta1(as, r, (intptr_t)u64)) { - return; - } else { - if ((u64 & 0xffff)) { - emit_tsi(as, MIPSI_ORI, r, r, u64 & 0xffff); - } - if (((u64 >> 16) & 0xffff)) { - emit_dta(as, MIPSI_DSLL, r, r, 16); - emit_tsi(as, MIPSI_ORI, r, r, (u64 >> 16) & 0xffff); - emit_dta(as, MIPSI_DSLL, r, r, 16); - } else { - emit_dta(as, MIPSI_DSLL32, r, r, 0); - } - emit_loadi(as, r, (int32_t)(u64 >> 32)); - } - /* TODO: There are probably more optimization opportunities. */ - } -} - -#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr))) -#else -#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) -#endif - -static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); -static void ra_allockreg(ASMState *as, intptr_t k, Reg r); - -/* Get/set from constant pointer. */ -static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow) -{ - intptr_t jgl = (intptr_t)(J2G(as->J)); - intptr_t i = (intptr_t)(p); - Reg base; - if ((uint32_t)(i-jgl) < 65536) { - i = i-jgl-32768; - base = RID_JGL; - } else { - base = ra_allock(as, i-(int16_t)i, allow); - } - emit_tsi(as, mi, r, base, i); -} - -#if LJ_64 -static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) -{ - const uint64_t *k = &ir_k64(ir)->u64; - Reg r64 = r; - if (rset_test(RSET_FPR, r)) { - r64 = RID_TMP; - emit_tg(as, MIPSI_DMTC1, r64, r); - } - if ((uint32_t)((intptr_t)k-(intptr_t)J2G(as->J)) < 65536) - emit_lsptr(as, MIPSI_LD, r64, (void *)k, 0); - else - emit_loadu64(as, r64, *k); -} -#else -#define emit_loadk64(as, r, ir) \ - emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR) -#endif - -/* Get/set global_State fields. 
*/ -static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs) -{ - emit_tsi(as, mi, r, RID_JGL, ofs-32768); -} - -#define emit_getgl(as, r, field) \ - emit_lsglptr(as, MIPSI_AL, (r), (int32_t)offsetof(global_State, field)) -#define emit_setgl(as, r, field) \ - emit_lsglptr(as, MIPSI_AS, (r), (int32_t)offsetof(global_State, field)) - -/* Trace number is determined from per-trace exit stubs. */ -#define emit_setvmstate(as, i) UNUSED(i) - -/* -- Emit control-flow instructions -------------------------------------- */ - -/* Label for internal jumps. */ -typedef MCode *MCLabel; - -/* Return label pointing to current PC. */ -#define emit_label(as) ((as)->mcp) - -static void emit_branch(ASMState *as, MIPSIns mi, Reg rs, Reg rt, MCode *target) -{ - MCode *p = as->mcp; - ptrdiff_t delta = target - p; - lua_assert(((delta + 0x8000) >> 16) == 0); - *--p = mi | MIPSF_S(rs) | MIPSF_T(rt) | ((uint32_t)delta & 0xffffu); - as->mcp = p; -} - -static void emit_jmp(ASMState *as, MCode *target) -{ - *--as->mcp = MIPSI_NOP; - emit_branch(as, MIPSI_B, RID_ZERO, RID_ZERO, (target)); -} - -static void emit_call(ASMState *as, void *target, int needcfa) -{ - MCode *p = as->mcp; - *--p = MIPSI_NOP; - if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) { - *--p = (((uintptr_t)target & 1) ? MIPSI_JALX : MIPSI_JAL) | - (((uintptr_t)target >>2) & 0x03ffffffu); - } else { /* Target out of range: need indirect call. */ - *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR); - needcfa = 1; - } - as->mcp = p; - if (needcfa) ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR); -} - -/* -- Emit generic operations --------------------------------------------- */ - -#define emit_move(as, dst, src) \ - emit_ds(as, MIPSI_MOVE, (dst), (src)) - -/* Generic move between two regs. */ -static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) -{ - if (dst < RID_MAX_GPR) - emit_move(as, dst, src); - else - emit_fg(as, irt_isnum(ir->t) ? 
MIPSI_MOV_D : MIPSI_MOV_S, dst, src); -} - -/* Generic load of register with base and (small) offset address. */ -static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ - if (r < RID_MAX_GPR) - emit_tsi(as, irt_is64(ir->t) ? MIPSI_LD : MIPSI_LW, r, base, ofs); - else - emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1, - (r & 31), base, ofs); -} - -/* Generic store of register with base and (small) offset address. */ -static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ - if (r < RID_MAX_GPR) - emit_tsi(as, irt_is64(ir->t) ? MIPSI_SD : MIPSI_SW, r, base, ofs); - else - emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1, - (r&31), base, ofs); -} - -/* Add offset to pointer. */ -static void emit_addptr(ASMState *as, Reg r, int32_t ofs) -{ - if (ofs) { - lua_assert(checki16(ofs)); - emit_tsi(as, MIPSI_AADDIU, r, r, ofs); - } -} - -#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) - diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h deleted file mode 100644 index 21c3c2ace7..0000000000 --- a/src/lj_emit_ppc.h +++ /dev/null @@ -1,238 +0,0 @@ -/* -** PPC instruction emitter. -** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h -*/ - -/* -- Emit basic instructions --------------------------------------------- */ - -static void emit_tab(ASMState *as, PPCIns pi, Reg rt, Reg ra, Reg rb) -{ - *--as->mcp = pi | PPCF_T(rt) | PPCF_A(ra) | PPCF_B(rb); -} - -#define emit_asb(as, pi, ra, rs, rb) emit_tab(as, (pi), (rs), (ra), (rb)) -#define emit_as(as, pi, ra, rs) emit_tab(as, (pi), (rs), (ra), 0) -#define emit_ab(as, pi, ra, rb) emit_tab(as, (pi), 0, (ra), (rb)) - -static void emit_tai(ASMState *as, PPCIns pi, Reg rt, Reg ra, int32_t i) -{ - *--as->mcp = pi | PPCF_T(rt) | PPCF_A(ra) | (i & 0xffff); -} - -#define emit_ti(as, pi, rt, i) emit_tai(as, (pi), (rt), 0, (i)) -#define emit_ai(as, pi, ra, i) emit_tai(as, (pi), 0, (ra), (i)) -#define emit_asi(as, pi, ra, rs, i) emit_tai(as, (pi), (rs), (ra), (i)) - -#define emit_fab(as, pi, rf, ra, rb) \ - emit_tab(as, (pi), (rf)&31, (ra)&31, (rb)&31) -#define emit_fb(as, pi, rf, rb) emit_tab(as, (pi), (rf)&31, 0, (rb)&31) -#define emit_fac(as, pi, rf, ra, rc) \ - emit_tab(as, (pi) | PPCF_C((rc) & 31), (rf)&31, (ra)&31, 0) -#define emit_facb(as, pi, rf, ra, rc, rb) \ - emit_tab(as, (pi) | PPCF_C((rc) & 31), (rf)&31, (ra)&31, (rb)&31) -#define emit_fai(as, pi, rf, ra, i) emit_tai(as, (pi), (rf)&31, (ra), (i)) - -static void emit_rot(ASMState *as, PPCIns pi, Reg ra, Reg rs, - int32_t n, int32_t b, int32_t e) -{ - *--as->mcp = pi | PPCF_T(rs) | PPCF_A(ra) | PPCF_B(n) | - PPCF_MB(b) | PPCF_ME(e); -} - -static void emit_slwi(ASMState *as, Reg ra, Reg rs, int32_t n) -{ - lua_assert(n >= 0 && n < 32); - emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31-n); -} - -static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n) -{ - lua_assert(n >= 0 && n < 32); - emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31); -} - -/* -- Emit loads/stores --------------------------------------------------- */ - -/* Prefer rematerialization of BASE/L from global_State over spills. 
*/ -#define emit_canremat(ref) ((ref) <= REF_BASE) - -/* Try to find a one step delta relative to another constant. */ -static int emit_kdelta1(ASMState *as, Reg t, int32_t i) -{ - RegSet work = ~as->freeset & RSET_GPR; - while (work) { - Reg r = rset_picktop(work); - IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != t); - if (ref < ASMREF_L) { - int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); - if (checki16(delta)) { - emit_tai(as, PPCI_ADDI, t, r, delta); - return 1; - } - } - rset_clear(work, r); - } - return 0; /* Failed. */ -} - -/* Load a 32 bit constant into a GPR. */ -static void emit_loadi(ASMState *as, Reg r, int32_t i) -{ - if (checki16(i)) { - emit_ti(as, PPCI_LI, r, i); - } else { - if ((i & 0xffff)) { - int32_t jgl = i32ptr(J2G(as->J)); - if ((uint32_t)(i-jgl) < 65536) { - emit_tai(as, PPCI_ADDI, r, RID_JGL, i-jgl-32768); - return; - } else if (emit_kdelta1(as, r, i)) { - return; - } - emit_asi(as, PPCI_ORI, r, r, i); - } - emit_ti(as, PPCI_LIS, r, (i >> 16)); - } -} - -#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) - -static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); - -/* Get/set from constant pointer. */ -static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow) -{ - int32_t jgl = i32ptr(J2G(as->J)); - int32_t i = i32ptr(p); - Reg base; - if ((uint32_t)(i-jgl) < 65536) { - i = i-jgl-32768; - base = RID_JGL; - } else { - base = ra_allock(as, i-(int16_t)i, allow); - } - emit_tai(as, pi, r, base, i); -} - -#define emit_loadk64(as, r, ir) \ - emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR) - -/* Get/set global_State fields. 
*/ -static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs) -{ - emit_tai(as, pi, r, RID_JGL, ofs-32768); -} - -#define emit_getgl(as, r, field) \ - emit_lsglptr(as, PPCI_LWZ, (r), (int32_t)offsetof(global_State, field)) -#define emit_setgl(as, r, field) \ - emit_lsglptr(as, PPCI_STW, (r), (int32_t)offsetof(global_State, field)) - -/* Trace number is determined from per-trace exit stubs. */ -#define emit_setvmstate(as, i) UNUSED(i) - -/* -- Emit control-flow instructions -------------------------------------- */ - -/* Label for internal jumps. */ -typedef MCode *MCLabel; - -/* Return label pointing to current PC. */ -#define emit_label(as) ((as)->mcp) - -static void emit_condbranch(ASMState *as, PPCIns pi, PPCCC cc, MCode *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = (char *)target - (char *)p; - lua_assert(((delta + 0x8000) >> 16) == 0); - pi ^= (delta & 0x8000) * (PPCF_Y/0x8000); - *p = pi | PPCF_CC(cc) | ((uint32_t)delta & 0xffffu); -} - -static void emit_jmp(ASMState *as, MCode *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = (char *)target - (char *)p; - *p = PPCI_B | (delta & 0x03fffffcu); -} - -static void emit_call(ASMState *as, void *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = (char *)target - (char *)p; - if ((((delta>>2) + 0x00800000) >> 24) == 0) { - *p = PPCI_BL | (delta & 0x03fffffcu); - } else { /* Target out of range: need indirect call. Don't use arg reg. */ - RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1); - Reg r = ra_allock(as, i32ptr(target), allow); - *p = PPCI_BCTRL; - p[-1] = PPCI_MTCTR | PPCF_T(r); - as->mcp = p-1; - } -} - -/* -- Emit generic operations --------------------------------------------- */ - -#define emit_mr(as, dst, src) \ - emit_asb(as, PPCI_MR, (dst), (src), (src)) - -/* Generic move between two regs. 
*/ -static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) -{ - UNUSED(ir); - if (dst < RID_MAX_GPR) - emit_mr(as, dst, src); - else - emit_fb(as, PPCI_FMR, dst, src); -} - -/* Generic load of register with base and (small) offset address. */ -static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ - if (r < RID_MAX_GPR) - emit_tai(as, PPCI_LWZ, r, base, ofs); - else - emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, base, ofs); -} - -/* Generic store of register with base and (small) offset address. */ -static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ - if (r < RID_MAX_GPR) - emit_tai(as, PPCI_STW, r, base, ofs); - else - emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, base, ofs); -} - -/* Emit a compare (for equality) with a constant operand. */ -static void emit_cmpi(ASMState *as, Reg r, int32_t k) -{ - if (checki16(k)) { - emit_ai(as, PPCI_CMPWI, r, k); - } else if (checku16(k)) { - emit_ai(as, PPCI_CMPLWI, r, k); - } else { - emit_ai(as, PPCI_CMPLWI, RID_TMP, k); - emit_asi(as, PPCI_XORIS, RID_TMP, r, (k >> 16)); - } -} - -/* Add offset to pointer. */ -static void emit_addptr(ASMState *as, Reg r, int32_t ofs) -{ - if (ofs) { - emit_tai(as, PPCI_ADDI, r, r, ofs); - if (!checki16(ofs)) - emit_tai(as, PPCI_ADDIS, r, r, (ofs + 32768) >> 16); - } -} - -static void emit_spsub(ASMState *as, int32_t ofs) -{ - if (ofs) { - emit_tai(as, PPCI_STWU, RID_TMP, RID_SP, -ofs); - emit_tai(as, PPCI_ADDI, RID_TMP, RID_SP, - CFRAME_SIZE + (as->parent ? as->parent->spadjust : 0)); - } -} - diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h deleted file mode 100644 index 5551b1f1ce..0000000000 --- a/src/lj_target_arm.h +++ /dev/null @@ -1,270 +0,0 @@ -/* -** Definitions for ARM CPUs. -** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h -*/ - -#ifndef _LJ_TARGET_ARM_H -#define _LJ_TARGET_ARM_H - -/* -- Registers IDs ------------------------------------------------------- */ - -#define GPRDEF(_) \ - _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \ - _(R8) _(R9) _(R10) _(R11) _(R12) _(SP) _(LR) _(PC) -#if LJ_SOFTFP -#define FPRDEF(_) -#else -#define FPRDEF(_) \ - _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \ - _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) -#endif -#define VRIDDEF(_) - -#define RIDENUM(name) RID_##name, - -enum { - GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ - FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ - RID_MAX, - RID_TMP = RID_LR, - - /* Calling conventions. */ - RID_RET = RID_R0, - RID_RETLO = RID_R0, - RID_RETHI = RID_R1, -#if LJ_SOFTFP - RID_FPRET = RID_R0, -#else - RID_FPRET = RID_D0, -#endif - - /* These definitions must match with the *.dasc file(s): */ - RID_BASE = RID_R9, /* Interpreter BASE. */ - RID_LPC = RID_R6, /* Interpreter PC. */ - RID_DISPATCH = RID_R7, /* Interpreter DISPATCH table. */ - RID_LREG = RID_R8, /* Interpreter L. */ - - /* Register ranges [min, max) and number of registers. */ - RID_MIN_GPR = RID_R0, - RID_MAX_GPR = RID_PC+1, - RID_MIN_FPR = RID_MAX_GPR, -#if LJ_SOFTFP - RID_MAX_FPR = RID_MIN_FPR, -#else - RID_MAX_FPR = RID_D15+1, -#endif - RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, - RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR -}; - -#define RID_NUM_KREF RID_NUM_GPR -#define RID_MIN_KREF RID_R0 - -/* -- Register sets ------------------------------------------------------- */ - -/* Make use of all registers, except sp, lr and pc. 
*/ -#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_R12+1)) -#define RSET_GPREVEN \ - (RID2RSET(RID_R0)|RID2RSET(RID_R2)|RID2RSET(RID_R4)|RID2RSET(RID_R6)| \ - RID2RSET(RID_R8)|RID2RSET(RID_R10)) -#define RSET_GPRODD \ - (RID2RSET(RID_R1)|RID2RSET(RID_R3)|RID2RSET(RID_R5)|RID2RSET(RID_R7)| \ - RID2RSET(RID_R9)|RID2RSET(RID_R11)) -#if LJ_SOFTFP -#define RSET_FPR 0 -#else -#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) -#endif -#define RSET_ALL (RSET_GPR|RSET_FPR) -#define RSET_INIT RSET_ALL - -/* ABI-specific register sets. lr is an implicit scratch register. */ -#define RSET_SCRATCH_GPR_ (RSET_RANGE(RID_R0, RID_R3+1)|RID2RSET(RID_R12)) -#ifdef __APPLE__ -#define RSET_SCRATCH_GPR (RSET_SCRATCH_GPR_|RID2RSET(RID_R9)) -#else -#define RSET_SCRATCH_GPR RSET_SCRATCH_GPR_ -#endif -#if LJ_SOFTFP -#define RSET_SCRATCH_FPR 0 -#else -#define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1)) -#endif -#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) -#define REGARG_FIRSTGPR RID_R0 -#define REGARG_LASTGPR RID_R3 -#define REGARG_NUMGPR 4 -#if LJ_ABI_SOFTFP -#define REGARG_FIRSTFPR 0 -#define REGARG_LASTFPR 0 -#define REGARG_NUMFPR 0 -#else -#define REGARG_FIRSTFPR RID_D0 -#define REGARG_LASTFPR RID_D7 -#define REGARG_NUMFPR 8 -#endif - -/* -- Spill slots --------------------------------------------------------- */ - -/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. -** -** SPS_FIXED: Available fixed spill slots in interpreter frame. -** This definition must match with the *.dasc file(s). -** -** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. -*/ -#define SPS_FIXED 2 -#define SPS_FIRST 2 - -#define SPOFS_TMP 0 - -#define sps_scale(slot) (4 * (int32_t)(slot)) -#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) - -/* -- Exit state ---------------------------------------------------------- */ - -/* This definition must match with the *.dasc file(s). 
*/ -typedef struct { -#if !LJ_SOFTFP - lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ -#endif - int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ - int32_t spill[256]; /* Spill slots. */ -} ExitState; - -/* PC after instruction that caused an exit. Used to find the trace number. */ -#define EXITSTATE_PCREG RID_PC -/* Highest exit + 1 indicates stack check. */ -#define EXITSTATE_CHECKEXIT 1 - -#define EXITSTUB_SPACING 4 -#define EXITSTUBS_PER_GROUP 32 - -/* -- Instructions -------------------------------------------------------- */ - -/* Instruction fields. */ -#define ARMF_CC(ai, cc) (((ai) ^ ARMI_CCAL) | ((cc) << 28)) -#define ARMF_N(r) ((r) << 16) -#define ARMF_D(r) ((r) << 12) -#define ARMF_S(r) ((r) << 8) -#define ARMF_M(r) (r) -#define ARMF_SH(sh, n) (((sh) << 5) | ((n) << 7)) -#define ARMF_RSH(sh, r) (0x10 | ((sh) << 5) | ARMF_S(r)) - -typedef enum ARMIns { - ARMI_CCAL = 0xe0000000, - ARMI_S = 0x000100000, - ARMI_K12 = 0x02000000, - ARMI_KNEG = 0x00200000, - ARMI_LS_W = 0x00200000, - ARMI_LS_U = 0x00800000, - ARMI_LS_P = 0x01000000, - ARMI_LS_R = 0x02000000, - ARMI_LSX_I = 0x00400000, - - ARMI_AND = 0xe0000000, - ARMI_EOR = 0xe0200000, - ARMI_SUB = 0xe0400000, - ARMI_RSB = 0xe0600000, - ARMI_ADD = 0xe0800000, - ARMI_ADC = 0xe0a00000, - ARMI_SBC = 0xe0c00000, - ARMI_RSC = 0xe0e00000, - ARMI_TST = 0xe1100000, - ARMI_TEQ = 0xe1300000, - ARMI_CMP = 0xe1500000, - ARMI_CMN = 0xe1700000, - ARMI_ORR = 0xe1800000, - ARMI_MOV = 0xe1a00000, - ARMI_BIC = 0xe1c00000, - ARMI_MVN = 0xe1e00000, - - ARMI_NOP = 0xe1a00000, - - ARMI_MUL = 0xe0000090, - ARMI_SMULL = 0xe0c00090, - - ARMI_LDR = 0xe4100000, - ARMI_LDRB = 0xe4500000, - ARMI_LDRH = 0xe01000b0, - ARMI_LDRSB = 0xe01000d0, - ARMI_LDRSH = 0xe01000f0, - ARMI_LDRD = 0xe00000d0, - ARMI_STR = 0xe4000000, - ARMI_STRB = 0xe4400000, - ARMI_STRH = 0xe00000b0, - ARMI_STRD = 0xe00000f0, - ARMI_PUSH = 0xe92d0000, - - ARMI_B = 0xea000000, - ARMI_BL = 0xeb000000, - ARMI_BLX = 0xfa000000, - ARMI_BLXr = 
0xe12fff30, - - /* ARMv6 */ - ARMI_REV = 0xe6bf0f30, - ARMI_SXTB = 0xe6af0070, - ARMI_SXTH = 0xe6bf0070, - ARMI_UXTB = 0xe6ef0070, - ARMI_UXTH = 0xe6ff0070, - - /* ARMv6T2 */ - ARMI_MOVW = 0xe3000000, - ARMI_MOVT = 0xe3400000, - - /* VFP */ - ARMI_VMOV_D = 0xeeb00b40, - ARMI_VMOV_S = 0xeeb00a40, - ARMI_VMOVI_D = 0xeeb00b00, - - ARMI_VMOV_R_S = 0xee100a10, - ARMI_VMOV_S_R = 0xee000a10, - ARMI_VMOV_RR_D = 0xec500b10, - ARMI_VMOV_D_RR = 0xec400b10, - - ARMI_VADD_D = 0xee300b00, - ARMI_VSUB_D = 0xee300b40, - ARMI_VMUL_D = 0xee200b00, - ARMI_VMLA_D = 0xee000b00, - ARMI_VMLS_D = 0xee000b40, - ARMI_VNMLS_D = 0xee100b00, - ARMI_VDIV_D = 0xee800b00, - - ARMI_VABS_D = 0xeeb00bc0, - ARMI_VNEG_D = 0xeeb10b40, - ARMI_VSQRT_D = 0xeeb10bc0, - - ARMI_VCMP_D = 0xeeb40b40, - ARMI_VCMPZ_D = 0xeeb50b40, - - ARMI_VMRS = 0xeef1fa10, - - ARMI_VCVT_S32_F32 = 0xeebd0ac0, - ARMI_VCVT_S32_F64 = 0xeebd0bc0, - ARMI_VCVT_U32_F32 = 0xeebc0ac0, - ARMI_VCVT_U32_F64 = 0xeebc0bc0, - ARMI_VCVT_F32_S32 = 0xeeb80ac0, - ARMI_VCVT_F64_S32 = 0xeeb80bc0, - ARMI_VCVT_F32_U32 = 0xeeb80a40, - ARMI_VCVT_F64_U32 = 0xeeb80b40, - ARMI_VCVT_F32_F64 = 0xeeb70bc0, - ARMI_VCVT_F64_F32 = 0xeeb70ac0, - - ARMI_VLDR_S = 0xed100a00, - ARMI_VLDR_D = 0xed100b00, - ARMI_VSTR_S = 0xed000a00, - ARMI_VSTR_D = 0xed000b00, -} ARMIns; - -typedef enum ARMShift { - ARMSH_LSL, ARMSH_LSR, ARMSH_ASR, ARMSH_ROR -} ARMShift; - -/* ARM condition codes. */ -typedef enum ARMCC { - CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC, - CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL, - CC_HS = CC_CS, CC_LO = CC_CC -} ARMCC; - -#endif diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h deleted file mode 100644 index 3f6bb39be2..0000000000 --- a/src/lj_target_arm64.h +++ /dev/null @@ -1,322 +0,0 @@ -/* -** Definitions for ARM64 CPUs. -** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h -*/ - -#ifndef _LJ_TARGET_ARM64_H -#define _LJ_TARGET_ARM64_H - -/* -- Registers IDs ------------------------------------------------------- */ - -#define GPRDEF(_) \ - _(X0) _(X1) _(X2) _(X3) _(X4) _(X5) _(X6) _(X7) \ - _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \ - _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \ - _(X24) _(X25) _(X26) _(X27) _(X28) _(FP) _(LR) _(SP) -#define FPRDEF(_) \ - _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \ - _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) \ - _(D16) _(D17) _(D18) _(D19) _(D20) _(D21) _(D22) _(D23) \ - _(D24) _(D25) _(D26) _(D27) _(D28) _(D29) _(D30) _(D31) -#define VRIDDEF(_) - -#define RIDENUM(name) RID_##name, - -enum { - GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ - FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ - RID_MAX, - RID_TMP = RID_LR, - RID_ZERO = RID_SP, - - /* Calling conventions. */ - RID_RET = RID_X0, - RID_FPRET = RID_D0, - - /* These definitions must match with the *.dasc file(s): */ - RID_BASE = RID_X19, /* Interpreter BASE. */ - RID_LPC = RID_X21, /* Interpreter PC. */ - RID_GL = RID_X22, /* Interpreter GL. */ - RID_LREG = RID_X23, /* Interpreter L. */ - - /* Register ranges [min, max) and number of registers. */ - RID_MIN_GPR = RID_X0, - RID_MAX_GPR = RID_SP+1, - RID_MIN_FPR = RID_MAX_GPR, - RID_MAX_FPR = RID_D31+1, - RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, - RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR -}; - -#define RID_NUM_KREF RID_NUM_GPR -#define RID_MIN_KREF RID_X0 - -/* -- Register sets ------------------------------------------------------- */ - -/* Make use of all registers, except for x18, fp, lr and sp. 
*/ -#define RSET_FIXED \ - (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP)|\ - RID2RSET(RID_GL)) -#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) -#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) -#define RSET_ALL (RSET_GPR|RSET_FPR) -#define RSET_INIT RSET_ALL - -/* lr is an implicit scratch register. */ -#define RSET_SCRATCH_GPR (RSET_RANGE(RID_X0, RID_X17+1)) -#define RSET_SCRATCH_FPR \ - (RSET_RANGE(RID_D0, RID_D7+1)|RSET_RANGE(RID_D16, RID_D31+1)) -#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) -#define REGARG_FIRSTGPR RID_X0 -#define REGARG_LASTGPR RID_X7 -#define REGARG_NUMGPR 8 -#define REGARG_FIRSTFPR RID_D0 -#define REGARG_LASTFPR RID_D7 -#define REGARG_NUMFPR 8 - -/* -- Spill slots --------------------------------------------------------- */ - -/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. -** -** SPS_FIXED: Available fixed spill slots in interpreter frame. -** This definition must match with the vm_arm64.dasc file. -** Pre-allocate some slots to avoid sp adjust in every root trace. -** -** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. -*/ -#define SPS_FIXED 4 -#define SPS_FIRST 2 - -#define SPOFS_TMP 0 - -#define sps_scale(slot) (4 * (int32_t)(slot)) -#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) - -/* -- Exit state ---------------------------------------------------------- */ - -/* This definition must match with the *.dasc file(s). */ -typedef struct { - lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ - intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ - int32_t spill[256]; /* Spill slots. */ -} ExitState; - -/* Highest exit + 1 indicates stack check. */ -#define EXITSTATE_CHECKEXIT 1 - -/* Return the address of a per-trace exit stub. */ -static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) -{ - while (*p == 0xd503201f) p++; /* Skip A64I_NOP. 
*/ - return p + 3 + exitno; -} -/* Avoid dependence on lj_jit.h if only including lj_target.h. */ -#define exitstub_trace_addr(T, exitno) \ - exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno)) - -/* -- Instructions -------------------------------------------------------- */ - -/* Instruction fields. */ -#define A64F_D(r) (r) -#define A64F_N(r) ((r) << 5) -#define A64F_A(r) ((r) << 10) -#define A64F_M(r) ((r) << 16) -#define A64F_IMMS(x) ((x) << 10) -#define A64F_IMMR(x) ((x) << 16) -#define A64F_U16(x) ((x) << 5) -#define A64F_U12(x) ((x) << 10) -#define A64F_S26(x) (x) -#define A64F_S19(x) (((uint32_t)(x) & 0x7ffffu) << 5) -#define A64F_S14(x) ((x) << 5) -#define A64F_S9(x) ((x) << 12) -#define A64F_BIT(x) ((x) << 19) -#define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10)) -#define A64F_EX(ex) (A64I_EX | ((ex) << 13)) -#define A64F_EXSH(ex,x) (A64I_EX | ((ex) << 13) | ((x) << 10)) -#define A64F_FP8(x) ((x) << 13) -#define A64F_CC(cc) ((cc) << 12) -#define A64F_LSL16(x) (((x) / 16) << 21) -#define A64F_BSH(sh) ((sh) << 10) - -typedef enum A64Ins { - A64I_S = 0x20000000, - A64I_X = 0x80000000, - A64I_EX = 0x00200000, - A64I_ON = 0x00200000, - A64I_K12 = 0x1a000000, - A64I_K13 = 0x18000000, - A64I_LS_U = 0x01000000, - A64I_LS_S = 0x00800000, - A64I_LS_R = 0x01200800, - A64I_LS_SH = 0x00001000, - A64I_LS_UXTWx = 0x00004000, - A64I_LS_SXTWx = 0x0000c000, - A64I_LS_SXTXx = 0x0000e000, - A64I_LS_LSLx = 0x00006000, - - A64I_ADDw = 0x0b000000, - A64I_ADDx = 0x8b000000, - A64I_ADDSw = 0x2b000000, - A64I_ADDSx = 0xab000000, - A64I_NEGw = 0x4b0003e0, - A64I_NEGx = 0xcb0003e0, - A64I_SUBw = 0x4b000000, - A64I_SUBx = 0xcb000000, - A64I_SUBSw = 0x6b000000, - A64I_SUBSx = 0xeb000000, - - A64I_MULw = 0x1b007c00, - A64I_MULx = 0x9b007c00, - A64I_SMULL = 0x9b207c00, - - A64I_ANDw = 0x0a000000, - A64I_ANDx = 0x8a000000, - A64I_ANDSw = 0x6a000000, - A64I_ANDSx = 0xea000000, - A64I_EORw = 0x4a000000, - A64I_EORx = 0xca000000, - A64I_ORRw = 0x2a000000, - A64I_ORRx 
= 0xaa000000, - A64I_TSTw = 0x6a00001f, - A64I_TSTx = 0xea00001f, - - A64I_CMPw = 0x6b00001f, - A64I_CMPx = 0xeb00001f, - A64I_CMNw = 0x2b00001f, - A64I_CMNx = 0xab00001f, - A64I_CCMPw = 0x7a400000, - A64I_CCMPx = 0xfa400000, - A64I_CSELw = 0x1a800000, - A64I_CSELx = 0x9a800000, - - A64I_ASRw = 0x13007c00, - A64I_ASRx = 0x9340fc00, - A64I_LSLx = 0xd3400000, - A64I_LSRx = 0xd340fc00, - A64I_SHRw = 0x1ac02000, - A64I_SHRx = 0x9ac02000, /* lsl/lsr/asr/ror x0, x0, x0 */ - A64I_REVw = 0x5ac00800, - A64I_REVx = 0xdac00c00, - - A64I_EXTRw = 0x13800000, - A64I_EXTRx = 0x93c00000, - A64I_SBFMw = 0x13000000, - A64I_SBFMx = 0x93400000, - A64I_SXTBw = 0x13001c00, - A64I_SXTHw = 0x13003c00, - A64I_SXTW = 0x93407c00, - A64I_UBFMw = 0x53000000, - A64I_UBFMx = 0xd3400000, - A64I_UXTBw = 0x53001c00, - A64I_UXTHw = 0x53003c00, - - A64I_MOVw = 0x2a0003e0, - A64I_MOVx = 0xaa0003e0, - A64I_MVNw = 0x2a2003e0, - A64I_MVNx = 0xaa2003e0, - A64I_MOVKw = 0x72800000, - A64I_MOVKx = 0xf2800000, - A64I_MOVZw = 0x52800000, - A64I_MOVZx = 0xd2800000, - A64I_MOVNw = 0x12800000, - A64I_MOVNx = 0x92800000, - - A64I_LDRB = 0x39400000, - A64I_LDRH = 0x79400000, - A64I_LDRw = 0xb9400000, - A64I_LDRx = 0xf9400000, - A64I_LDRLw = 0x18000000, - A64I_LDRLx = 0x58000000, - A64I_STRB = 0x39000000, - A64I_STRH = 0x79000000, - A64I_STRw = 0xb9000000, - A64I_STRx = 0xf9000000, - A64I_STPw = 0x29000000, - A64I_STPx = 0xa9000000, - A64I_LDPw = 0x29400000, - A64I_LDPx = 0xa9400000, - - A64I_B = 0x14000000, - A64I_BCC = 0x54000000, - A64I_BL = 0x94000000, - A64I_BR = 0xd61f0000, - A64I_BLR = 0xd63f0000, - A64I_TBZ = 0x36000000, - A64I_TBNZ = 0x37000000, - A64I_CBZ = 0x34000000, - A64I_CBNZ = 0x35000000, - - A64I_NOP = 0xd503201f, - - /* FP */ - A64I_FADDd = 0x1e602800, - A64I_FSUBd = 0x1e603800, - A64I_FMADDd = 0x1f400000, - A64I_FMSUBd = 0x1f408000, - A64I_FNMADDd = 0x1f600000, - A64I_FNMSUBd = 0x1f608000, - A64I_FMULd = 0x1e600800, - A64I_FDIVd = 0x1e601800, - A64I_FNEGd = 0x1e614000, - A64I_FABS = 0x1e60c000, - 
A64I_FSQRTd = 0x1e61c000, - A64I_LDRs = 0xbd400000, - A64I_LDRd = 0xfd400000, - A64I_STRs = 0xbd000000, - A64I_STRd = 0xfd000000, - A64I_LDPs = 0x2d400000, - A64I_LDPd = 0x6d400000, - A64I_STPs = 0x2d000000, - A64I_STPd = 0x6d000000, - A64I_FCMPd = 0x1e602000, - A64I_FCMPZd = 0x1e602008, - A64I_FCSELd = 0x1e600c00, - A64I_FRINTMd = 0x1e654000, - A64I_FRINTPd = 0x1e64c000, - A64I_FRINTZd = 0x1e65c000, - - A64I_FCVT_F32_F64 = 0x1e624000, - A64I_FCVT_F64_F32 = 0x1e22c000, - A64I_FCVT_F32_S32 = 0x1e220000, - A64I_FCVT_F64_S32 = 0x1e620000, - A64I_FCVT_F32_U32 = 0x1e230000, - A64I_FCVT_F64_U32 = 0x1e630000, - A64I_FCVT_F32_S64 = 0x9e220000, - A64I_FCVT_F64_S64 = 0x9e620000, - A64I_FCVT_F32_U64 = 0x9e230000, - A64I_FCVT_F64_U64 = 0x9e630000, - A64I_FCVT_S32_F64 = 0x1e780000, - A64I_FCVT_S32_F32 = 0x1e380000, - A64I_FCVT_U32_F64 = 0x1e790000, - A64I_FCVT_U32_F32 = 0x1e390000, - A64I_FCVT_S64_F64 = 0x9e780000, - A64I_FCVT_S64_F32 = 0x9e380000, - A64I_FCVT_U64_F64 = 0x9e790000, - A64I_FCVT_U64_F32 = 0x9e390000, - - A64I_FMOV_S = 0x1e204000, - A64I_FMOV_D = 0x1e604000, - A64I_FMOV_R_S = 0x1e260000, - A64I_FMOV_S_R = 0x1e270000, - A64I_FMOV_R_D = 0x9e660000, - A64I_FMOV_D_R = 0x9e670000, - A64I_FMOV_DI = 0x1e601000, -} A64Ins; - -typedef enum A64Shift { - A64SH_LSL, A64SH_LSR, A64SH_ASR, A64SH_ROR -} A64Shift; - -typedef enum A64Extend { - A64EX_UXTB, A64EX_UXTH, A64EX_UXTW, A64EX_UXTX, - A64EX_SXTB, A64EX_SXTH, A64EX_SXTW, A64EX_SXTX, -} A64Extend; - -/* ARM condition codes. */ -typedef enum A64CC { - CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC, - CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL, - CC_HS = CC_CS, CC_LO = CC_CC -} A64CC; - -#endif diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h deleted file mode 100644 index 740687b355..0000000000 --- a/src/lj_target_mips.h +++ /dev/null @@ -1,377 +0,0 @@ -/* -** Definitions for MIPS CPUs. -** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h -*/ - -#ifndef _LJ_TARGET_MIPS_H -#define _LJ_TARGET_MIPS_H - -/* -- Registers IDs ------------------------------------------------------- */ - -#define GPRDEF(_) \ - _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \ - _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \ - _(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \ - _(R24) _(R25) _(SYS1) _(SYS2) _(R28) _(SP) _(R30) _(RA) -#if LJ_SOFTFP -#define FPRDEF(_) -#else -#define FPRDEF(_) \ - _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ - _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ - _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ - _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) -#endif -#define VRIDDEF(_) - -#define RIDENUM(name) RID_##name, - -enum { - GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ - FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ - RID_MAX, - RID_ZERO = RID_R0, - RID_TMP = RID_RA, - RID_GP = RID_R28, - - /* Calling conventions. */ - RID_RET = RID_R2, -#if LJ_LE - RID_RETHI = RID_R3, - RID_RETLO = RID_R2, -#else - RID_RETHI = RID_R2, - RID_RETLO = RID_R3, -#endif -#if LJ_SOFTFP - RID_FPRET = RID_R2, -#else - RID_FPRET = RID_F0, -#endif - RID_CFUNCADDR = RID_R25, - - /* These definitions must match with the *.dasc file(s): */ - RID_BASE = RID_R16, /* Interpreter BASE. */ - RID_LPC = RID_R18, /* Interpreter PC. */ - RID_DISPATCH = RID_R19, /* Interpreter DISPATCH table. */ - RID_LREG = RID_R20, /* Interpreter L. */ - RID_JGL = RID_R30, /* On-trace: global_State + 32768. */ - - /* Register ranges [min, max) and number of registers. */ - RID_MIN_GPR = RID_R0, - RID_MAX_GPR = RID_RA+1, - RID_MIN_FPR = RID_MAX_GPR, -#if LJ_SOFTFP - RID_MAX_FPR = RID_MIN_FPR, -#else - RID_MAX_FPR = RID_F31+1, -#endif - RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, - RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. 
*/ -}; - -#define RID_NUM_KREF RID_NUM_GPR -#define RID_MIN_KREF RID_R0 - -/* -- Register sets ------------------------------------------------------- */ - -/* Make use of all registers, except ZERO, TMP, SP, SYS1, SYS2, JGL and GP. */ -#define RSET_FIXED \ - (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\ - RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)|RID2RSET(RID_GP)) -#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) -#if LJ_SOFTFP -#define RSET_FPR 0 -#else -#if LJ_32 -#define RSET_FPR \ - (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ - RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ - RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\ - RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30)) -#else -#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) -#endif -#endif -#define RSET_ALL (RSET_GPR|RSET_FPR) -#define RSET_INIT RSET_ALL - -#define RSET_SCRATCH_GPR \ - (RSET_RANGE(RID_R1, RID_R15+1)|\ - RID2RSET(RID_R24)|RID2RSET(RID_R25)) -#if LJ_SOFTFP -#define RSET_SCRATCH_FPR 0 -#else -#if LJ_32 -#define RSET_SCRATCH_FPR \ - (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ - RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ - RID2RSET(RID_F16)|RID2RSET(RID_F18)) -#else -#define RSET_SCRATCH_FPR RSET_RANGE(RID_F0, RID_F24) -#endif -#endif -#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) -#define REGARG_FIRSTGPR RID_R4 -#if LJ_32 -#define REGARG_LASTGPR RID_R7 -#define REGARG_NUMGPR 4 -#else -#define REGARG_LASTGPR RID_R11 -#define REGARG_NUMGPR 8 -#endif -#if LJ_ABI_SOFTFP -#define REGARG_FIRSTFPR 0 -#define REGARG_LASTFPR 0 -#define REGARG_NUMFPR 0 -#else -#define REGARG_FIRSTFPR RID_F12 -#if LJ_32 -#define REGARG_LASTFPR RID_F14 -#define REGARG_NUMFPR 2 -#else -#define REGARG_LASTFPR RID_F19 -#define REGARG_NUMFPR 8 -#endif -#endif - -/* -- Spill slots 
--------------------------------------------------------- */ - -/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. -** -** SPS_FIXED: Available fixed spill slots in interpreter frame. -** This definition must match with the *.dasc file(s). -** -** SPS_FIRST: First spill slot for general use. -*/ -#if LJ_32 -#define SPS_FIXED 5 -#else -#define SPS_FIXED 4 -#endif -#define SPS_FIRST 4 - -#define SPOFS_TMP 0 - -#define sps_scale(slot) (4 * (int32_t)(slot)) -#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) - -/* -- Exit state ---------------------------------------------------------- */ - -/* This definition must match with the *.dasc file(s). */ -typedef struct { -#if !LJ_SOFTFP - lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ -#endif - intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ - int32_t spill[256]; /* Spill slots. */ -} ExitState; - -/* Highest exit + 1 indicates stack check. */ -#define EXITSTATE_CHECKEXIT 1 - -/* Return the address of a per-trace exit stub. */ -static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p) -{ - while (*p == 0x00000000) p++; /* Skip MIPSI_NOP. */ - return p; -} -/* Avoid dependence on lj_jit.h if only including lj_target.h. */ -#define exitstub_trace_addr(T, exitno) \ - exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode)) - -/* -- Instructions -------------------------------------------------------- */ - -/* Instruction fields. */ -#define MIPSF_S(r) ((r) << 21) -#define MIPSF_T(r) ((r) << 16) -#define MIPSF_D(r) ((r) << 11) -#define MIPSF_R(r) ((r) << 21) -#define MIPSF_H(r) ((r) << 16) -#define MIPSF_G(r) ((r) << 11) -#define MIPSF_F(r) ((r) << 6) -#define MIPSF_A(n) ((n) << 6) -#define MIPSF_M(n) ((n) << 11) -#define MIPSF_L(n) ((n) << 6) - -typedef enum MIPSIns { - MIPSI_D = 0x38, - MIPSI_DV = 0x10, - MIPSI_D32 = 0x3c, - /* Integer instructions. 
*/ - MIPSI_MOVE = 0x00000025, - MIPSI_NOP = 0x00000000, - - MIPSI_LI = 0x24000000, - MIPSI_LU = 0x34000000, - MIPSI_LUI = 0x3c000000, - - MIPSI_AND = 0x00000024, - MIPSI_ANDI = 0x30000000, - MIPSI_OR = 0x00000025, - MIPSI_ORI = 0x34000000, - MIPSI_XOR = 0x00000026, - MIPSI_XORI = 0x38000000, - MIPSI_NOR = 0x00000027, - - MIPSI_SLT = 0x0000002a, - MIPSI_SLTU = 0x0000002b, - MIPSI_SLTI = 0x28000000, - MIPSI_SLTIU = 0x2c000000, - - MIPSI_ADDU = 0x00000021, - MIPSI_ADDIU = 0x24000000, - MIPSI_SUB = 0x00000022, - MIPSI_SUBU = 0x00000023, - MIPSI_MUL = 0x70000002, - MIPSI_DIV = 0x0000001a, - MIPSI_DIVU = 0x0000001b, - - MIPSI_MOVZ = 0x0000000a, - MIPSI_MOVN = 0x0000000b, - MIPSI_MFHI = 0x00000010, - MIPSI_MFLO = 0x00000012, - MIPSI_MULT = 0x00000018, - - MIPSI_SLL = 0x00000000, - MIPSI_SRL = 0x00000002, - MIPSI_SRA = 0x00000003, - MIPSI_ROTR = 0x00200002, /* MIPSXXR2 */ - MIPSI_DROTR = 0x0020003a, - MIPSI_DROTR32 = 0x0020003e, - MIPSI_SLLV = 0x00000004, - MIPSI_SRLV = 0x00000006, - MIPSI_SRAV = 0x00000007, - MIPSI_ROTRV = 0x00000046, /* MIPSXXR2 */ - MIPSI_DROTRV = 0x00000056, - - MIPSI_SEB = 0x7c000420, /* MIPSXXR2 */ - MIPSI_SEH = 0x7c000620, /* MIPSXXR2 */ - MIPSI_WSBH = 0x7c0000a0, /* MIPSXXR2 */ - MIPSI_DSBH = 0x7c0000a4, - - MIPSI_B = 0x10000000, - MIPSI_J = 0x08000000, - MIPSI_JAL = 0x0c000000, - MIPSI_JALX = 0x74000000, - MIPSI_JR = 0x00000008, - MIPSI_JALR = 0x0000f809, - - MIPSI_BEQ = 0x10000000, - MIPSI_BNE = 0x14000000, - MIPSI_BLEZ = 0x18000000, - MIPSI_BGTZ = 0x1c000000, - MIPSI_BLTZ = 0x04000000, - MIPSI_BGEZ = 0x04010000, - - /* Load/store instructions. */ - MIPSI_LW = 0x8c000000, - MIPSI_LD = 0xdc000000, - MIPSI_SW = 0xac000000, - MIPSI_SD = 0xfc000000, - MIPSI_LB = 0x80000000, - MIPSI_SB = 0xa0000000, - MIPSI_LH = 0x84000000, - MIPSI_SH = 0xa4000000, - MIPSI_LBU = 0x90000000, - MIPSI_LHU = 0x94000000, - MIPSI_LWC1 = 0xc4000000, - MIPSI_SWC1 = 0xe4000000, - MIPSI_LDC1 = 0xd4000000, - MIPSI_SDC1 = 0xf4000000, - - /* MIPS64 instructions. 
*/ - MIPSI_DADD = 0x0000002c, - MIPSI_DADDI = 0x60000000, - MIPSI_DADDU = 0x0000002d, - MIPSI_DADDIU = 0x64000000, - MIPSI_DSUB = 0x0000002e, - MIPSI_DSUBU = 0x0000002f, - MIPSI_DDIV = 0x0000001e, - MIPSI_DDIVU = 0x0000001f, - MIPSI_DMULT = 0x0000001c, - MIPSI_DMULTU = 0x0000001d, - - MIPSI_DSLL = 0x00000038, - MIPSI_DSRL = 0x0000003a, - MIPSI_DSLLV = 0x00000014, - MIPSI_DSRLV = 0x00000016, - MIPSI_DSRA = 0x0000003b, - MIPSI_DSRAV = 0x00000017, - MIPSI_DSRA32 = 0x0000003f, - MIPSI_DSLL32 = 0x0000003c, - MIPSI_DSRL32 = 0x0000003e, - MIPSI_DSHD = 0x7c000164, - - MIPSI_AADDU = LJ_32 ? MIPSI_ADDU : MIPSI_DADDU, - MIPSI_AADDIU = LJ_32 ? MIPSI_ADDIU : MIPSI_DADDIU, - MIPSI_ASUBU = LJ_32 ? MIPSI_SUBU : MIPSI_DSUBU, - MIPSI_AL = LJ_32 ? MIPSI_LW : MIPSI_LD, - MIPSI_AS = LJ_32 ? MIPSI_SW : MIPSI_SD, - - /* Extract/insert instructions. */ - MIPSI_DEXTM = 0x7c000001, - MIPSI_DEXTU = 0x7c000002, - MIPSI_DEXT = 0x7c000003, - MIPSI_DINSM = 0x7c000005, - MIPSI_DINSU = 0x7c000006, - MIPSI_DINS = 0x7c000007, - - MIPSI_RINT_D = 0x4620001a, - MIPSI_RINT_S = 0x4600001a, - MIPSI_RINT = 0x4400001a, - MIPSI_FLOOR_D = 0x4620000b, - MIPSI_CEIL_D = 0x4620000a, - MIPSI_ROUND_D = 0x46200008, - - /* FP instructions. 
*/ - MIPSI_MOV_S = 0x46000006, - MIPSI_MOV_D = 0x46200006, - MIPSI_MOVT_D = 0x46210011, - MIPSI_MOVF_D = 0x46200011, - - MIPSI_ABS_D = 0x46200005, - MIPSI_NEG_D = 0x46200007, - - MIPSI_ADD_D = 0x46200000, - MIPSI_SUB_D = 0x46200001, - MIPSI_MUL_D = 0x46200002, - MIPSI_DIV_D = 0x46200003, - MIPSI_SQRT_D = 0x46200004, - - MIPSI_ADD_S = 0x46000000, - MIPSI_SUB_S = 0x46000001, - - MIPSI_CVT_D_S = 0x46000021, - MIPSI_CVT_W_S = 0x46000024, - MIPSI_CVT_S_D = 0x46200020, - MIPSI_CVT_W_D = 0x46200024, - MIPSI_CVT_S_W = 0x46800020, - MIPSI_CVT_D_W = 0x46800021, - MIPSI_CVT_S_L = 0x46a00020, - MIPSI_CVT_D_L = 0x46a00021, - - MIPSI_TRUNC_W_S = 0x4600000d, - MIPSI_TRUNC_W_D = 0x4620000d, - MIPSI_TRUNC_L_S = 0x46000009, - MIPSI_TRUNC_L_D = 0x46200009, - MIPSI_FLOOR_W_S = 0x4600000f, - MIPSI_FLOOR_W_D = 0x4620000f, - - MIPSI_MFC1 = 0x44000000, - MIPSI_MTC1 = 0x44800000, - MIPSI_DMTC1 = 0x44a00000, - MIPSI_DMFC1 = 0x44200000, - - MIPSI_BC1F = 0x45000000, - MIPSI_BC1T = 0x45010000, - - MIPSI_C_EQ_D = 0x46200032, - MIPSI_C_OLT_S = 0x46000034, - MIPSI_C_OLT_D = 0x46200034, - MIPSI_C_ULT_D = 0x46200035, - MIPSI_C_OLE_D = 0x46200036, - MIPSI_C_ULE_D = 0x46200037, -} MIPSIns; - -#endif diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h deleted file mode 100644 index c5c991a377..0000000000 --- a/src/lj_target_ppc.h +++ /dev/null @@ -1,280 +0,0 @@ -/* -** Definitions for PPC CPUs. -** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h -*/ - -#ifndef _LJ_TARGET_PPC_H -#define _LJ_TARGET_PPC_H - -/* -- Registers IDs ------------------------------------------------------- */ - -#define GPRDEF(_) \ - _(R0) _(SP) _(SYS1) _(R3) _(R4) _(R5) _(R6) _(R7) \ - _(R8) _(R9) _(R10) _(R11) _(R12) _(SYS2) _(R14) _(R15) \ - _(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \ - _(R24) _(R25) _(R26) _(R27) _(R28) _(R29) _(R30) _(R31) -#define FPRDEF(_) \ - _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ - _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ - _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ - _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) -#define VRIDDEF(_) - -#define RIDENUM(name) RID_##name, - -enum { - GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ - FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ - RID_MAX, - RID_TMP = RID_R0, - - /* Calling conventions. */ - RID_RET = RID_R3, - RID_RETHI = RID_R3, - RID_RETLO = RID_R4, - RID_FPRET = RID_F1, - - /* These definitions must match with the *.dasc file(s): */ - RID_BASE = RID_R14, /* Interpreter BASE. */ - RID_LPC = RID_R16, /* Interpreter PC. */ - RID_DISPATCH = RID_R17, /* Interpreter DISPATCH table. */ - RID_LREG = RID_R18, /* Interpreter L. */ - RID_JGL = RID_R31, /* On-trace: global_State + 32768. */ - - /* Register ranges [min, max) and number of registers. */ - RID_MIN_GPR = RID_R0, - RID_MAX_GPR = RID_R31+1, - RID_MIN_FPR = RID_F0, - RID_MAX_FPR = RID_F31+1, - RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, - RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR -}; - -#define RID_NUM_KREF RID_NUM_GPR -#define RID_MIN_KREF RID_R0 - -/* -- Register sets ------------------------------------------------------- */ - -/* Make use of all registers, except TMP, SP, SYS1, SYS2 and JGL. 
*/ -#define RSET_FIXED \ - (RID2RSET(RID_TMP)|RID2RSET(RID_SP)|RID2RSET(RID_SYS1)|\ - RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)) -#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) -#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) -#define RSET_ALL (RSET_GPR|RSET_FPR) -#define RSET_INIT RSET_ALL - -#define RSET_SCRATCH_GPR (RSET_RANGE(RID_R3, RID_R12+1)) -#define RSET_SCRATCH_FPR (RSET_RANGE(RID_F0, RID_F13+1)) -#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) -#define REGARG_FIRSTGPR RID_R3 -#define REGARG_LASTGPR RID_R10 -#define REGARG_NUMGPR 8 -#define REGARG_FIRSTFPR RID_F1 -#define REGARG_LASTFPR RID_F8 -#define REGARG_NUMFPR 8 - -/* -- Spill slots --------------------------------------------------------- */ - -/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. -** -** SPS_FIXED: Available fixed spill slots in interpreter frame. -** This definition must match with the *.dasc file(s). -** -** SPS_FIRST: First spill slot for general use. -** [sp+12] tmplo word \ -** [sp+ 8] tmphi word / tmp dword, parameter area for callee -** [sp+ 4] tmpw, LR of callee -** [sp+ 0] stack chain -*/ -#define SPS_FIXED 7 -#define SPS_FIRST 4 - -/* Stack offsets for temporary slots. Used for FP<->int conversions etc. */ -#define SPOFS_TMPW 4 -#define SPOFS_TMP 8 -#define SPOFS_TMPHI 8 -#define SPOFS_TMPLO 12 - -#define sps_scale(slot) (4 * (int32_t)(slot)) -#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) - -/* -- Exit state ---------------------------------------------------------- */ - -/* This definition must match with the *.dasc file(s). */ -typedef struct { - lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ - intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ - int32_t spill[256]; /* Spill slots. */ -} ExitState; - -/* Highest exit + 1 indicates stack check. */ -#define EXITSTATE_CHECKEXIT 1 - -/* Return the address of a per-trace exit stub. 
*/ -static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) -{ - while (*p == 0x60000000) p++; /* Skip PPCI_NOP. */ - return p + 3 + exitno; -} -/* Avoid dependence on lj_jit.h if only including lj_target.h. */ -#define exitstub_trace_addr(T, exitno) \ - exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno)) - -/* -- Instructions -------------------------------------------------------- */ - -/* Instruction fields. */ -#define PPCF_CC(cc) ((((cc) & 3) << 16) | (((cc) & 4) << 22)) -#define PPCF_T(r) ((r) << 21) -#define PPCF_A(r) ((r) << 16) -#define PPCF_B(r) ((r) << 11) -#define PPCF_C(r) ((r) << 6) -#define PPCF_MB(n) ((n) << 6) -#define PPCF_ME(n) ((n) << 1) -#define PPCF_Y 0x00200000 -#define PPCF_DOT 0x00000001 - -typedef enum PPCIns { - /* Integer instructions. */ - PPCI_MR = 0x7c000378, - PPCI_NOP = 0x60000000, - - PPCI_LI = 0x38000000, - PPCI_LIS = 0x3c000000, - - PPCI_ADD = 0x7c000214, - PPCI_ADDC = 0x7c000014, - PPCI_ADDO = 0x7c000614, - PPCI_ADDE = 0x7c000114, - PPCI_ADDZE = 0x7c000194, - PPCI_ADDME = 0x7c0001d4, - PPCI_ADDI = 0x38000000, - PPCI_ADDIS = 0x3c000000, - PPCI_ADDIC = 0x30000000, - PPCI_ADDICDOT = 0x34000000, - - PPCI_SUBF = 0x7c000050, - PPCI_SUBFC = 0x7c000010, - PPCI_SUBFO = 0x7c000450, - PPCI_SUBFE = 0x7c000110, - PPCI_SUBFZE = 0x7c000190, - PPCI_SUBFME = 0x7c0001d0, - PPCI_SUBFIC = 0x20000000, - - PPCI_NEG = 0x7c0000d0, - - PPCI_AND = 0x7c000038, - PPCI_ANDC = 0x7c000078, - PPCI_NAND = 0x7c0003b8, - PPCI_ANDIDOT = 0x70000000, - PPCI_ANDISDOT = 0x74000000, - - PPCI_OR = 0x7c000378, - PPCI_NOR = 0x7c0000f8, - PPCI_ORI = 0x60000000, - PPCI_ORIS = 0x64000000, - - PPCI_XOR = 0x7c000278, - PPCI_EQV = 0x7c000238, - PPCI_XORI = 0x68000000, - PPCI_XORIS = 0x6c000000, - - PPCI_CMPW = 0x7c000000, - PPCI_CMPLW = 0x7c000040, - PPCI_CMPWI = 0x2c000000, - PPCI_CMPLWI = 0x28000000, - - PPCI_MULLW = 0x7c0001d6, - PPCI_MULLI = 0x1c000000, - PPCI_MULLWO = 0x7c0005d6, - - PPCI_EXTSB = 0x7c000774, - PPCI_EXTSH = 
0x7c000734, - - PPCI_SLW = 0x7c000030, - PPCI_SRW = 0x7c000430, - PPCI_SRAW = 0x7c000630, - PPCI_SRAWI = 0x7c000670, - - PPCI_RLWNM = 0x5c000000, - PPCI_RLWINM = 0x54000000, - PPCI_RLWIMI = 0x50000000, - - PPCI_B = 0x48000000, - PPCI_BL = 0x48000001, - PPCI_BC = 0x40800000, - PPCI_BCL = 0x40800001, - PPCI_BCTR = 0x4e800420, - PPCI_BCTRL = 0x4e800421, - - PPCI_CRANDC = 0x4c000102, - PPCI_CRXOR = 0x4c000182, - PPCI_CRAND = 0x4c000202, - PPCI_CREQV = 0x4c000242, - PPCI_CRORC = 0x4c000342, - PPCI_CROR = 0x4c000382, - - PPCI_MFLR = 0x7c0802a6, - PPCI_MTCTR = 0x7c0903a6, - - PPCI_MCRXR = 0x7c000400, - - /* Load/store instructions. */ - PPCI_LWZ = 0x80000000, - PPCI_LBZ = 0x88000000, - PPCI_STW = 0x90000000, - PPCI_STB = 0x98000000, - PPCI_LHZ = 0xa0000000, - PPCI_LHA = 0xa8000000, - PPCI_STH = 0xb0000000, - - PPCI_STWU = 0x94000000, - - PPCI_LFS = 0xc0000000, - PPCI_LFD = 0xc8000000, - PPCI_STFS = 0xd0000000, - PPCI_STFD = 0xd8000000, - - PPCI_LWZX = 0x7c00002e, - PPCI_LBZX = 0x7c0000ae, - PPCI_STWX = 0x7c00012e, - PPCI_STBX = 0x7c0001ae, - PPCI_LHZX = 0x7c00022e, - PPCI_LHAX = 0x7c0002ae, - PPCI_STHX = 0x7c00032e, - - PPCI_LWBRX = 0x7c00042c, - PPCI_STWBRX = 0x7c00052c, - - PPCI_LFSX = 0x7c00042e, - PPCI_LFDX = 0x7c0004ae, - PPCI_STFSX = 0x7c00052e, - PPCI_STFDX = 0x7c0005ae, - - /* FP instructions. 
*/ - PPCI_FMR = 0xfc000090, - PPCI_FNEG = 0xfc000050, - PPCI_FABS = 0xfc000210, - - PPCI_FRSP = 0xfc000018, - PPCI_FCTIWZ = 0xfc00001e, - - PPCI_FADD = 0xfc00002a, - PPCI_FSUB = 0xfc000028, - PPCI_FMUL = 0xfc000032, - PPCI_FDIV = 0xfc000024, - PPCI_FSQRT = 0xfc00002c, - - PPCI_FMADD = 0xfc00003a, - PPCI_FMSUB = 0xfc000038, - PPCI_FNMSUB = 0xfc00003c, - - PPCI_FCMPU = 0xfc000000, - PPCI_FSEL = 0xfc00002e, -} PPCIns; - -typedef enum PPCCC { - CC_GE, CC_LE, CC_NE, CC_NS, CC_LT, CC_GT, CC_EQ, CC_SO -} PPCCC; - -#endif diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc deleted file mode 100644 index 780cc16e6d..0000000000 --- a/src/vm_arm.dasc +++ /dev/null @@ -1,4593 +0,0 @@ -|// Low-level VM code for ARM CPUs. -|// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -| -|.arch arm -|.section code_op, code_sub -| -|.actionlist build_actionlist -|.globals GLOB_ -|.globalnames globnames -|.externnames extnames -| -|// Note: The ragged indentation of the instructions is intentional. -|// The starting columns indicate data dependencies. -| -|//----------------------------------------------------------------------- -| -|// Fixed register assignments for the interpreter. -| -|// The following must be C callee-save. -|.define MASKR8, r4 // 255*8 constant for fast bytecode decoding. -|.define KBASE, r5 // Constants of current Lua function. -|.define PC, r6 // Next PC. -|.define DISPATCH, r7 // Opcode dispatch table. -|.define LREG, r8 // Register holding lua_State (also in SAVE_L). -| -|// C callee-save in EABI, but often refetched. Temporary in iOS 3.0+. -|.define BASE, r9 // Base of current Lua stack frame. -| -|// The following temporaries are not saved across C calls, except for RA/RC. -|.define RA, r10 // Callee-save. -|.define RC, r11 // Callee-save. -|.define RB, r12 -|.define OP, r12 // Overlaps RB, must not be lr. -|.define INS, lr -| -|// Calling conventions. Also used as temporaries. 
-|.define CARG1, r0 -|.define CARG2, r1 -|.define CARG3, r2 -|.define CARG4, r3 -|.define CARG12, r0 // For 1st soft-fp double. -|.define CARG34, r2 // For 2nd soft-fp double. -| -|.define CRET1, r0 -|.define CRET2, r1 -| -|// Stack layout while in interpreter. Must match with lj_frame.h. -|.define SAVE_R4, [sp, #28] -|.define CFRAME_SPACE, #28 -|.define SAVE_ERRF, [sp, #24] -|.define SAVE_NRES, [sp, #20] -|.define SAVE_CFRAME, [sp, #16] -|.define SAVE_L, [sp, #12] -|.define SAVE_PC, [sp, #8] -|.define SAVE_MULTRES, [sp, #4] -|.define ARG5, [sp] -| -|.define TMPDhi, [sp, #4] -|.define TMPDlo, [sp] -|.define TMPD, [sp] -|.define TMPDp, sp -| -|.if FPU -|.macro saveregs -| push {r5, r6, r7, r8, r9, r10, r11, lr} -| vpush {d8-d15} -| sub sp, sp, CFRAME_SPACE+4 -| str r4, SAVE_R4 -|.endmacro -|.macro restoreregs_ret -| ldr r4, SAVE_R4 -| add sp, sp, CFRAME_SPACE+4 -| vpop {d8-d15} -| pop {r5, r6, r7, r8, r9, r10, r11, pc} -|.endmacro -|.else -|.macro saveregs -| push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -| sub sp, sp, CFRAME_SPACE -|.endmacro -|.macro restoreregs_ret -| add sp, sp, CFRAME_SPACE -| pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -|.endmacro -|.endif -| -|// Type definitions. Some of these are only used for documentation. -|.type L, lua_State, LREG -|.type GL, global_State -|.type TVALUE, TValue -|.type GCOBJ, GCobj -|.type STR, GCstr -|.type TAB, GCtab -|.type LFUNC, GCfuncL -|.type CFUNC, GCfuncC -|.type PROTO, GCproto -|.type UPVAL, GCupval -|.type NODE, Node -|.type NARGS8, int -|.type TRACE, GCtrace -|.type SBUF, SBuf -| -|//----------------------------------------------------------------------- -| -|// Trap for not-yet-implemented parts. -|.macro NYI; ud; .endmacro -| -|//----------------------------------------------------------------------- -| -|// Access to frame relative to BASE. 
-|.define FRAME_FUNC, #-8 -|.define FRAME_PC, #-4 -| -|.macro decode_RA8, dst, ins; and dst, MASKR8, ins, lsr #5; .endmacro -|.macro decode_RB8, dst, ins; and dst, MASKR8, ins, lsr #21; .endmacro -|.macro decode_RC8, dst, ins; and dst, MASKR8, ins, lsr #13; .endmacro -|.macro decode_RD, dst, ins; lsr dst, ins, #16; .endmacro -|.macro decode_OP, dst, ins; and dst, ins, #255; .endmacro -| -|// Instruction fetch. -|.macro ins_NEXT1 -| ldrb OP, [PC] -|.endmacro -|.macro ins_NEXT2 -| ldr INS, [PC], #4 -|.endmacro -|// Instruction decode+dispatch. -|.macro ins_NEXT3 -| ldr OP, [DISPATCH, OP, lsl #2] -| decode_RA8 RA, INS -| decode_RD RC, INS -| bx OP -|.endmacro -|.macro ins_NEXT -| ins_NEXT1 -| ins_NEXT2 -| ins_NEXT3 -|.endmacro -| -|// Instruction footer. -|.if 1 -| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. -| .define ins_next, ins_NEXT -| .define ins_next_, ins_NEXT -| .define ins_next1, ins_NEXT1 -| .define ins_next2, ins_NEXT2 -| .define ins_next3, ins_NEXT3 -|.else -| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. -| // Affects only certain kinds of benchmarks (and only with -j off). -| .macro ins_next -| b ->ins_next -| .endmacro -| .macro ins_next1 -| .endmacro -| .macro ins_next2 -| .endmacro -| .macro ins_next3 -| b ->ins_next -| .endmacro -| .macro ins_next_ -| ->ins_next: -| ins_NEXT -| .endmacro -|.endif -| -|// Avoid register name substitution for field name. -#define field_pc pc -| -|// Call decode and dispatch. -|.macro ins_callt -| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC -| ldr PC, LFUNC:CARG3->field_pc -| ldrb OP, [PC] // STALL: load PC. early PC. -| ldr INS, [PC], #4 -| ldr OP, [DISPATCH, OP, lsl #2] // STALL: load OP. early OP. -| decode_RA8 RA, INS -| add RA, RA, BASE -| bx OP -|.endmacro -| -|.macro ins_call -| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC -| str PC, [BASE, FRAME_PC] -| ins_callt // STALL: locked PC. 
-|.endmacro -| -|//----------------------------------------------------------------------- -| -|// Macros to test operand types. -|.macro checktp, reg, tp; cmn reg, #-tp; .endmacro -|.macro checktpeq, reg, tp; cmneq reg, #-tp; .endmacro -|.macro checktpne, reg, tp; cmnne reg, #-tp; .endmacro -|.macro checkstr, reg, target; checktp reg, LJ_TSTR; bne target; .endmacro -|.macro checktab, reg, target; checktp reg, LJ_TTAB; bne target; .endmacro -|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC; bne target; .endmacro -| -|// Assumes DISPATCH is relative to GL. -#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) -#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) -| -#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) -| -|.macro hotcheck, delta -| lsr CARG1, PC, #1 -| and CARG1, CARG1, #126 -| sub CARG1, CARG1, #-GG_DISP2HOT -| ldrh CARG2, [DISPATCH, CARG1] -| subs CARG2, CARG2, #delta -| strh CARG2, [DISPATCH, CARG1] -|.endmacro -| -|.macro hotloop -| hotcheck HOTCOUNT_LOOP -| blo ->vm_hotloop -|.endmacro -| -|.macro hotcall -| hotcheck HOTCOUNT_CALL -| blo ->vm_hotcall -|.endmacro -| -|// Set current VM state. -|.macro mv_vmstate, reg, st; mvn reg, #LJ_VMST_..st; .endmacro -|.macro st_vmstate, reg; str reg, [DISPATCH, #DISPATCH_GL(vmstate)]; .endmacro -| -|// Move table write barrier back. Overwrites mark and tmp. -|.macro barrierback, tab, mark, tmp -| ldr tmp, [DISPATCH, #DISPATCH_GL(gc.grayagain)] -| bic mark, mark, #LJ_GC_BLACK // black2gray(tab) -| str tab, [DISPATCH, #DISPATCH_GL(gc.grayagain)] -| strb mark, tab->marked -| str tmp, tab->gclist -|.endmacro -| -|.macro .IOS, a, b -|.if IOS -| a, b -|.endif -|.endmacro -| -|//----------------------------------------------------------------------- - -#if !LJ_DUALNUM -#error "Only dual-number mode supported for ARM target" -#endif - -/* Generate subroutines used by opcodes and other parts of the VM. 
*/ -/* The .code_sub section should be last to help static branch prediction. */ -static void build_subroutines(BuildCtx *ctx) -{ - |.code_sub - | - |//----------------------------------------------------------------------- - |//-- Return handling ---------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_returnp: - | // See vm_return. Also: RB = previous base. - | tst PC, #FRAME_P - | beq ->cont_dispatch - | - | // Return from pcall or xpcall fast func. - | ldr PC, [RB, FRAME_PC] // Fetch PC of previous frame. - | mvn CARG2, #~LJ_TTRUE - | mov BASE, RB - | // Prepending may overwrite the pcall frame, so do it at the end. - | str CARG2, [RA, FRAME_PC] // Prepend true to results. - | sub RA, RA, #8 - | - |->vm_returnc: - | adds RC, RC, #8 // RC = (nresults+1)*8. - | mov CRET1, #LUA_YIELD - | beq ->vm_unwind_c_eh - | str RC, SAVE_MULTRES - | ands CARG1, PC, #FRAME_TYPE - | beq ->BC_RET_Z // Handle regular return to Lua. - | - |->vm_return: - | // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return - | // CARG1 = PC & FRAME_TYPE - | bic RB, PC, #FRAME_TYPEP - | cmp CARG1, #FRAME_C - | sub RB, BASE, RB // RB = previous base. - | bne ->vm_returnp - | - | str RB, L->base - | ldr KBASE, SAVE_NRES - | mv_vmstate CARG4, C - | sub BASE, BASE, #8 - | subs CARG3, RC, #8 - | lsl KBASE, KBASE, #3 // KBASE = (nresults_wanted+1)*8 - | st_vmstate CARG4 - | beq >2 - |1: - | subs CARG3, CARG3, #8 - | ldrd CARG12, [RA], #8 - | strd CARG12, [BASE], #8 - | bne <1 - |2: - | cmp KBASE, RC // More/less results wanted? - | bne >6 - |3: - | str BASE, L->top // Store new top. - | - |->vm_leave_cp: - | ldr RC, SAVE_CFRAME // Restore previous C frame. - | mov CRET1, #0 // Ok return status for vm_pcall. - | str RC, L->cframe - | - |->vm_leave_unw: - | restoreregs_ret - | - |6: - | blt >7 // Less results wanted? - | // More results wanted. Check stack size and fill up results with nil. 
- | ldr CARG3, L->maxstack - | mvn CARG2, #~LJ_TNIL - | cmp BASE, CARG3 - | bhs >8 - | str CARG2, [BASE, #4] - | add RC, RC, #8 - | add BASE, BASE, #8 - | b <2 - | - |7: // Less results wanted. - | sub CARG1, RC, KBASE - | cmp KBASE, #0 // LUA_MULTRET+1 case? - | subne BASE, BASE, CARG1 // Either keep top or shrink it. - | b <3 - | - |8: // Corner case: need to grow stack for filling up results. - | // This can happen if: - | // - A C function grows the stack (a lot). - | // - The GC shrinks the stack in between. - | // - A return back from a lua_call() with (high) nresults adjustment. - | str BASE, L->top // Save current top held in BASE (yes). - | lsr CARG2, KBASE, #3 - | mov CARG1, L - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldr BASE, L->top // Need the (realloced) L->top in BASE. - | b <2 - | - |->vm_unwind_c: // Unwind C stack, return from vm_pcall. - | // (void *cframe, int errcode) - | mov sp, CARG1 - | mov CRET1, CARG2 - |->vm_unwind_c_eh: // Landing pad for external unwinder. - | ldr L, SAVE_L - | mv_vmstate CARG4, C - | ldr GL:CARG3, L->glref - | str CARG4, GL:CARG3->vmstate - | b ->vm_leave_unw - | - |->vm_unwind_ff: // Unwind C stack, return from ff pcall. - | // (void *cframe) - | bic CARG1, CARG1, #~CFRAME_RAWMASK // Use two steps: bic sp is deprecated. - | mov sp, CARG1 - |->vm_unwind_ff_eh: // Landing pad for external unwinder. - | ldr L, SAVE_L - | mov MASKR8, #255 - | mov RC, #16 // 2 results: false + error message. - | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. - | ldr BASE, L->base - | ldr DISPATCH, L->glref // Setup pointer to dispatch table. - | mvn CARG1, #~LJ_TFALSE - | sub RA, BASE, #8 // Results start at BASE-8. - | ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame. - | add DISPATCH, DISPATCH, #GG_G2DISP - | mv_vmstate CARG2, INTERP - | str CARG1, [BASE, #-4] // Prepend false to error message. - | st_vmstate CARG2 - | b ->vm_returnc - | - |->vm_unwind_ext: // Complete external unwind. 
-#if !LJ_NO_UNWIND - | push {r0, r1, r2, lr} - | bl extern _Unwind_Complete - | ldr r0, [sp] - | bl extern _Unwind_DeleteException - | pop {r0, r1, r2, lr} - | mov r0, r1 - | bx r2 -#endif - | - |//----------------------------------------------------------------------- - |//-- Grow stack for calls ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_growstack_c: // Grow stack for C function. - | // CARG1 = L - | mov CARG2, #LUA_MINSTACK - | b >2 - | - |->vm_growstack_l: // Grow stack for Lua function. - | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC - | add RC, BASE, RC - | sub RA, RA, BASE - | mov CARG1, L - | str BASE, L->base - | add PC, PC, #4 // Must point after first instruction. - | str RC, L->top - | lsr CARG2, RA, #3 - |2: - | // L->base = new base, L->top = top - | str PC, SAVE_PC - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldr BASE, L->base - | ldr RC, L->top - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | sub NARGS8:RC, RC, BASE - | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC - | ins_callt // Just retry the call. - | - |//----------------------------------------------------------------------- - |//-- Entry points into the assembler VM --------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_resume: // Setup C frame and resume thread. - | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) - | saveregs - | mov L, CARG1 - | ldr DISPATCH, L:CARG1->glref // Setup pointer to dispatch table. - | mov BASE, CARG2 - | add DISPATCH, DISPATCH, #GG_G2DISP - | str L, SAVE_L - | mov PC, #FRAME_CP - | str CARG3, SAVE_NRES - | add CARG2, sp, #CFRAME_RESUME - | ldrb CARG1, L->status - | str CARG3, SAVE_ERRF - | str L, SAVE_PC // Any value outside of bytecode is ok. 
- | str CARG3, SAVE_CFRAME - | cmp CARG1, #0 - | str CARG2, L->cframe - | beq >3 - | - | // Resume after yield (like a return). - | str L, [DISPATCH, #DISPATCH_GL(cur_L)] - | mov RA, BASE - | ldr BASE, L->base - | ldr CARG1, L->top - | mov MASKR8, #255 - | strb CARG3, L->status - | sub RC, CARG1, BASE - | ldr PC, [BASE, FRAME_PC] - | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. - | mv_vmstate CARG2, INTERP - | add RC, RC, #8 - | ands CARG1, PC, #FRAME_TYPE - | st_vmstate CARG2 - | str RC, SAVE_MULTRES - | beq ->BC_RET_Z - | b ->vm_return - | - |->vm_pcall: // Setup protected C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) - | saveregs - | mov PC, #FRAME_CP - | str CARG4, SAVE_ERRF - | b >1 - | - |->vm_call: // Setup C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1) - | saveregs - | mov PC, #FRAME_C - | - |1: // Entry point for vm_pcall above (PC = ftype). - | ldr RC, L:CARG1->cframe - | str CARG3, SAVE_NRES - | mov L, CARG1 - | str CARG1, SAVE_L - | ldr DISPATCH, L->glref // Setup pointer to dispatch table. - | mov BASE, CARG2 - | str CARG1, SAVE_PC // Any value outside of bytecode is ok. - | str RC, SAVE_CFRAME - | add DISPATCH, DISPATCH, #GG_G2DISP - | str sp, L->cframe // Add our C frame to cframe chain. - | - |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). - | str L, [DISPATCH, #DISPATCH_GL(cur_L)] - | ldr RB, L->base // RB = old base (for vmeta_call). - | ldr CARG1, L->top - | mov MASKR8, #255 - | add PC, PC, BASE - | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. 
- | sub PC, PC, RB // PC = frame delta + frame type - | mv_vmstate CARG2, INTERP - | sub NARGS8:RC, CARG1, BASE - | st_vmstate CARG2 - | - |->vm_call_dispatch: - | // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC - | ldrd CARG34, [BASE, FRAME_FUNC] - | checkfunc CARG4, ->vmeta_call - | - |->vm_call_dispatch_f: - | ins_call - | // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC - | - |->vm_cpcall: // Setup protected C frame, call C. - | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) - | saveregs - | mov L, CARG1 - | ldr RA, L:CARG1->stack - | str CARG1, SAVE_L - | ldr DISPATCH, L->glref // Setup pointer to dispatch table. - | ldr RB, L->top - | str CARG1, SAVE_PC // Any value outside of bytecode is ok. - | ldr RC, L->cframe - | add DISPATCH, DISPATCH, #GG_G2DISP - | sub RA, RA, RB // Compute -savestack(L, L->top). - | mov RB, #0 - | str RA, SAVE_NRES // Neg. delta means cframe w/o frame. - | str RB, SAVE_ERRF // No error function. - | str RC, SAVE_CFRAME - | str sp, L->cframe // Add our C frame to cframe chain. - | str L, [DISPATCH, #DISPATCH_GL(cur_L)] - | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud) - | movs BASE, CRET1 - | mov PC, #FRAME_CP - | bne <3 // Else continue with the call. - | b ->vm_leave_cp // No base? Just remove C frame. - | - |//----------------------------------------------------------------------- - |//-- Metamethod handling ------------------------------------------------ - |//----------------------------------------------------------------------- - | - |//-- Continuation dispatch ---------------------------------------------- - | - |->cont_dispatch: - | // BASE = meta base, RA = resultptr, RC = (nresults+1)*8 - | ldr LFUNC:CARG3, [RB, FRAME_FUNC] - | ldr CARG1, [BASE, #-16] // Get continuation. - | mov CARG4, BASE - | mov BASE, RB // Restore caller BASE. - |.if FFI - | cmp CARG1, #1 - |.endif - | ldr PC, [CARG4, #-12] // Restore PC from [cont|PC]. 
- | ldr CARG3, LFUNC:CARG3->field_pc - | mvn INS, #~LJ_TNIL - | add CARG2, RA, RC - | str INS, [CARG2, #-4] // Ensure one valid arg. - |.if FFI - | bls >1 - |.endif - | ldr KBASE, [CARG3, #PC2PROTO(k)] - | // BASE = base, RA = resultptr, CARG4 = meta base - | bx CARG1 - | - |.if FFI - |1: - | beq ->cont_ffi_callback // cont = 1: return from FFI callback. - | // cont = 0: tailcall from C function. - | sub CARG4, CARG4, #16 - | sub RC, CARG4, BASE - | b ->vm_call_tail - |.endif - | - |->cont_cat: // RA = resultptr, CARG4 = meta base - | ldr INS, [PC, #-4] - | sub CARG2, CARG4, #16 - | ldrd CARG34, [RA] - | str BASE, L->base - | decode_RB8 RC, INS - | decode_RA8 RA, INS - | add CARG1, BASE, RC - | subs CARG1, CARG2, CARG1 - | strdne CARG34, [CARG2] - | movne CARG3, CARG1 - | bne ->BC_CAT_Z - | strd CARG34, [BASE, RA] - | b ->cont_nop - | - |//-- Table indexing metamethods ----------------------------------------- - | - |->vmeta_tgets1: - | add CARG2, BASE, RB - | b >2 - | - |->vmeta_tgets: - | sub CARG2, DISPATCH, #-DISPATCH_GL(tmptv) - | mvn CARG4, #~LJ_TTAB - | str TAB:RB, [CARG2] - | str CARG4, [CARG2, #4] - |2: - | mvn CARG4, #~LJ_TSTR - | str STR:RC, TMPDlo - | str CARG4, TMPDhi - | mov CARG3, TMPDp - | b >1 - | - |->vmeta_tgetb: // RC = index - | decode_RB8 RB, INS - | str RC, TMPDlo - | mvn CARG4, #~LJ_TISNUM - | add CARG2, BASE, RB - | str CARG4, TMPDhi - | mov CARG3, TMPDp - | b >1 - | - |->vmeta_tgetv: - | add CARG2, BASE, RB - | add CARG3, BASE, RC - |1: - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) - | // Returns TValue * (finished) or NULL (metamethod). - | .IOS ldr BASE, L->base - | cmp CRET1, #0 - | beq >3 - | ldrd CARG34, [CRET1] - | ins_next1 - | ins_next2 - | strd CARG34, [BASE, RA] - | ins_next3 - | - |3: // Call __index metamethod. 
- | // BASE = base, L->top = new base, stack = cont/func/t/k - | rsb CARG1, BASE, #FRAME_CONT - | ldr BASE, L->top - | mov NARGS8:RC, #16 // 2 args for func(t, k). - | str PC, [BASE, #-12] // [cont|PC] - | add PC, CARG1, BASE - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. - | b ->vm_call_dispatch_f - | - |->vmeta_tgetr: - | .IOS mov RC, BASE - | bl extern lj_tab_getinth // (GCtab *t, int32_t key) - | // Returns cTValue * or NULL. - | .IOS mov BASE, RC - | cmp CRET1, #0 - | ldrdne CARG12, [CRET1] - | mvneq CARG2, #~LJ_TNIL - | b ->BC_TGETR_Z - | - |//----------------------------------------------------------------------- - | - |->vmeta_tsets1: - | add CARG2, BASE, RB - | b >2 - | - |->vmeta_tsets: - | sub CARG2, DISPATCH, #-DISPATCH_GL(tmptv) - | mvn CARG4, #~LJ_TTAB - | str TAB:RB, [CARG2] - | str CARG4, [CARG2, #4] - |2: - | mvn CARG4, #~LJ_TSTR - | str STR:RC, TMPDlo - | str CARG4, TMPDhi - | mov CARG3, TMPDp - | b >1 - | - |->vmeta_tsetb: // RC = index - | decode_RB8 RB, INS - | str RC, TMPDlo - | mvn CARG4, #~LJ_TISNUM - | add CARG2, BASE, RB - | str CARG4, TMPDhi - | mov CARG3, TMPDp - | b >1 - | - |->vmeta_tsetv: - | add CARG2, BASE, RB - | add CARG3, BASE, RC - |1: - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) - | // Returns TValue * (finished) or NULL (metamethod). - | .IOS ldr BASE, L->base - | cmp CRET1, #0 - | ldrd CARG34, [BASE, RA] - | beq >3 - | ins_next1 - | // NOBARRIER: lj_meta_tset ensures the table is not black. - | strd CARG34, [CRET1] - | ins_next2 - | ins_next3 - | - |3: // Call __newindex metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) - | rsb CARG1, BASE, #FRAME_CONT - | ldr BASE, L->top - | mov NARGS8:RC, #24 // 3 args for func(t, k, v). - | strd CARG34, [BASE, #16] // Copy value to third argument. 
- | str PC, [BASE, #-12] // [cont|PC] - | add PC, CARG1, BASE - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. - | b ->vm_call_dispatch_f - | - |->vmeta_tsetr: - | str BASE, L->base - | .IOS mov RC, BASE - | str PC, SAVE_PC - | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) - | // Returns TValue *. - | .IOS mov BASE, RC - | b ->BC_TSETR_Z - | - |//-- Comparison metamethods --------------------------------------------- - | - |->vmeta_comp: - | mov CARG1, L - | sub PC, PC, #4 - | mov CARG2, RA - | str BASE, L->base - | mov CARG3, RC - | str PC, SAVE_PC - | decode_OP CARG4, INS - | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) - | // Returns 0/1 or TValue * (metamethod). - |3: - | .IOS ldr BASE, L->base - | cmp CRET1, #1 - | bhi ->vmeta_binop - |4: - | ldrh RB, [PC, #2] - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | subhs PC, RB, #0x20000 - |->cont_nop: - | ins_next - | - |->cont_ra: // RA = resultptr - | ldr INS, [PC, #-4] - | ldrd CARG12, [RA] - | decode_RA8 CARG3, INS - | strd CARG12, [BASE, CARG3] - | b ->cont_nop - | - |->cont_condt: // RA = resultptr - | ldr CARG2, [RA, #4] - | mvn CARG1, #~LJ_TTRUE - | cmp CARG1, CARG2 // Branch if result is true. - | b <4 - | - |->cont_condf: // RA = resultptr - | ldr CARG2, [RA, #4] - | checktp CARG2, LJ_TFALSE // Branch if result is false. - | b <4 - | - |->vmeta_equal: - | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. - | sub PC, PC, #4 - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) - | // Returns 0/1 or TValue * (metamethod). - | b <3 - | - |->vmeta_equal_cd: - |.if FFI - | sub PC, PC, #4 - | str BASE, L->base - | mov CARG1, L - | mov CARG2, INS - | str PC, SAVE_PC - | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op) - | // Returns 0/1 or TValue * (metamethod). 
- | b <3 - |.endif - | - |->vmeta_istype: - | sub PC, PC, #4 - | str BASE, L->base - | mov CARG1, L - | lsr CARG2, RA, #3 - | mov CARG3, RC - | str PC, SAVE_PC - | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) - | .IOS ldr BASE, L->base - | b ->cont_nop - | - |//-- Arithmetic metamethods --------------------------------------------- - | - |->vmeta_arith_vn: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | add CARG3, BASE, RB - | add CARG4, KBASE, RC - | b >1 - | - |->vmeta_arith_nv: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | add CARG4, BASE, RB - | add CARG3, KBASE, RC - | b >1 - | - |->vmeta_unm: - | ldr INS, [PC, #-8] - | sub PC, PC, #4 - | add CARG3, BASE, RC - | add CARG4, BASE, RC - | b >1 - | - |->vmeta_arith_vv: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | add CARG3, BASE, RB - | add CARG4, BASE, RC - |1: - | decode_OP OP, INS - | add CARG2, BASE, RA - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | str OP, ARG5 - | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) - | // Returns NULL (finished) or TValue * (metamethod). - | .IOS ldr BASE, L->base - | cmp CRET1, #0 - | beq ->cont_nop - | - | // Call metamethod for binary op. - |->vmeta_binop: - | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 - | sub CARG2, CRET1, BASE - | str PC, [CRET1, #-12] // [cont|PC] - | add PC, CARG2, #FRAME_CONT - | mov BASE, CRET1 - | mov NARGS8:RC, #16 // 2 args for func(o1, o2). - | b ->vm_call_dispatch - | - |->vmeta_len: - | add CARG2, BASE, RC - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_len // (lua_State *L, TValue *o) - | // Returns NULL (retry) or TValue * (metamethod base). - | .IOS ldr BASE, L->base -#if LJ_52 - | cmp CRET1, #0 - | bne ->vmeta_binop // Binop call for compatibility. - | ldr TAB:CARG1, [BASE, RC] - | b ->BC_LEN_Z -#else - | b ->vmeta_binop // Binop call for compatibility. 
-#endif - | - |//-- Call metamethod ---------------------------------------------------- - | - |->vmeta_call: // Resolve and call __call metamethod. - | // RB = old base, BASE = new base, RC = nargs*8 - | mov CARG1, L - | str RB, L->base // This is the callers base! - | sub CARG2, BASE, #8 - | str PC, SAVE_PC - | add CARG3, BASE, NARGS8:RC - | .IOS mov RA, BASE - | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | .IOS mov BASE, RA - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. - | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. - | ins_call - | - |->vmeta_callt: // Resolve __call for BC_CALLT. - | // BASE = old base, RA = new base, RC = nargs*8 - | mov CARG1, L - | str BASE, L->base - | sub CARG2, RA, #8 - | str PC, SAVE_PC - | add CARG3, RA, NARGS8:RC - | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | .IOS ldr BASE, L->base - | ldr LFUNC:CARG3, [RA, FRAME_FUNC] // Guaranteed to be a function here. - | ldr PC, [BASE, FRAME_PC] - | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. - | b ->BC_CALLT2_Z - | - |//-- Argument coercion for 'for' statement ------------------------------ - | - |->vmeta_for: - | mov CARG1, L - | str BASE, L->base - | mov CARG2, RA - | str PC, SAVE_PC - | bl extern lj_meta_for // (lua_State *L, TValue *base) - | .IOS ldr BASE, L->base - |.if JIT - | ldrb OP, [PC, #-4] - |.endif - | ldr INS, [PC, #-4] - |.if JIT - | cmp OP, #BC_JFORI - |.endif - | decode_RA8 RA, INS - | decode_RD RC, INS - |.if JIT - | beq =>BC_JFORI - |.endif - | b =>BC_FORI - | - |//----------------------------------------------------------------------- - |//-- Fast functions ----------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro .ffunc, name - |->ff_ .. name: - |.endmacro - | - |.macro .ffunc_1, name - |->ff_ .. 
name: - | ldrd CARG12, [BASE] - | cmp NARGS8:RC, #8 - | blo ->fff_fallback - |.endmacro - | - |.macro .ffunc_2, name - |->ff_ .. name: - | ldrd CARG12, [BASE] - | ldrd CARG34, [BASE, #8] - | cmp NARGS8:RC, #16 - | blo ->fff_fallback - |.endmacro - | - |.macro .ffunc_n, name - | .ffunc_1 name - | checktp CARG2, LJ_TISNUM - | bhs ->fff_fallback - |.endmacro - | - |.macro .ffunc_nn, name - | .ffunc_2 name - | checktp CARG2, LJ_TISNUM - | cmnlo CARG4, #-LJ_TISNUM - | bhs ->fff_fallback - |.endmacro - | - |.macro .ffunc_d, name - | .ffunc name - | ldr CARG2, [BASE, #4] - | cmp NARGS8:RC, #8 - | vldr d0, [BASE] - | blo ->fff_fallback - | checktp CARG2, LJ_TISNUM - | bhs ->fff_fallback - |.endmacro - | - |.macro .ffunc_dd, name - | .ffunc name - | ldr CARG2, [BASE, #4] - | ldr CARG4, [BASE, #12] - | cmp NARGS8:RC, #16 - | vldr d0, [BASE] - | vldr d1, [BASE, #8] - | blo ->fff_fallback - | checktp CARG2, LJ_TISNUM - | cmnlo CARG4, #-LJ_TISNUM - | bhs ->fff_fallback - |.endmacro - | - |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2. - |.macro ffgccheck - | ldr CARG1, [DISPATCH, #DISPATCH_GL(gc.total)] - | ldr CARG2, [DISPATCH, #DISPATCH_GL(gc.threshold)] - | cmp CARG1, CARG2 - | blge ->fff_gcstep - |.endmacro - | - |//-- Base library: checks ----------------------------------------------- - | - |.ffunc_1 assert - | checktp CARG2, LJ_TTRUE - | bhi ->fff_fallback - | ldr PC, [BASE, FRAME_PC] - | strd CARG12, [BASE, #-8] - | mov RB, BASE - | subs RA, NARGS8:RC, #8 - | add RC, NARGS8:RC, #8 // Compute (nresults+1)*8. - | beq ->fff_res // Done if exactly 1 argument. 
- |1: - | ldrd CARG12, [RB, #8] - | subs RA, RA, #8 - | strd CARG12, [RB], #8 - | bne <1 - | b ->fff_res - | - |.ffunc type - | ldr CARG2, [BASE, #4] - | cmp NARGS8:RC, #8 - | blo ->fff_fallback - | checktp CARG2, LJ_TISNUM - | mvnlo CARG2, #~LJ_TISNUM - | rsb CARG4, CARG2, #(int)(offsetof(GCfuncC, upvalue)>>3)-1 - | lsl CARG4, CARG4, #3 - | ldrd CARG12, [CFUNC:CARG3, CARG4] - | b ->fff_restv - | - |//-- Base library: getters and setters --------------------------------- - | - |.ffunc_1 getmetatable - | checktp CARG2, LJ_TTAB - | cmnne CARG2, #-LJ_TUDATA - | bne >6 - |1: // Field metatable must be at same offset for GCtab and GCudata! - | ldr TAB:RB, TAB:CARG1->metatable - |2: - | mvn CARG2, #~LJ_TNIL - | ldr STR:RC, [DISPATCH, #DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])] - | cmp TAB:RB, #0 - | beq ->fff_restv - | ldr CARG3, TAB:RB->hmask - | ldr CARG4, STR:RC->hash - | ldr NODE:INS, TAB:RB->node - | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask - | add CARG3, CARG3, CARG3, lsl #1 - | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 - |3: // Rearranged logic, because we expect _not_ to find the key. - | ldrd CARG34, NODE:INS->key // STALL: early NODE:INS. - | ldrd CARG12, NODE:INS->val - | ldr NODE:INS, NODE:INS->next - | checktp CARG4, LJ_TSTR - | cmpeq CARG3, STR:RC - | beq >5 - | cmp NODE:INS, #0 - | bne <3 - |4: - | mov CARG1, RB // Use metatable as default result. - | mvn CARG2, #~LJ_TTAB - | b ->fff_restv - |5: - | checktp CARG2, LJ_TNIL - | bne ->fff_restv - | b <4 - | - |6: - | checktp CARG2, LJ_TISNUM - | mvnhs CARG2, CARG2 - | movlo CARG2, #~LJ_TISNUM - | add CARG4, DISPATCH, CARG2, lsl #2 - | ldr TAB:RB, [CARG4, #DISPATCH_GL(gcroot[GCROOT_BASEMT])] - | b <2 - | - |.ffunc_2 setmetatable - | // Fast path: no mt for table yet and not clearing the mt. 
- | checktp CARG2, LJ_TTAB - | ldreq TAB:RB, TAB:CARG1->metatable - | checktpeq CARG4, LJ_TTAB - | ldrbeq CARG4, TAB:CARG1->marked - | cmpeq TAB:RB, #0 - | bne ->fff_fallback - | tst CARG4, #LJ_GC_BLACK // isblack(table) - | str TAB:CARG3, TAB:CARG1->metatable - | beq ->fff_restv - | barrierback TAB:CARG1, CARG4, CARG3 - | b ->fff_restv - | - |.ffunc rawget - | ldrd CARG34, [BASE] - | cmp NARGS8:RC, #16 - | blo ->fff_fallback - | mov CARG2, CARG3 - | checktab CARG4, ->fff_fallback - | mov CARG1, L - | add CARG3, BASE, #8 - | .IOS mov RA, BASE - | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) - | // Returns cTValue *. - | .IOS mov BASE, RA - | ldrd CARG12, [CRET1] - | b ->fff_restv - | - |//-- Base library: conversions ------------------------------------------ - | - |.ffunc tonumber - | // Only handles the number case inline (without a base argument). - | ldrd CARG12, [BASE] - | cmp NARGS8:RC, #8 - | bne ->fff_fallback - | checktp CARG2, LJ_TISNUM - | bls ->fff_restv - | b ->fff_fallback - | - |.ffunc_1 tostring - | // Only handles the string or number case inline. - | checktp CARG2, LJ_TSTR - | // A __tostring method in the string base metatable is ignored. - | beq ->fff_restv - | // Handle numbers inline, unless a number base metatable is present. - | ldr CARG4, [DISPATCH, #DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])] - | str BASE, L->base - | checktp CARG2, LJ_TISNUM - | cmpls CARG4, #0 - | str PC, SAVE_PC // Redundant (but a defined value). - | bhi ->fff_fallback - | ffgccheck - | mov CARG1, L - | mov CARG2, BASE - | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) - | // Returns GCstr *. - | ldr BASE, L->base - | mvn CARG2, #~LJ_TSTR - | b ->fff_restv - | - |//-- Base library: iterators ------------------------------------------- - | - |.ffunc_1 next - | mvn CARG4, #~LJ_TNIL - | checktab CARG2, ->fff_fallback - | strd CARG34, [BASE, NARGS8:RC] // Set missing 2nd arg to nil. 
- | ldr PC, [BASE, FRAME_PC] - | mov CARG2, CARG1 - | str BASE, L->base // Add frame since C call can throw. - | mov CARG1, L - | str BASE, L->top // Dummy frame length is ok. - | add CARG3, BASE, #8 - | str PC, SAVE_PC - | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - | // Returns 0 at end of traversal. - | .IOS ldr BASE, L->base - | cmp CRET1, #0 - | mvneq CRET2, #~LJ_TNIL - | beq ->fff_restv // End of traversal: return nil. - | ldrd CARG12, [BASE, #8] // Copy key and value to results. - | ldrd CARG34, [BASE, #16] - | mov RC, #(2+1)*8 - | strd CARG12, [BASE, #-8] - | strd CARG34, [BASE] - | b ->fff_res - | - |.ffunc_1 pairs - | checktab CARG2, ->fff_fallback -#if LJ_52 - | ldr TAB:RB, TAB:CARG1->metatable -#endif - | ldrd CFUNC:CARG34, CFUNC:CARG3->upvalue[0] - | ldr PC, [BASE, FRAME_PC] -#if LJ_52 - | cmp TAB:RB, #0 - | bne ->fff_fallback -#endif - | mvn CARG2, #~LJ_TNIL - | mov RC, #(3+1)*8 - | strd CFUNC:CARG34, [BASE, #-8] - | str CARG2, [BASE, #12] - | b ->fff_res - | - |.ffunc_2 ipairs_aux - | checktp CARG2, LJ_TTAB - | checktpeq CARG4, LJ_TISNUM - | bne ->fff_fallback - | ldr RB, TAB:CARG1->asize - | ldr RC, TAB:CARG1->array - | add CARG3, CARG3, #1 - | ldr PC, [BASE, FRAME_PC] - | cmp CARG3, RB - | add RC, RC, CARG3, lsl #3 - | strd CARG34, [BASE, #-8] - | ldrdlo CARG12, [RC] - | mov RC, #(0+1)*8 - | bhs >2 // Not in array part? - |1: - | checktp CARG2, LJ_TNIL - | movne RC, #(2+1)*8 - | strdne CARG12, [BASE] - | b ->fff_res - |2: // Check for empty hash part first. Otherwise call C function. - | ldr RB, TAB:CARG1->hmask - | mov CARG2, CARG3 - | cmp RB, #0 - | beq ->fff_res - | .IOS mov RA, BASE - | bl extern lj_tab_getinth // (GCtab *t, int32_t key) - | // Returns cTValue * or NULL. 
- | .IOS mov BASE, RA - | cmp CRET1, #0 - | beq ->fff_res - | ldrd CARG12, [CRET1] - | b <1 - | - |.ffunc_1 ipairs - | checktab CARG2, ->fff_fallback -#if LJ_52 - | ldr TAB:RB, TAB:CARG1->metatable -#endif - | ldrd CFUNC:CARG34, CFUNC:CARG3->upvalue[0] - | ldr PC, [BASE, FRAME_PC] -#if LJ_52 - | cmp TAB:RB, #0 - | bne ->fff_fallback -#endif - | mov CARG1, #0 - | mvn CARG2, #~LJ_TISNUM - | mov RC, #(3+1)*8 - | strd CFUNC:CARG34, [BASE, #-8] - | strd CARG12, [BASE, #8] - | b ->fff_res - | - |//-- Base library: catch errors ---------------------------------------- - | - |.ffunc pcall - | ldrb RA, [DISPATCH, #DISPATCH_GL(hookmask)] - | cmp NARGS8:RC, #8 - | blo ->fff_fallback - | tst RA, #HOOK_ACTIVE // Remember active hook before pcall. - | mov RB, BASE - | add BASE, BASE, #8 - | moveq PC, #8+FRAME_PCALL - | movne PC, #8+FRAME_PCALLH - | sub NARGS8:RC, NARGS8:RC, #8 - | b ->vm_call_dispatch - | - |.ffunc_2 xpcall - | ldrb RA, [DISPATCH, #DISPATCH_GL(hookmask)] - | checkfunc CARG4, ->fff_fallback // Traceback must be a function. - | mov RB, BASE - | strd CARG12, [BASE, #8] // Swap function and traceback. - | strd CARG34, [BASE] - | tst RA, #HOOK_ACTIVE // Remember active hook before pcall. 
- | add BASE, BASE, #16 - | moveq PC, #16+FRAME_PCALL - | movne PC, #16+FRAME_PCALLH - | sub NARGS8:RC, NARGS8:RC, #16 - | b ->vm_call_dispatch - | - |//-- Coroutine library -------------------------------------------------- - | - |.macro coroutine_resume_wrap, resume - |.if resume - |.ffunc_1 coroutine_resume - | checktp CARG2, LJ_TTHREAD - | bne ->fff_fallback - |.else - |.ffunc coroutine_wrap_aux - | ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr - |.endif - | ldr PC, [BASE, FRAME_PC] - | str BASE, L->base - | ldr CARG2, L:CARG1->top - | ldrb RA, L:CARG1->status - | ldr RB, L:CARG1->base - | add CARG3, CARG2, NARGS8:RC - | add CARG4, CARG2, RA - | str PC, SAVE_PC - | cmp CARG4, RB - | beq ->fff_fallback - | ldr CARG4, L:CARG1->maxstack - | ldr RB, L:CARG1->cframe - | cmp RA, #LUA_YIELD - | cmpls CARG3, CARG4 - | cmpls RB, #0 - | bhi ->fff_fallback - |1: - |.if resume - | sub CARG3, CARG3, #8 // Keep resumed thread in stack for GC. - | add BASE, BASE, #8 - | sub NARGS8:RC, NARGS8:RC, #8 - |.endif - | str CARG3, L:CARG1->top - | str BASE, L->top - |2: // Move args to coroutine. - | ldrd CARG34, [BASE, RB] - | cmp RB, NARGS8:RC - | strdne CARG34, [CARG2, RB] - | add RB, RB, #8 - | bne <2 - | - | mov CARG3, #0 - | mov L:RA, L:CARG1 - | mov CARG4, #0 - | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) - | // Returns thread status. - |4: - | ldr CARG3, L:RA->base - | mv_vmstate CARG2, INTERP - | ldr CARG4, L:RA->top - | cmp CRET1, #LUA_YIELD - | ldr BASE, L->base - | str L, [DISPATCH, #DISPATCH_GL(cur_L)] - | st_vmstate CARG2 - | bhi >8 - | subs RC, CARG4, CARG3 - | ldr CARG1, L->maxstack - | add CARG2, BASE, RC - | beq >6 // No results? - | cmp CARG2, CARG1 - | mov RB, #0 - | bhi >9 // Need to grow stack? - | - | sub CARG4, RC, #8 - | str CARG3, L:RA->top // Clear coroutine stack. - |5: // Move results from coroutine. 
- | ldrd CARG12, [CARG3, RB] - | cmp RB, CARG4 - | strd CARG12, [BASE, RB] - | add RB, RB, #8 - | bne <5 - |6: - |.if resume - | mvn CARG3, #~LJ_TTRUE - | add RC, RC, #16 - |7: - | str CARG3, [BASE, #-4] // Prepend true/false to results. - | sub RA, BASE, #8 - |.else - | mov RA, BASE - | add RC, RC, #8 - |.endif - | ands CARG1, PC, #FRAME_TYPE - | str PC, SAVE_PC - | str RC, SAVE_MULTRES - | beq ->BC_RET_Z - | b ->vm_return - | - |8: // Coroutine returned with error (at co->top-1). - |.if resume - | ldrd CARG12, [CARG4, #-8]! - | mvn CARG3, #~LJ_TFALSE - | mov RC, #(2+1)*8 - | str CARG4, L:RA->top // Remove error from coroutine stack. - | strd CARG12, [BASE] // Copy error message. - | b <7 - |.else - | mov CARG1, L - | mov CARG2, L:RA - | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) - | // Never returns. - |.endif - | - |9: // Handle stack expansion on return from yield. - | mov CARG1, L - | lsr CARG2, RC, #3 - | bl extern lj_state_growstack // (lua_State *L, int n) - | mov CRET1, #0 - | b <4 - |.endmacro - | - | coroutine_resume_wrap 1 // coroutine.resume - | coroutine_resume_wrap 0 // coroutine.wrap - | - |.ffunc coroutine_yield - | ldr CARG1, L->cframe - | add CARG2, BASE, NARGS8:RC - | str BASE, L->base - | tst CARG1, #CFRAME_RESUME - | str CARG2, L->top - | mov CRET1, #LUA_YIELD - | mov CARG3, #0 - | beq ->fff_fallback - | str CARG3, L->cframe - | strb CRET1, L->status - | b ->vm_leave_unw - | - |//-- Math library ------------------------------------------------------- - | - |.macro math_round, func - | .ffunc_1 math_ .. func - | checktp CARG2, LJ_TISNUM - | beq ->fff_restv - | bhi ->fff_fallback - | // Round FP value and normalize result. - | lsl CARG3, CARG2, #1 - | adds RB, CARG3, #0x00200000 - | bpl >2 // |x| < 1? 
- | mvn CARG4, #0x3e0 - | subs RB, CARG4, RB, asr #21 - | lsl CARG4, CARG2, #11 - | lsl CARG3, CARG1, #11 - | orr CARG4, CARG4, #0x80000000 - | rsb INS, RB, #32 - | orr CARG4, CARG4, CARG1, lsr #21 - | bls >3 // |x| >= 2^31? - | orr CARG3, CARG3, CARG4, lsl INS - | lsr CARG1, CARG4, RB - |.if "func" == "floor" - | tst CARG3, CARG2, asr #31 - | addne CARG1, CARG1, #1 - |.else - | bics CARG3, CARG3, CARG2, asr #31 - | addsne CARG1, CARG1, #1 - | ldrdvs CARG12, >9 - | bvs ->fff_restv - |.endif - | cmp CARG2, #0 - | rsblt CARG1, CARG1, #0 - |1: - | mvn CARG2, #~LJ_TISNUM - | b ->fff_restv - | - |2: // |x| < 1 - | bcs ->fff_restv // |x| is not finite. - | orr CARG3, CARG3, CARG1 // ztest = abs(hi) | lo - |.if "func" == "floor" - | tst CARG3, CARG2, asr #31 // return (ztest & sign) == 0 ? 0 : -1 - | moveq CARG1, #0 - | mvnne CARG1, #0 - |.else - | bics CARG3, CARG3, CARG2, asr #31 // return (ztest & ~sign) == 0 ? 0 : 1 - | moveq CARG1, #0 - | movne CARG1, #1 - |.endif - | mvn CARG2, #~LJ_TISNUM - | b ->fff_restv - | - |3: // |x| >= 2^31. Check for x == -(2^31). - | cmpeq CARG4, #0x80000000 - |.if "func" == "floor" - | cmpeq CARG3, #0 - |.endif - | bne >4 - | cmp CARG2, #0 - | movmi CARG1, #0x80000000 - | bmi <1 - |4: - | bl ->vm_..func.._sf - | b ->fff_restv - |.endmacro - | - | math_round floor - | math_round ceil - | - |.align 8 - |9: - | .long 0x00000000, 0x41e00000 // 2^31. - | - |.ffunc_1 math_abs - | checktp CARG2, LJ_TISNUM - | bhi ->fff_fallback - | bicne CARG2, CARG2, #0x80000000 - | bne ->fff_restv - | cmp CARG1, #0 - | rsbslt CARG1, CARG1, #0 - | ldrdvs CARG12, <9 - | // Fallthrough. - | - |->fff_restv: - | // CARG12 = TValue result. - | ldr PC, [BASE, FRAME_PC] - | strd CARG12, [BASE, #-8] - |->fff_res1: - | // PC = return. - | mov RC, #(1+1)*8 - |->fff_res: - | // RC = (nresults+1)*8, PC = return. 
- | ands CARG1, PC, #FRAME_TYPE - | ldreq INS, [PC, #-4] - | str RC, SAVE_MULTRES - | sub RA, BASE, #8 - | bne ->vm_return - | decode_RB8 RB, INS - |5: - | cmp RB, RC // More results expected? - | bhi >6 - | decode_RA8 CARG1, INS - | ins_next1 - | ins_next2 - | // Adjust BASE. KBASE is assumed to be set for the calling frame. - | sub BASE, RA, CARG1 - | ins_next3 - | - |6: // Fill up results with nil. - | add CARG2, RA, RC - | mvn CARG1, #~LJ_TNIL - | add RC, RC, #8 - | str CARG1, [CARG2, #-4] - | b <5 - | - |.macro math_extern, func - |.if HFABI - | .ffunc_d math_ .. func - |.else - | .ffunc_n math_ .. func - |.endif - | .IOS mov RA, BASE - | bl extern func - | .IOS mov BASE, RA - |.if HFABI - | b ->fff_resd - |.else - | b ->fff_restv - |.endif - |.endmacro - | - |.macro math_extern2, func - |.if HFABI - | .ffunc_dd math_ .. func - |.else - | .ffunc_nn math_ .. func - |.endif - | .IOS mov RA, BASE - | bl extern func - | .IOS mov BASE, RA - |.if HFABI - | b ->fff_resd - |.else - | b ->fff_restv - |.endif - |.endmacro - | - |.if FPU - | .ffunc_d math_sqrt - | vsqrt.f64 d0, d0 - |->fff_resd: - | ldr PC, [BASE, FRAME_PC] - | vstr d0, [BASE, #-8] - | b ->fff_res1 - |.else - | math_extern sqrt - |.endif - | - |.ffunc math_log - |.if HFABI - | ldr CARG2, [BASE, #4] - | cmp NARGS8:RC, #8 // Need exactly 1 argument. - | vldr d0, [BASE] - | bne ->fff_fallback - |.else - | ldrd CARG12, [BASE] - | cmp NARGS8:RC, #8 // Need exactly 1 argument. 
- | bne ->fff_fallback - |.endif - | checktp CARG2, LJ_TISNUM - | bhs ->fff_fallback - | .IOS mov RA, BASE - | bl extern log - | .IOS mov BASE, RA - |.if HFABI - | b ->fff_resd - |.else - | b ->fff_restv - |.endif - | - | math_extern log10 - | math_extern exp - | math_extern sin - | math_extern cos - | math_extern tan - | math_extern asin - | math_extern acos - | math_extern atan - | math_extern sinh - | math_extern cosh - | math_extern tanh - | math_extern2 pow - | math_extern2 atan2 - | math_extern2 fmod - | - |.if HFABI - | .ffunc math_ldexp - | ldr CARG4, [BASE, #4] - | ldrd CARG12, [BASE, #8] - | cmp NARGS8:RC, #16 - | blo ->fff_fallback - | vldr d0, [BASE] - | checktp CARG4, LJ_TISNUM - | bhs ->fff_fallback - | checktp CARG2, LJ_TISNUM - | bne ->fff_fallback - | .IOS mov RA, BASE - | bl extern ldexp // (double x, int exp) - | .IOS mov BASE, RA - | b ->fff_resd - |.else - |.ffunc_2 math_ldexp - | checktp CARG2, LJ_TISNUM - | bhs ->fff_fallback - | checktp CARG4, LJ_TISNUM - | bne ->fff_fallback - | .IOS mov RA, BASE - | bl extern ldexp // (double x, int exp) - | .IOS mov BASE, RA - | b ->fff_restv - |.endif - | - |.if HFABI - |.ffunc_d math_frexp - | mov CARG1, sp - | .IOS mov RA, BASE - | bl extern frexp - | .IOS mov BASE, RA - | ldr CARG3, [sp] - | mvn CARG4, #~LJ_TISNUM - | ldr PC, [BASE, FRAME_PC] - | vstr d0, [BASE, #-8] - | mov RC, #(2+1)*8 - | strd CARG34, [BASE] - | b ->fff_res - |.else - |.ffunc_n math_frexp - | mov CARG3, sp - | .IOS mov RA, BASE - | bl extern frexp - | .IOS mov BASE, RA - | ldr CARG3, [sp] - | mvn CARG4, #~LJ_TISNUM - | ldr PC, [BASE, FRAME_PC] - | strd CARG12, [BASE, #-8] - | mov RC, #(2+1)*8 - | strd CARG34, [BASE] - | b ->fff_res - |.endif - | - |.if HFABI - |.ffunc_d math_modf - | sub CARG1, BASE, #8 - | ldr PC, [BASE, FRAME_PC] - | .IOS mov RA, BASE - | bl extern modf - | .IOS mov BASE, RA - | mov RC, #(2+1)*8 - | vstr d0, [BASE] - | b ->fff_res - |.else - |.ffunc_n math_modf - | sub CARG3, BASE, #8 - | ldr PC, [BASE, FRAME_PC] 
- | .IOS mov RA, BASE - | bl extern modf - | .IOS mov BASE, RA - | mov RC, #(2+1)*8 - | strd CARG12, [BASE] - | b ->fff_res - |.endif - | - |.macro math_minmax, name, cond, fcond - |.if FPU - | .ffunc_1 name - | add RB, BASE, RC - | checktp CARG2, LJ_TISNUM - | add RA, BASE, #8 - | bne >4 - |1: // Handle integers. - | ldrd CARG34, [RA] - | cmp RA, RB - | bhs ->fff_restv - | checktp CARG4, LJ_TISNUM - | bne >3 - | cmp CARG1, CARG3 - | add RA, RA, #8 - | mov..cond CARG1, CARG3 - | b <1 - |3: // Convert intermediate result to number and continue below. - | vmov s4, CARG1 - | bhi ->fff_fallback - | vldr d1, [RA] - | vcvt.f64.s32 d0, s4 - | b >6 - | - |4: - | vldr d0, [BASE] - | bhi ->fff_fallback - |5: // Handle numbers. - | ldrd CARG34, [RA] - | vldr d1, [RA] - | cmp RA, RB - | bhs ->fff_resd - | checktp CARG4, LJ_TISNUM - | bhs >7 - |6: - | vcmp.f64 d0, d1 - | vmrs - | add RA, RA, #8 - | vmov..fcond.f64 d0, d1 - | b <5 - |7: // Convert integer to number and continue above. - | vmov s4, CARG3 - | bhi ->fff_fallback - | vcvt.f64.s32 d1, s4 - | b <6 - | - |.else - | - | .ffunc_1 name - | checktp CARG2, LJ_TISNUM - | mov RA, #8 - | bne >4 - |1: // Handle integers. - | ldrd CARG34, [BASE, RA] - | cmp RA, RC - | bhs ->fff_restv - | checktp CARG4, LJ_TISNUM - | bne >3 - | cmp CARG1, CARG3 - | add RA, RA, #8 - | mov..cond CARG1, CARG3 - | b <1 - |3: // Convert intermediate result to number and continue below. - | bhi ->fff_fallback - | bl extern __aeabi_i2d - | ldrd CARG34, [BASE, RA] - | b >6 - | - |4: - | bhi ->fff_fallback - |5: // Handle numbers. - | ldrd CARG34, [BASE, RA] - | cmp RA, RC - | bhs ->fff_restv - | checktp CARG4, LJ_TISNUM - | bhs >7 - |6: - | bl extern __aeabi_cdcmple - | add RA, RA, #8 - | mov..fcond CARG1, CARG3 - | mov..fcond CARG2, CARG4 - | b <5 - |7: // Convert integer to number and continue above. 
- | bhi ->fff_fallback - | strd CARG12, TMPD - | mov CARG1, CARG3 - | bl extern __aeabi_i2d - | ldrd CARG34, TMPD - | b <6 - |.endif - |.endmacro - | - | math_minmax math_min, gt, hi - | math_minmax math_max, lt, lo - | - |//-- String library ----------------------------------------------------- - | - |.ffunc string_byte // Only handle the 1-arg case here. - | ldrd CARG12, [BASE] - | ldr PC, [BASE, FRAME_PC] - | cmp NARGS8:RC, #8 - | checktpeq CARG2, LJ_TSTR // Need exactly 1 argument. - | bne ->fff_fallback - | ldr CARG3, STR:CARG1->len - | ldrb CARG1, STR:CARG1[1] // Access is always ok (NUL at end). - | mvn CARG2, #~LJ_TISNUM - | cmp CARG3, #0 - | moveq RC, #(0+1)*8 - | movne RC, #(1+1)*8 - | strd CARG12, [BASE, #-8] - | b ->fff_res - | - |.ffunc string_char // Only handle the 1-arg case here. - | ffgccheck - | ldrd CARG12, [BASE] - | ldr PC, [BASE, FRAME_PC] - | cmp NARGS8:RC, #8 // Need exactly 1 argument. - | checktpeq CARG2, LJ_TISNUM - | bicseq CARG4, CARG1, #255 - | mov CARG3, #1 - | bne ->fff_fallback - | str CARG1, TMPD - | mov CARG2, TMPDp // Points to stack. Little-endian. - |->fff_newstr: - | // CARG2 = str, CARG3 = len. - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_str_new // (lua_State *L, char *str, size_t l) - |->fff_resstr: - | // Returns GCstr *. 
- | ldr BASE, L->base - | mvn CARG2, #~LJ_TSTR - | b ->fff_restv - | - |.ffunc string_sub - | ffgccheck - | ldrd CARG12, [BASE] - | ldrd CARG34, [BASE, #16] - | cmp NARGS8:RC, #16 - | mvn RB, #0 - | beq >1 - | blo ->fff_fallback - | checktp CARG4, LJ_TISNUM - | mov RB, CARG3 - | bne ->fff_fallback - |1: - | ldrd CARG34, [BASE, #8] - | checktp CARG2, LJ_TSTR - | ldreq CARG2, STR:CARG1->len - | checktpeq CARG4, LJ_TISNUM - | bne ->fff_fallback - | // CARG1 = str, CARG2 = str->len, CARG3 = start, RB = end - | add CARG4, CARG2, #1 - | cmp CARG3, #0 // if (start < 0) start += len+1 - | addlt CARG3, CARG3, CARG4 - | cmp CARG3, #1 // if (start < 1) start = 1 - | movlt CARG3, #1 - | cmp RB, #0 // if (end < 0) end += len+1 - | addlt RB, RB, CARG4 - | bic RB, RB, RB, asr #31 // if (end < 0) end = 0 - | cmp RB, CARG2 // if (end > len) end = len - | add CARG1, STR:CARG1, #sizeof(GCstr)-1 - | movgt RB, CARG2 - | add CARG2, CARG1, CARG3 - | subs CARG3, RB, CARG3 // len = end - start - | add CARG3, CARG3, #1 // len += 1 - | bge ->fff_newstr - |->fff_emptystr: - | sub STR:CARG1, DISPATCH, #-DISPATCH_GL(strempty) - | mvn CARG2, #~LJ_TSTR - | b ->fff_restv - | - |.macro ffstring_op, name - | .ffunc string_ .. name - | ffgccheck - | ldr CARG3, [BASE, #4] - | cmp NARGS8:RC, #8 - | ldr STR:CARG2, [BASE] - | blo ->fff_fallback - | sub SBUF:CARG1, DISPATCH, #-DISPATCH_GL(tmpbuf) - | checkstr CARG3, ->fff_fallback - | ldr CARG4, SBUF:CARG1->b - | str BASE, L->base - | str PC, SAVE_PC - | str L, SBUF:CARG1->L - | str CARG4, SBUF:CARG1->p - | bl extern lj_buf_putstr_ .. name - | bl extern lj_buf_tostr - | b ->fff_resstr - |.endmacro - | - |ffstring_op reverse - |ffstring_op lower - |ffstring_op upper - | - |//-- Bit library -------------------------------------------------------- - | - |// FP number to bit conversion for soft-float. Clobbers r0-r3. - |->vm_tobit_fb: - | bhi ->fff_fallback - |->vm_tobit: - | lsl RB, CARG2, #1 - | adds RB, RB, #0x00200000 - | movpl CARG1, #0 // |x| < 1? 
- | bxpl lr - | mvn CARG4, #0x3e0 - | subs RB, CARG4, RB, asr #21 - | bmi >1 // |x| >= 2^32? - | lsl CARG4, CARG2, #11 - | orr CARG4, CARG4, #0x80000000 - | orr CARG4, CARG4, CARG1, lsr #21 - | cmp CARG2, #0 - | lsr CARG1, CARG4, RB - | rsblt CARG1, CARG1, #0 - | bx lr - |1: - | add RB, RB, #21 - | lsr CARG4, CARG1, RB - | rsb RB, RB, #20 - | lsl CARG1, CARG2, #12 - | cmp CARG2, #0 - | orr CARG1, CARG4, CARG1, lsl RB - | rsblt CARG1, CARG1, #0 - | bx lr - | - |.macro .ffunc_bit, name - | .ffunc_1 bit_..name - | checktp CARG2, LJ_TISNUM - | blne ->vm_tobit_fb - |.endmacro - | - |.ffunc_bit tobit - | mvn CARG2, #~LJ_TISNUM - | b ->fff_restv - | - |.macro .ffunc_bit_op, name, ins - | .ffunc_bit name - | mov CARG3, CARG1 - | mov RA, #8 - |1: - | ldrd CARG12, [BASE, RA] - | cmp RA, NARGS8:RC - | add RA, RA, #8 - | bge >2 - | checktp CARG2, LJ_TISNUM - | blne ->vm_tobit_fb - | ins CARG3, CARG3, CARG1 - | b <1 - |.endmacro - | - |.ffunc_bit_op band, and - |.ffunc_bit_op bor, orr - |.ffunc_bit_op bxor, eor - | - |2: - | mvn CARG4, #~LJ_TISNUM - | ldr PC, [BASE, FRAME_PC] - | strd CARG34, [BASE, #-8] - | b ->fff_res1 - | - |.ffunc_bit bswap - | eor CARG3, CARG1, CARG1, ror #16 - | bic CARG3, CARG3, #0x00ff0000 - | ror CARG1, CARG1, #8 - | mvn CARG2, #~LJ_TISNUM - | eor CARG1, CARG1, CARG3, lsr #8 - | b ->fff_restv - | - |.ffunc_bit bnot - | mvn CARG1, CARG1 - | mvn CARG2, #~LJ_TISNUM - | b ->fff_restv - | - |.macro .ffunc_bit_sh, name, ins, shmod - | .ffunc bit_..name - | ldrd CARG12, [BASE, #8] - | cmp NARGS8:RC, #16 - | blo ->fff_fallback - | checktp CARG2, LJ_TISNUM - | blne ->vm_tobit_fb - |.if shmod == 0 - | and RA, CARG1, #31 - |.else - | rsb RA, CARG1, #0 - |.endif - | ldrd CARG12, [BASE] - | checktp CARG2, LJ_TISNUM - | blne ->vm_tobit_fb - | ins CARG1, CARG1, RA - | mvn CARG2, #~LJ_TISNUM - | b ->fff_restv - |.endmacro - | - |.ffunc_bit_sh lshift, lsl, 0 - |.ffunc_bit_sh rshift, lsr, 0 - |.ffunc_bit_sh arshift, asr, 0 - |.ffunc_bit_sh rol, ror, 1 - |.ffunc_bit_sh 
ror, ror, 0 - | - |//----------------------------------------------------------------------- - | - |->fff_fallback: // Call fast function fallback handler. - | // BASE = new base, RC = nargs*8 - | ldr CARG3, [BASE, FRAME_FUNC] - | ldr CARG2, L->maxstack - | add CARG1, BASE, NARGS8:RC - | ldr PC, [BASE, FRAME_PC] // Fallback may overwrite PC. - | str CARG1, L->top - | ldr CARG3, CFUNC:CARG3->f - | str BASE, L->base - | add CARG1, CARG1, #8*LUA_MINSTACK - | str PC, SAVE_PC // Redundant (but a defined value). - | cmp CARG1, CARG2 - | mov CARG1, L - | bhi >5 // Need to grow stack. - | blx CARG3 // (lua_State *L) - | // Either throws an error, or recovers and returns -1, 0 or nresults+1. - | ldr BASE, L->base - | cmp CRET1, #0 - | lsl RC, CRET1, #3 - | sub RA, BASE, #8 - | bgt ->fff_res // Returned nresults+1? - |1: // Returned 0 or -1: retry fast path. - | ldr CARG1, L->top - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | sub NARGS8:RC, CARG1, BASE - | bne ->vm_call_tail // Returned -1? - | ins_callt // Returned 0: retry fast path. - | - |// Reconstruct previous base for vmeta_call during tailcall. - |->vm_call_tail: - | ands CARG1, PC, #FRAME_TYPE - | bic CARG2, PC, #FRAME_TYPEP - | ldreq INS, [PC, #-4] - | andeq CARG2, MASKR8, INS, lsr #5 // Conditional decode_RA8. - | addeq CARG2, CARG2, #8 - | sub RB, BASE, CARG2 - | b ->vm_call_dispatch // Resolve again for tailcall. - | - |5: // Grow stack for fallback handler. - | mov CARG2, #LUA_MINSTACK - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldr BASE, L->base - | cmp CARG1, CARG1 // Set zero-flag to force retry. - | b <1 - | - |->fff_gcstep: // Call GC step function. - | // BASE = new base, RC = nargs*8 - | mov RA, lr - | str BASE, L->base - | add CARG2, BASE, NARGS8:RC - | str PC, SAVE_PC // Redundant (but a defined value). - | str CARG2, L->top - | mov CARG1, L - | bl extern lj_gc_step // (lua_State *L) - | ldr BASE, L->base - | mov lr, RA // Help return address predictor. 
- | ldr CFUNC:CARG3, [BASE, FRAME_FUNC] - | bx lr - | - |//----------------------------------------------------------------------- - |//-- Special dispatch targets ------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_record: // Dispatch target for recording phase. - |.if JIT - | ldrb CARG1, [DISPATCH, #DISPATCH_GL(hookmask)] - | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent. - | bne >5 - | // Decrement the hookcount for consistency, but always do the call. - | ldr CARG2, [DISPATCH, #DISPATCH_GL(hookcount)] - | tst CARG1, #HOOK_ACTIVE - | bne >1 - | sub CARG2, CARG2, #1 - | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT - | strne CARG2, [DISPATCH, #DISPATCH_GL(hookcount)] - | b >1 - |.endif - | - |->vm_rethook: // Dispatch target for return hooks. - | ldrb CARG1, [DISPATCH, #DISPATCH_GL(hookmask)] - | tst CARG1, #HOOK_ACTIVE // Hook already active? - | beq >1 - |5: // Re-dispatch to static ins. - | decode_OP OP, INS - | add OP, DISPATCH, OP, lsl #2 - | ldr pc, [OP, #GG_DISP2STATIC] - | - |->vm_inshook: // Dispatch target for instr/line hooks. - | ldrb CARG1, [DISPATCH, #DISPATCH_GL(hookmask)] - | ldr CARG2, [DISPATCH, #DISPATCH_GL(hookcount)] - | tst CARG1, #HOOK_ACTIVE // Hook already active? - | bne <5 - | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT - | beq <5 - | subs CARG2, CARG2, #1 - | str CARG2, [DISPATCH, #DISPATCH_GL(hookcount)] - | beq >1 - | tst CARG1, #LUA_MASKLINE - | beq <5 - |1: - | mov CARG1, L - | str BASE, L->base - | mov CARG2, PC - | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. - | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) - |3: - | ldr BASE, L->base - |4: // Re-dispatch to static ins. - | ldrb OP, [PC, #-4] - | ldr INS, [PC, #-4] - | add OP, DISPATCH, OP, lsl #2 - | ldr OP, [OP, #GG_DISP2STATIC] - | decode_RA8 RA, INS - | decode_RD RC, INS - | bx OP - | - |->cont_hook: // Continue from hook yield. 
- | ldr CARG1, [CARG4, #-24] - | add PC, PC, #4 - | str CARG1, SAVE_MULTRES // Restore MULTRES for *M ins. - | b <4 - | - |->vm_hotloop: // Hot loop counter underflow. - |.if JIT - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L). - | sub CARG1, DISPATCH, #-GG_DISP2J - | str PC, SAVE_PC - | ldr CARG3, LFUNC:CARG3->field_pc - | mov CARG2, PC - | str L, [DISPATCH, #DISPATCH_J(L)] - | ldrb CARG3, [CARG3, #PC2PROTO(framesize)] - | str BASE, L->base - | add CARG3, BASE, CARG3, lsl #3 - | str CARG3, L->top - | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) - | b <3 - |.endif - | - |->vm_callhook: // Dispatch target for call hooks. - | mov CARG2, PC - |.if JIT - | b >1 - |.endif - | - |->vm_hotcall: // Hot call counter underflow. - |.if JIT - | orr CARG2, PC, #1 - |1: - |.endif - | add CARG4, BASE, RC - | str PC, SAVE_PC - | mov CARG1, L - | str BASE, L->base - | sub RA, RA, BASE - | str CARG4, L->top - | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) - | // Returns ASMFunction. - | ldr BASE, L->base - | ldr CARG4, L->top - | mov CARG2, #0 - | add RA, BASE, RA - | sub NARGS8:RC, CARG4, BASE - | str CARG2, SAVE_PC // Invalidate for subsequent line hook. - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | ldr INS, [PC, #-4] - | bx CRET1 - | - |->cont_stitch: // Trace stitching. - |.if JIT - | // RA = resultptr, CARG4 = meta base - | ldr RB, SAVE_MULTRES - | ldr INS, [PC, #-4] - | ldr TRACE:CARG3, [CARG4, #-24] // Save previous trace. - | subs RB, RB, #8 - | decode_RA8 RC, INS // Call base. - | beq >2 - |1: // Move results down. - | ldrd CARG12, [RA] - | add RA, RA, #8 - | subs RB, RB, #8 - | strd CARG12, [BASE, RC] - | add RC, RC, #8 - | bne <1 - |2: - | decode_RA8 RA, INS - | decode_RB8 RB, INS - | add RA, RA, RB - |3: - | cmp RA, RC - | mvn CARG2, #~LJ_TNIL - | bhi >9 // More results wanted? - | - | ldrh RA, TRACE:CARG3->traceno - | ldrh RC, TRACE:CARG3->link - | cmp RC, RA - | beq ->cont_nop // Blacklisted. 
- | cmp RC, #0 - | bne =>BC_JLOOP // Jump to stitched trace. - | - | // Stitch a new trace to the previous trace. - | str RA, [DISPATCH, #DISPATCH_J(exitno)] - | str L, [DISPATCH, #DISPATCH_J(L)] - | str BASE, L->base - | sub CARG1, DISPATCH, #-GG_DISP2J - | mov CARG2, PC - | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) - | ldr BASE, L->base - | b ->cont_nop - | - |9: // Fill up results with nil. - | strd CARG12, [BASE, RC] - | add RC, RC, #8 - | b <3 - |.endif - | - |->vm_profhook: // Dispatch target for profiler hook. -#if LJ_HASPROFILE - | mov CARG1, L - | str BASE, L->base - | mov CARG2, PC - | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) - | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. - | ldr BASE, L->base - | sub PC, PC, #4 - | b ->cont_nop -#endif - | - |//----------------------------------------------------------------------- - |//-- Trace exit handler ------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_exit_handler: - |.if JIT - | sub sp, sp, #12 - | push {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12} - | ldr CARG1, [sp, #64] // Load original value of lr. - | ldr DISPATCH, [lr] // Load DISPATCH. - | add CARG3, sp, #64 // Recompute original value of sp. - | mv_vmstate CARG4, EXIT - | str CARG3, [sp, #52] // Store sp in RID_SP - | st_vmstate CARG4 - | ldr CARG2, [CARG1, #-4]! // Get exit instruction. - | str CARG1, [sp, #56] // Store exit pc in RID_LR and RID_PC. - | str CARG1, [sp, #60] - |.if FPU - | vpush {d0-d15} - |.endif - | lsl CARG2, CARG2, #8 - | add CARG1, CARG1, CARG2, asr #6 - | ldr CARG2, [lr, #4] // Load exit stub group offset. - | sub CARG1, CARG1, lr - | ldr L, [DISPATCH, #DISPATCH_GL(cur_L)] - | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number. 
- | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)] - | str CARG1, [DISPATCH, #DISPATCH_J(exitno)] - | mov CARG4, #0 - | str BASE, L->base - | str L, [DISPATCH, #DISPATCH_J(L)] - | str CARG4, [DISPATCH, #DISPATCH_GL(jit_base)] - | sub CARG1, DISPATCH, #-GG_DISP2J - | mov CARG2, sp - | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) - | // Returns MULTRES (unscaled) or negated error code. - | ldr CARG2, L->cframe - | ldr BASE, L->base - | bic CARG2, CARG2, #~CFRAME_RAWMASK // Use two steps: bic sp is deprecated. - | mov sp, CARG2 - | ldr PC, SAVE_PC // Get SAVE_PC. - | str L, SAVE_L // Set SAVE_L (on-trace resume/yield). - | b >1 - |.endif - |->vm_exit_interp: - | // CARG1 = MULTRES or negated error code, BASE, PC and DISPATCH set. - |.if JIT - | ldr L, SAVE_L - |1: - | cmp CARG1, #0 - | blt >9 // Check for error from exit. - | lsl RC, CARG1, #3 - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | str RC, SAVE_MULTRES - | mov CARG3, #0 - | str BASE, L->base - | ldr CARG2, LFUNC:CARG2->field_pc - | str CARG3, [DISPATCH, #DISPATCH_GL(jit_base)] - | mv_vmstate CARG4, INTERP - | ldr KBASE, [CARG2, #PC2PROTO(k)] - | // Modified copy of ins_next which handles function header dispatch, too. - | ldrb OP, [PC] - | mov MASKR8, #255 - | ldr INS, [PC], #4 - | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. - | st_vmstate CARG4 - | cmp OP, #BC_FUNCC+2 // Fast function? - | bhs >4 - |2: - | cmp OP, #BC_FUNCF // Function header? - | ldr OP, [DISPATCH, OP, lsl #2] - | decode_RA8 RA, INS - | lsrlo RC, INS, #16 // No: Decode operands A*8 and D. - | subhs RC, RC, #8 - | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8 - | ldrhs CARG3, [BASE, FRAME_FUNC] - | bx OP - | - |4: // Check frame below fast function. - | ldr CARG1, [BASE, FRAME_PC] - | ands CARG2, CARG1, #FRAME_TYPE - | bne <2 // Trace stitching continuation? - | // Otherwise set KBASE for Lua function below fast function. 
- | ldr CARG3, [CARG1, #-4] - | decode_RA8 CARG1, CARG3 - | sub CARG2, BASE, CARG1 - | ldr LFUNC:CARG3, [CARG2, #-16] - | ldr CARG3, LFUNC:CARG3->field_pc - | ldr KBASE, [CARG3, #PC2PROTO(k)] - | b <2 - | - |9: // Rethrow error from the right C frame. - | rsb CARG2, CARG1, #0 - | mov CARG1, L - | bl extern lj_err_throw // (lua_State *L, int errcode) - |.endif - | - |//----------------------------------------------------------------------- - |//-- Math helper functions ---------------------------------------------- - |//----------------------------------------------------------------------- - | - |// FP value rounding. Called from JIT code. - |// - |// double lj_vm_floor/ceil/trunc(double x); - |.macro vm_round, func, hf - |.if hf == 1 - | vmov CARG1, CARG2, d0 - |.endif - | lsl CARG3, CARG2, #1 - | adds RB, CARG3, #0x00200000 - | bpl >2 // |x| < 1? - | mvn CARG4, #0x3cc - | subs RB, CARG4, RB, asr #21 // 2^0: RB = 51, 2^51: RB = 0. - | bxlo lr // |x| >= 2^52: done. - | mvn CARG4, #1 - | bic CARG3, CARG1, CARG4, lsl RB // ztest = lo & ~lomask - | and CARG1, CARG1, CARG4, lsl RB // lo &= lomask - | subs RB, RB, #32 - | bicpl CARG4, CARG2, CARG4, lsl RB // |x| <= 2^20: ztest |= hi & ~himask - | orrpl CARG3, CARG3, CARG4 - | mvnpl CARG4, #1 - | andpl CARG2, CARG2, CARG4, lsl RB // |x| <= 2^20: hi &= himask - |.if "func" == "floor" - | tst CARG3, CARG2, asr #31 // iszero = ((ztest & signmask) == 0) - |.else - | bics CARG3, CARG3, CARG2, asr #31 // iszero = ((ztest & ~signmask) == 0) - |.endif - |.if hf == 1 - | vmoveq d0, CARG1, CARG2 - |.endif - | bxeq lr // iszero: done. - | mvn CARG4, #1 - | cmp RB, #0 - | lslpl CARG3, CARG4, RB - | mvnmi CARG3, #0 - | add RB, RB, #32 - | subs CARG1, CARG1, CARG4, lsl RB // lo = lo-lomask - | sbc CARG2, CARG2, CARG3 // hi = hi-himask+carry - |.if hf == 1 - | vmov d0, CARG1, CARG2 - |.endif - | bx lr - | - |2: // |x| < 1: - | bxcs lr // |x| is not finite. 
- | orr CARG3, CARG3, CARG1 // ztest = (2*hi) | lo - |.if "func" == "floor" - | tst CARG3, CARG2, asr #31 // iszero = ((ztest & signmask) == 0) - |.else - | bics CARG3, CARG3, CARG2, asr #31 // iszero = ((ztest & ~signmask) == 0) - |.endif - | mov CARG1, #0 // lo = 0 - | and CARG2, CARG2, #0x80000000 - | ldrne CARG4, <9 // hi = sign(x) | (iszero ? 0.0 : 1.0) - | orrne CARG2, CARG2, CARG4 - |.if hf == 1 - | vmov d0, CARG1, CARG2 - |.endif - | bx lr - |.endmacro - | - |9: - | .long 0x3ff00000 // hiword(+1.0) - | - |->vm_floor: - |.if HFABI - | vm_round floor, 1 - |.endif - |->vm_floor_sf: - | vm_round floor, 0 - | - |->vm_ceil: - |.if HFABI - | vm_round ceil, 1 - |.endif - |->vm_ceil_sf: - | vm_round ceil, 0 - | - |.macro vm_trunc, hf - |.if JIT - |.if hf == 1 - | vmov CARG1, CARG2, d0 - |.endif - | lsl CARG3, CARG2, #1 - | adds RB, CARG3, #0x00200000 - | andpl CARG2, CARG2, #0x80000000 // |x| < 1? hi = sign(x), lo = 0. - | movpl CARG1, #0 - |.if hf == 1 - | vmovpl d0, CARG1, CARG2 - |.endif - | bxpl lr - | mvn CARG4, #0x3cc - | subs RB, CARG4, RB, asr #21 // 2^0: RB = 51, 2^51: RB = 0. - | bxlo lr // |x| >= 2^52: already done. - | mvn CARG4, #1 - | and CARG1, CARG1, CARG4, lsl RB // lo &= lomask - | subs RB, RB, #32 - | andpl CARG2, CARG2, CARG4, lsl RB // |x| <= 2^20: hi &= himask - |.if hf == 1 - | vmov d0, CARG1, CARG2 - |.endif - | bx lr - |.endif - |.endmacro - | - |->vm_trunc: - |.if HFABI - | vm_trunc 1 - |.endif - |->vm_trunc_sf: - | vm_trunc 0 - | - | // double lj_vm_mod(double dividend, double divisor); - |->vm_mod: - |.if FPU - | // Special calling convention. Also, RC (r11) is not preserved. 
- | vdiv.f64 d0, d6, d7 - | mov RC, lr - | vmov CARG1, CARG2, d0 - | bl ->vm_floor_sf - | vmov d0, CARG1, CARG2 - | vmul.f64 d0, d0, d7 - | mov lr, RC - | vsub.f64 d6, d6, d0 - | bx lr - |.else - | push {r0, r1, r2, r3, r4, lr} - | bl extern __aeabi_ddiv - | bl ->vm_floor_sf - | ldrd CARG34, [sp, #8] - | bl extern __aeabi_dmul - | ldrd CARG34, [sp] - | eor CARG2, CARG2, #0x80000000 - | bl extern __aeabi_dadd - | add sp, sp, #20 - | pop {pc} - |.endif - | - | // int lj_vm_modi(int dividend, int divisor); - |->vm_modi: - | ands RB, CARG1, #0x80000000 - | rsbmi CARG1, CARG1, #0 // a = |dividend| - | eor RB, RB, CARG2, asr #1 // Keep signdiff and sign(divisor). - | cmp CARG2, #0 - | rsbmi CARG2, CARG2, #0 // b = |divisor| - | subs CARG4, CARG2, #1 - | cmpne CARG1, CARG2 - | moveq CARG1, #0 // if (b == 1 || a == b) a = 0 - | tsthi CARG2, CARG4 - | andeq CARG1, CARG1, CARG4 // else if ((b & (b-1)) == 0) a &= b-1 - | bls >1 - | // Use repeated subtraction to get the remainder. - | clz CARG3, CARG1 - | clz CARG4, CARG2 - | sub CARG4, CARG4, CARG3 - | rsbs CARG3, CARG4, #31 // entry = (31-(clz(b)-clz(a)))*8 - | addne pc, pc, CARG3, lsl #3 // Duff's device. 
- | nop - { - int i; - for (i = 31; i >= 0; i--) { - | cmp CARG1, CARG2, lsl #i - | subhs CARG1, CARG1, CARG2, lsl #i - } - } - |1: - | cmp CARG1, #0 - | cmpne RB, #0 - | submi CARG1, CARG1, CARG2 // if (y != 0 && signdiff) y = y - b - | eors CARG2, CARG1, RB, lsl #1 - | rsbmi CARG1, CARG1, #0 // if (sign(divisor) != sign(y)) y = -y - | bx lr - | - |//----------------------------------------------------------------------- - |//-- Miscellaneous functions -------------------------------------------- - |//----------------------------------------------------------------------- - | - |//----------------------------------------------------------------------- - |//-- FFI helper functions ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Handler for callback functions. - |// Saveregs already performed. Callback slot number in [sp], g in r12. - |->vm_ffi_callback: - |.if FFI - |.type CTSTATE, CTState, PC - | ldr CTSTATE, GL:r12->ctype_state - | add DISPATCH, r12, #GG_G2DISP - |.if FPU - | str r4, SAVE_R4 - | add r4, sp, CFRAME_SPACE+4+8*8 - | vstmdb r4!, {d8-d15} - |.endif - |.if HFABI - | add r12, CTSTATE, #offsetof(CTState, cb.fpr[8]) - |.endif - | strd CARG34, CTSTATE->cb.gpr[2] - | strd CARG12, CTSTATE->cb.gpr[0] - |.if HFABI - | vstmdb r12!, {d0-d7} - |.endif - | ldr CARG4, [sp] - | add CARG3, sp, #CFRAME_SIZE - | mov CARG1, CTSTATE - | lsr CARG4, CARG4, #3 - | str CARG3, CTSTATE->cb.stack - | mov CARG2, sp - | str CARG4, CTSTATE->cb.slot - | str CTSTATE, SAVE_PC // Any value outside of bytecode is ok. - | bl extern lj_ccallback_enter // (CTState *cts, void *cf) - | // Returns lua_State *. - | ldr BASE, L:CRET1->base - | mv_vmstate CARG2, INTERP - | ldr RC, L:CRET1->top - | mov MASKR8, #255 - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | mov L, CRET1 - | sub RC, RC, BASE - | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. 
- | st_vmstate CARG2 - | ins_callt - |.endif - | - |->cont_ffi_callback: // Return from FFI callback. - |.if FFI - | ldr CTSTATE, [DISPATCH, #DISPATCH_GL(ctype_state)] - | str BASE, L->base - | str CARG4, L->top - | str L, CTSTATE->L - | mov CARG1, CTSTATE - | mov CARG2, RA - | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) - | ldrd CARG12, CTSTATE->cb.gpr[0] - |.if HFABI - | vldr d0, CTSTATE->cb.fpr[0] - |.endif - | b ->vm_leave_unw - |.endif - | - |->vm_ffi_call: // Call C function via FFI. - | // Caveat: needs special frame unwinding, see below. - |.if FFI - | .type CCSTATE, CCallState, r4 - | push {CCSTATE, r5, r11, lr} - | mov CCSTATE, CARG1 - | ldr CARG1, CCSTATE:CARG1->spadj - | ldrb CARG2, CCSTATE->nsp - | add CARG3, CCSTATE, #offsetof(CCallState, stack) - |.if HFABI - | add RB, CCSTATE, #offsetof(CCallState, fpr[0]) - |.endif - | mov r11, sp - | sub sp, sp, CARG1 // Readjust stack. - | subs CARG2, CARG2, #1 - |.if HFABI - | vldm RB, {d0-d7} - |.endif - | ldr RB, CCSTATE->func - | bmi >2 - |1: // Copy stack slots. - | ldr CARG4, [CARG3, CARG2, lsl #2] - | str CARG4, [sp, CARG2, lsl #2] - | subs CARG2, CARG2, #1 - | bpl <1 - |2: - | ldrd CARG12, CCSTATE->gpr[0] - | ldrd CARG34, CCSTATE->gpr[2] - | blx RB - | mov sp, r11 - |.if HFABI - | add r12, CCSTATE, #offsetof(CCallState, fpr[4]) - |.endif - | strd CRET1, CCSTATE->gpr[0] - |.if HFABI - | vstmdb r12!, {d0-d3} - |.endif - | pop {CCSTATE, r5, r11, pc} - |.endif - |// Note: vm_ffi_call must be the last function in this object file! - | - |//----------------------------------------------------------------------- -} - -/* Generate the code for a single instruction. */ -static void build_ins(BuildCtx *ctx, BCOp op, int defop) -{ - int vk = 0; - |=>defop: - - switch (op) { - - /* -- Comparison ops ---------------------------------------------------- */ - - /* Remember: all ops branch for a true comparison, fall through otherwise. 
*/ - - case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: - | // RA = src1*8, RC = src2, JMP with RC = target - | lsl RC, RC, #3 - | ldrd CARG12, [RA, BASE]! - | ldrh RB, [PC, #2] - | ldrd CARG34, [RC, BASE]! - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | checktp CARG2, LJ_TISNUM - | bne >3 - | checktp CARG4, LJ_TISNUM - | bne >4 - | cmp CARG1, CARG3 - if (op == BC_ISLT) { - | sublt PC, RB, #0x20000 - } else if (op == BC_ISGE) { - | subge PC, RB, #0x20000 - } else if (op == BC_ISLE) { - | suble PC, RB, #0x20000 - } else { - | subgt PC, RB, #0x20000 - } - |1: - | ins_next - | - |3: // CARG12 is not an integer. - |.if FPU - | vldr d0, [RA] - | bhi ->vmeta_comp - | // d0 is a number. - | checktp CARG4, LJ_TISNUM - | vldr d1, [RC] - | blo >5 - | bhi ->vmeta_comp - | // d0 is a number, CARG3 is an integer. - | vmov s4, CARG3 - | vcvt.f64.s32 d1, s4 - | b >5 - |4: // CARG1 is an integer, CARG34 is not an integer. - | vldr d1, [RC] - | bhi ->vmeta_comp - | // CARG1 is an integer, d1 is a number. - | vmov s4, CARG1 - | vcvt.f64.s32 d0, s4 - |5: // d0 and d1 are numbers. - | vcmp.f64 d0, d1 - | vmrs - | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. - if (op == BC_ISLT) { - | sublo PC, RB, #0x20000 - } else if (op == BC_ISGE) { - | subhs PC, RB, #0x20000 - } else if (op == BC_ISLE) { - | subls PC, RB, #0x20000 - } else { - | subhi PC, RB, #0x20000 - } - | b <1 - |.else - | bhi ->vmeta_comp - | // CARG12 is a number. - | checktp CARG4, LJ_TISNUM - | movlo RA, RB // Save RB. - | blo >5 - | bhi ->vmeta_comp - | // CARG12 is a number, CARG3 is an integer. - | mov CARG1, CARG3 - | mov RC, RA - | mov RA, RB // Save RB. - | bl extern __aeabi_i2d - | mov CARG3, CARG1 - | mov CARG4, CARG2 - | ldrd CARG12, [RC] // Restore first operand. - | b >5 - |4: // CARG1 is an integer, CARG34 is not an integer. - | bhi ->vmeta_comp - | // CARG1 is an integer, CARG34 is a number. - | mov RA, RB // Save RB. 
- | bl extern __aeabi_i2d - | ldrd CARG34, [RC] // Restore second operand. - |5: // CARG12 and CARG34 are numbers. - | bl extern __aeabi_cdcmple - | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. - if (op == BC_ISLT) { - | sublo PC, RA, #0x20000 - } else if (op == BC_ISGE) { - | subhs PC, RA, #0x20000 - } else if (op == BC_ISLE) { - | subls PC, RA, #0x20000 - } else { - | subhi PC, RA, #0x20000 - } - | b <1 - |.endif - break; - - case BC_ISEQV: case BC_ISNEV: - vk = op == BC_ISEQV; - | // RA = src1*8, RC = src2, JMP with RC = target - | lsl RC, RC, #3 - | ldrd CARG12, [RA, BASE]! - | ldrh RB, [PC, #2] - | ldrd CARG34, [RC, BASE]! - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | checktp CARG2, LJ_TISNUM - | cmnls CARG4, #-LJ_TISNUM - if (vk) { - | bls ->BC_ISEQN_Z - } else { - | bls ->BC_ISNEN_Z - } - | // Either or both types are not numbers. - |.if FFI - | checktp CARG2, LJ_TCDATA - | checktpne CARG4, LJ_TCDATA - | beq ->vmeta_equal_cd - |.endif - | cmp CARG2, CARG4 // Compare types. - | bne >2 // Not the same type? - | checktp CARG2, LJ_TISPRI - | bhs >1 // Same type and primitive type? - | - | // Same types and not a primitive type. Compare GCobj or pvalue. - | cmp CARG1, CARG3 - if (vk) { - | bne >3 // Different GCobjs or pvalues? - |1: // Branch if same. - | sub PC, RB, #0x20000 - |2: // Different. - | ins_next - |3: - | checktp CARG2, LJ_TISTABUD - | bhi <2 // Different objects and not table/ud? - } else { - | beq >1 // Same GCobjs or pvalues? - | checktp CARG2, LJ_TISTABUD - | bhi >2 // Different objects and not table/ud? - } - | // Different tables or userdatas. Need to check __eq metamethod. - | // Field metatable must be at same offset for GCtab and GCudata! - | ldr TAB:RA, TAB:CARG1->metatable - | cmp TAB:RA, #0 - if (vk) { - | beq <2 // No metatable? - } else { - | beq >2 // No metatable? - } - | ldrb RA, TAB:RA->nomm - | mov CARG4, #1-vk // ne = 0 or 1. 
- | mov CARG2, CARG1 - | tst RA, #1<<MM_eq - | beq ->vmeta_equal // 'no __eq' flag not set? - if (vk) { - | b <2 - } else { - |2: // Branch if different. - | sub PC, RB, #0x20000 - |1: // Same. - | ins_next - } - break; - - case BC_ISEQS: case BC_ISNES: - vk = op == BC_ISEQS; - | // RA = src*8, RC = str_const (~), JMP with RC = target - | mvn RC, RC - | ldrd CARG12, [BASE, RA] - | ldrh RB, [PC, #2] - | ldr STR:CARG3, [KBASE, RC, lsl #2] - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | checktp CARG2, LJ_TSTR - |.if FFI - | bne >7 - | cmp CARG1, CARG3 - |.else - | cmpeq CARG1, CARG3 - |.endif - if (vk) { - | subeq PC, RB, #0x20000 - |1: - } else { - |1: - | subne PC, RB, #0x20000 - } - | ins_next - | - |.if FFI - |7: - | checktp CARG2, LJ_TCDATA - | bne <1 - | b ->vmeta_equal_cd - |.endif - break; - - case BC_ISEQN: case BC_ISNEN: - vk = op == BC_ISEQN; - | // RA = src*8, RC = num_const (~), JMP with RC = target - | lsl RC, RC, #3 - | ldrd CARG12, [RA, BASE]! - | ldrh RB, [PC, #2] - | ldrd CARG34, [RC, KBASE]! - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - if (vk) { - |->BC_ISEQN_Z: - } else { - |->BC_ISNEN_Z: - } - | checktp CARG2, LJ_TISNUM - | bne >3 - | checktp CARG4, LJ_TISNUM - | bne >4 - | cmp CARG1, CARG3 - if (vk) { - | subeq PC, RB, #0x20000 - |1: - } else { - |1: - | subne PC, RB, #0x20000 - } - |2: - | ins_next - | - |3: // CARG12 is not an integer. - |.if FFI - | bhi >7 - |.else - if (!vk) { - | subhi PC, RB, #0x20000 - } - | bhi <2 - |.endif - |.if FPU - | checktp CARG4, LJ_TISNUM - | vmov s4, CARG3 - | vldr d0, [RA] - | vldrlo d1, [RC] - | vcvths.f64.s32 d1, s4 - | b >5 - |4: // CARG1 is an integer, d1 is a number. - | vmov s4, CARG1 - | vldr d1, [RC] - | vcvt.f64.s32 d0, s4 - |5: // d0 and d1 are numbers. - | vcmp.f64 d0, d1 - | vmrs - if (vk) { - | subeq PC, RB, #0x20000 - } else { - | subne PC, RB, #0x20000 - } - | b <2 - |.else - | // CARG12 is a number. - | checktp CARG4, LJ_TISNUM - | movlo RA, RB // Save RB.
- | blo >5 - | // CARG12 is a number, CARG3 is an integer. - | mov CARG1, CARG3 - | mov RC, RA - |4: // CARG1 is an integer, CARG34 is a number. - | mov RA, RB // Save RB. - | bl extern __aeabi_i2d - | ldrd CARG34, [RC] // Restore other operand. - |5: // CARG12 and CARG34 are numbers. - | bl extern __aeabi_cdcmpeq - if (vk) { - | subeq PC, RA, #0x20000 - } else { - | subne PC, RA, #0x20000 - } - | b <2 - |.endif - | - |.if FFI - |7: - | checktp CARG2, LJ_TCDATA - | bne <1 - | b ->vmeta_equal_cd - |.endif - break; - - case BC_ISEQP: case BC_ISNEP: - vk = op == BC_ISEQP; - | // RA = src*8, RC = primitive_type (~), JMP with RC = target - | ldrd CARG12, [BASE, RA] - | ldrh RB, [PC, #2] - | add PC, PC, #4 - | mvn RC, RC - | add RB, PC, RB, lsl #2 - |.if FFI - | checktp CARG2, LJ_TCDATA - | beq ->vmeta_equal_cd - |.endif - | cmp CARG2, RC - if (vk) { - | subeq PC, RB, #0x20000 - } else { - | subne PC, RB, #0x20000 - } - | ins_next - break; - - /* -- Unary test and copy ops ------------------------------------------- */ - - case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: - | // RA = dst*8 or unused, RC = src, JMP with RC = target - | add RC, BASE, RC, lsl #3 - | ldrh RB, [PC, #2] - | ldrd CARG12, [RC] - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | checktp CARG2, LJ_TTRUE - if (op == BC_ISTC || op == BC_IST) { - | subls PC, RB, #0x20000 - if (op == BC_ISTC) { - | strdls CARG12, [BASE, RA] - } - } else { - | subhi PC, RB, #0x20000 - if (op == BC_ISFC) { - | strdhi CARG12, [BASE, RA] - } - } - | ins_next - break; - - case BC_ISTYPE: - | // RA = src*8, RC = -type - | ldrd CARG12, [BASE, RA] - | ins_next1 - | cmn CARG2, RC - | ins_next2 - | bne ->vmeta_istype - | ins_next3 - break; - case BC_ISNUM: - | // RA = src*8, RC = -(TISNUM-1) - | ldrd CARG12, [BASE, RA] - | ins_next1 - | checktp CARG2, LJ_TISNUM - | ins_next2 - | bhs ->vmeta_istype - | ins_next3 - break; - - /* -- Unary ops --------------------------------------------------------- */ - - case BC_MOV: - | // RA 
= dst*8, RC = src - | lsl RC, RC, #3 - | ins_next1 - | ldrd CARG12, [BASE, RC] - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - break; - case BC_NOT: - | // RA = dst*8, RC = src - | add RC, BASE, RC, lsl #3 - | ins_next1 - | ldr CARG1, [RC, #4] - | add RA, BASE, RA - | ins_next2 - | checktp CARG1, LJ_TTRUE - | mvnls CARG2, #~LJ_TFALSE - | mvnhi CARG2, #~LJ_TTRUE - | str CARG2, [RA, #4] - | ins_next3 - break; - case BC_UNM: - | // RA = dst*8, RC = src - | lsl RC, RC, #3 - | ldrd CARG12, [BASE, RC] - | ins_next1 - | ins_next2 - | checktp CARG2, LJ_TISNUM - | bhi ->vmeta_unm - | eorne CARG2, CARG2, #0x80000000 - | bne >5 - | rsbseq CARG1, CARG1, #0 - | ldrdvs CARG12, >9 - |5: - | strd CARG12, [BASE, RA] - | ins_next3 - | - |.align 8 - |9: - | .long 0x00000000, 0x41e00000 // 2^31. - break; - case BC_LEN: - | // RA = dst*8, RC = src - | lsl RC, RC, #3 - | ldrd CARG12, [BASE, RC] - | checkstr CARG2, >2 - | ldr CARG1, STR:CARG1->len - |1: - | mvn CARG2, #~LJ_TISNUM - | ins_next1 - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - |2: - | checktab CARG2, ->vmeta_len -#if LJ_52 - | ldr TAB:CARG3, TAB:CARG1->metatable - | cmp TAB:CARG3, #0 - | bne >9 - |3: -#endif - |->BC_LEN_Z: - | .IOS mov RC, BASE - | bl extern lj_tab_len // (GCtab *t) - | // Returns uint32_t (but less than 2^31). 
- | .IOS mov BASE, RC - | b <1 -#if LJ_52 - |9: - | ldrb CARG4, TAB:CARG3->nomm - | tst CARG4, #1<<MM_len - | bne <3 // 'no __len' flag set: done. - | b ->vmeta_len -#endif - break; - - /* -- Binary ops -------------------------------------------------------- */ - - |.macro ins_arithcheck, cond, ncond, target - ||if (vk == 1) { - | cmn CARG4, #-LJ_TISNUM - | cmn..cond CARG2, #-LJ_TISNUM - ||} else { - | cmn CARG2, #-LJ_TISNUM - | cmn..cond CARG4, #-LJ_TISNUM - ||} - | b..ncond target - |.endmacro - |.macro ins_arithcheck_int, target - | ins_arithcheck eq, ne, target - |.endmacro - |.macro ins_arithcheck_num, target - | ins_arithcheck lo, hs, target - |.endmacro - | - |.macro ins_arithpre - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||switch (vk) { - ||case 0: - | .if FPU - | ldrd CARG12, [RB, BASE]! - | ldrd CARG34, [RC, KBASE]! - | .else - | ldrd CARG12, [BASE, RB] - | ldrd CARG34, [KBASE, RC] - | .endif - || break; - ||case 1: - | .if FPU - | ldrd CARG34, [RB, BASE]! - | ldrd CARG12, [RC, KBASE]! - | .else - | ldrd CARG34, [BASE, RB] - | ldrd CARG12, [KBASE, RC] - | .endif - || break; - ||default: - | .if FPU - | ldrd CARG12, [RB, BASE]! - | ldrd CARG34, [RC, BASE]!
- | .else - | ldrd CARG12, [BASE, RB] - | ldrd CARG34, [BASE, RC] - | .endif - || break; - ||} - |.endmacro - | - |.macro ins_arithpre_fpu, reg1, reg2 - |.if FPU - ||if (vk == 1) { - | vldr reg2, [RB] - | vldr reg1, [RC] - ||} else { - | vldr reg1, [RB] - | vldr reg2, [RC] - ||} - |.endif - |.endmacro - | - |.macro ins_arithpost_fpu, reg - | ins_next1 - | add RA, BASE, RA - | ins_next2 - | vstr reg, [RA] - | ins_next3 - |.endmacro - | - |.macro ins_arithfallback, ins - ||switch (vk) { - ||case 0: - | ins ->vmeta_arith_vn - || break; - ||case 1: - | ins ->vmeta_arith_nv - || break; - ||default: - | ins ->vmeta_arith_vv - || break; - ||} - |.endmacro - | - |.macro ins_arithdn, intins, fpins, fpcall - | ins_arithpre - |.if "intins" ~= "vm_modi" and not FPU - | ins_next1 - |.endif - | ins_arithcheck_int >5 - |.if "intins" == "smull" - | smull CARG1, RC, CARG3, CARG1 - | cmp RC, CARG1, asr #31 - | ins_arithfallback bne - |.elif "intins" == "vm_modi" - | movs CARG2, CARG3 - | ins_arithfallback beq - | bl ->vm_modi - | mvn CARG2, #~LJ_TISNUM - |.else - | intins CARG1, CARG1, CARG3 - | ins_arithfallback bvs - |.endif - |4: - |.if "intins" == "vm_modi" or FPU - | ins_next1 - |.endif - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - |5: // FP variant. 
- | ins_arithpre_fpu d6, d7 - | ins_arithfallback ins_arithcheck_num - |.if FPU - |.if "intins" == "vm_modi" - | bl fpcall - |.else - | fpins d6, d6, d7 - |.endif - | ins_arithpost_fpu d6 - |.else - | bl fpcall - |.if "intins" ~= "vm_modi" - | ins_next1 - |.endif - | b <4 - |.endif - |.endmacro - | - |.macro ins_arithfp, fpins, fpcall - | ins_arithpre - |.if "fpins" ~= "extern" or HFABI - | ins_arithpre_fpu d0, d1 - |.endif - | ins_arithfallback ins_arithcheck_num - |.if "fpins" == "extern" - | .IOS mov RC, BASE - | bl fpcall - | .IOS mov BASE, RC - |.elif FPU - | fpins d0, d0, d1 - |.else - | bl fpcall - |.endif - |.if ("fpins" ~= "extern" or HFABI) and FPU - | ins_arithpost_fpu d0 - |.else - | ins_next1 - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - |.endif - |.endmacro - - case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - | ins_arithdn adds, vadd.f64, extern __aeabi_dadd - break; - case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - | ins_arithdn subs, vsub.f64, extern __aeabi_dsub - break; - case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arithdn smull, vmul.f64, extern __aeabi_dmul - break; - case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: - | ins_arithfp vdiv.f64, extern __aeabi_ddiv - break; - case BC_MODVN: case BC_MODNV: case BC_MODVV: - | ins_arithdn vm_modi, vm_mod, ->vm_mod - break; - case BC_POW: - | // NYI: (partial) integer arithmetic. - | ins_arithfp extern, extern pow - break; - - case BC_CAT: - | decode_RB8 RC, INS - | decode_RC8 RB, INS - | // RA = dst*8, RC = src_start*8, RB = src_end*8 (note: RB/RC swapped!) - | sub CARG3, RB, RC - | str BASE, L->base - | add CARG2, BASE, RB - |->BC_CAT_Z: - | // RA = dst*8, RC = src_start*8, CARG2 = top-1 - | mov CARG1, L - | str PC, SAVE_PC - | lsr CARG3, CARG3, #3 - | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) - | // Returns NULL (finished) or TValue * (metamethod). 
- | ldr BASE, L->base - | cmp CRET1, #0 - | bne ->vmeta_binop - | ldrd CARG34, [BASE, RC] - | ins_next1 - | ins_next2 - | strd CARG34, [BASE, RA] // Copy result to RA. - | ins_next3 - break; - - /* -- Constant ops ------------------------------------------------------ */ - - case BC_KSTR: - | // RA = dst*8, RC = str_const (~) - | mvn RC, RC - | ins_next1 - | ldr CARG1, [KBASE, RC, lsl #2] - | mvn CARG2, #~LJ_TSTR - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - break; - case BC_KCDATA: - |.if FFI - | // RA = dst*8, RC = cdata_const (~) - | mvn RC, RC - | ins_next1 - | ldr CARG1, [KBASE, RC, lsl #2] - | mvn CARG2, #~LJ_TCDATA - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - |.endif - break; - case BC_KSHORT: - | // RA = dst*8, (RC = int16_literal) - | mov CARG1, INS, asr #16 // Refetch sign-extended reg. - | mvn CARG2, #~LJ_TISNUM - | ins_next1 - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - break; - case BC_KNUM: - | // RA = dst*8, RC = num_const - | lsl RC, RC, #3 - | ins_next1 - | ldrd CARG12, [KBASE, RC] - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - break; - case BC_KPRI: - | // RA = dst*8, RC = primitive_type (~) - | add RA, BASE, RA - | mvn RC, RC - | ins_next1 - | ins_next2 - | str RC, [RA, #4] - | ins_next3 - break; - case BC_KNIL: - | // RA = base*8, RC = end - | add RA, BASE, RA - | add RC, BASE, RC, lsl #3 - | mvn CARG1, #~LJ_TNIL - | str CARG1, [RA, #4] - | add RA, RA, #8 - |1: - | str CARG1, [RA, #4] - | cmp RA, RC - | add RA, RA, #8 - | blt <1 - | ins_next_ - break; - - /* -- Upvalue and function ops ------------------------------------------ */ - - case BC_UGET: - | // RA = dst*8, RC = uvnum - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | lsl RC, RC, #2 - | add RC, RC, #offsetof(GCfuncL, uvptr) - | ldr UPVAL:CARG2, [LFUNC:CARG2, RC] - | ldr CARG2, UPVAL:CARG2->v - | ldrd CARG34, [CARG2] - | ins_next1 - | ins_next2 - | strd CARG34, [BASE, RA] - | ins_next3 - break; - case BC_USETV: - | // RA = uvnum*8, RC = src - | 
ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | lsr RA, RA, #1 - | add RA, RA, #offsetof(GCfuncL, uvptr) - | lsl RC, RC, #3 - | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] - | ldrd CARG34, [BASE, RC] - | ldrb RB, UPVAL:CARG2->marked - | ldrb RC, UPVAL:CARG2->closed - | ldr CARG2, UPVAL:CARG2->v - | tst RB, #LJ_GC_BLACK // isblack(uv) - | add RB, CARG4, #-LJ_TISGCV - | cmpne RC, #0 - | strd CARG34, [CARG2] - | bne >2 // Upvalue is closed and black? - |1: - | ins_next - | - |2: // Check if new value is collectable. - | cmn RB, #-(LJ_TNUMX - LJ_TISGCV) - | ldrbhi RC, GCOBJ:CARG3->gch.marked - | bls <1 // tvisgcv(v) - | sub CARG1, DISPATCH, #-GG_DISP2G - | tst RC, #LJ_GC_WHITES - | // Crossed a write barrier. Move the barrier forward. - |.if IOS - | beq <1 - | mov RC, BASE - | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) - | mov BASE, RC - |.else - | blne extern lj_gc_barrieruv // (global_State *g, TValue *tv) - |.endif - | b <1 - break; - case BC_USETS: - | // RA = uvnum*8, RC = str_const (~) - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | lsr RA, RA, #1 - | add RA, RA, #offsetof(GCfuncL, uvptr) - | mvn RC, RC - | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] - | ldr STR:CARG3, [KBASE, RC, lsl #2] - | ldrb RB, UPVAL:CARG2->marked - | ldrb RC, UPVAL:CARG2->closed - | ldr CARG2, UPVAL:CARG2->v - | mvn CARG4, #~LJ_TSTR - | tst RB, #LJ_GC_BLACK // isblack(uv) - | ldrb RB, STR:CARG3->marked - | strd CARG34, [CARG2] - | bne >2 - |1: - | ins_next - | - |2: // Check if string is white and ensure upvalue is closed. - | tst RB, #LJ_GC_WHITES // iswhite(str) - | cmpne RC, #0 - | sub CARG1, DISPATCH, #-GG_DISP2G - | // Crossed a write barrier. Move the barrier forward. 
- |.if IOS - | beq <1 - | mov RC, BASE - | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) - | mov BASE, RC - |.else - | blne extern lj_gc_barrieruv // (global_State *g, TValue *tv) - |.endif - | b <1 - break; - case BC_USETN: - | // RA = uvnum*8, RC = num_const - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | lsr RA, RA, #1 - | add RA, RA, #offsetof(GCfuncL, uvptr) - | lsl RC, RC, #3 - | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] - | ldrd CARG34, [KBASE, RC] - | ldr CARG2, UPVAL:CARG2->v - | ins_next1 - | ins_next2 - | strd CARG34, [CARG2] - | ins_next3 - break; - case BC_USETP: - | // RA = uvnum*8, RC = primitive_type (~) - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | lsr RA, RA, #1 - | add RA, RA, #offsetof(GCfuncL, uvptr) - | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] - | mvn RC, RC - | ldr CARG2, UPVAL:CARG2->v - | ins_next1 - | ins_next2 - | str RC, [CARG2, #4] - | ins_next3 - break; - - case BC_UCLO: - | // RA = level*8, RC = target - | ldr CARG3, L->openupval - | add RC, PC, RC, lsl #2 - | str BASE, L->base - | cmp CARG3, #0 - | sub PC, RC, #0x20000 - | beq >1 - | mov CARG1, L - | add CARG2, BASE, RA - | bl extern lj_func_closeuv // (lua_State *L, TValue *level) - | ldr BASE, L->base - |1: - | ins_next - break; - - case BC_FNEW: - | // RA = dst*8, RC = proto_const (~) (holding function prototype) - | mvn RC, RC - | str BASE, L->base - | ldr CARG2, [KBASE, RC, lsl #2] - | str PC, SAVE_PC - | ldr CARG3, [BASE, FRAME_FUNC] - | mov CARG1, L - | // (lua_State *L, GCproto *pt, GCfuncL *parent) - | bl extern lj_func_newL_gc - | // Returns GCfuncL *. 
- | ldr BASE, L->base - | mvn CARG2, #~LJ_TFUNC - | ins_next1 - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - break; - - /* -- Table ops --------------------------------------------------------- */ - - case BC_TNEW: - case BC_TDUP: - | // RA = dst*8, RC = (hbits|asize) | tab_const (~) - if (op == BC_TDUP) { - | mvn RC, RC - } - | ldr CARG3, [DISPATCH, #DISPATCH_GL(gc.total)] - | ldr CARG4, [DISPATCH, #DISPATCH_GL(gc.threshold)] - | str BASE, L->base - | str PC, SAVE_PC - | cmp CARG3, CARG4 - | mov CARG1, L - | bhs >5 - |1: - if (op == BC_TNEW) { - | lsl CARG2, RC, #21 - | lsr CARG3, RC, #11 - | asr RC, CARG2, #21 - | lsr CARG2, CARG2, #21 - | cmn RC, #1 - | addeq CARG2, CARG2, #2 - | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) - | // Returns GCtab *. - } else { - | ldr CARG2, [KBASE, RC, lsl #2] - | bl extern lj_tab_dup // (lua_State *L, Table *kt) - | // Returns GCtab *. - } - | ldr BASE, L->base - | mvn CARG2, #~LJ_TTAB - | ins_next1 - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - |5: - | bl extern lj_gc_step_fixtop // (lua_State *L) - | mov CARG1, L - | b <1 - break; - - case BC_GGET: - | // RA = dst*8, RC = str_const (~) - case BC_GSET: - | // RA = dst*8, RC = str_const (~) - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | mvn RC, RC - | ldr TAB:CARG1, LFUNC:CARG2->env - | ldr STR:RC, [KBASE, RC, lsl #2] - if (op == BC_GGET) { - | b ->BC_TGETS_Z - } else { - | b ->BC_TSETS_Z - } - break; - - case BC_TGETV: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | // RA = dst*8, RB = table*8, RC = key*8 - | ldrd TAB:CARG12, [BASE, RB] - | ldrd CARG34, [BASE, RC] - | checktab CARG2, ->vmeta_tgetv // STALL: load CARG12. - | checktp CARG4, LJ_TISNUM // Integer key? - | ldreq CARG4, TAB:CARG1->array - | ldreq CARG2, TAB:CARG1->asize - | bne >9 - | - | add CARG4, CARG4, CARG3, lsl #3 - | cmp CARG3, CARG2 // In array part? - | ldrdlo CARG34, [CARG4] - | bhs ->vmeta_tgetv - | ins_next1 // Overwrites RB! 
- | checktp CARG4, LJ_TNIL - | beq >5 - |1: - | ins_next2 - | strd CARG34, [BASE, RA] - | ins_next3 - | - |5: // Check for __index if table value is nil. - | ldr TAB:CARG2, TAB:CARG1->metatable - | cmp TAB:CARG2, #0 - | beq <1 // No metatable: done. - | ldrb CARG2, TAB:CARG2->nomm - | tst CARG2, #1<vmeta_tgetv - | - |9: - | checktp CARG4, LJ_TSTR // String key? - | moveq STR:RC, CARG3 - | beq ->BC_TGETS_Z - | b ->vmeta_tgetv - break; - case BC_TGETS: - | decode_RB8 RB, INS - | and RC, RC, #255 - | // RA = dst*8, RB = table*8, RC = str_const (~) - | ldrd CARG12, [BASE, RB] - | mvn RC, RC - | ldr STR:RC, [KBASE, RC, lsl #2] // STALL: early RC. - | checktab CARG2, ->vmeta_tgets1 - |->BC_TGETS_Z: - | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 - | ldr CARG3, TAB:CARG1->hmask - | ldr CARG4, STR:RC->hash - | ldr NODE:INS, TAB:CARG1->node - | mov TAB:RB, TAB:CARG1 - | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask - | add CARG3, CARG3, CARG3, lsl #1 - | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 - |1: - | ldrd CARG12, NODE:INS->key // STALL: early NODE:INS. - | ldrd CARG34, NODE:INS->val - | ldr NODE:INS, NODE:INS->next - | checktp CARG2, LJ_TSTR - | cmpeq CARG1, STR:RC - | bne >4 - | checktp CARG4, LJ_TNIL - | beq >5 - |3: - | ins_next1 - | ins_next2 - | strd CARG34, [BASE, RA] - | ins_next3 - | - |4: // Follow hash chain. - | cmp NODE:INS, #0 - | bne <1 - | // End of hash chain: key not found, nil result. - | - |5: // Check for __index if table value is nil. - | ldr TAB:CARG1, TAB:RB->metatable - | mov CARG3, #0 // Optional clear of undef. value (during load stall). - | mvn CARG4, #~LJ_TNIL - | cmp TAB:CARG1, #0 - | beq <3 // No metatable: done. - | ldrb CARG2, TAB:CARG1->nomm - | tst CARG2, #1<vmeta_tgets - break; - case BC_TGETB: - | decode_RB8 RB, INS - | and RC, RC, #255 - | // RA = dst*8, RB = table*8, RC = index - | ldrd CARG12, [BASE, RB] - | checktab CARG2, ->vmeta_tgetb // STALL: load CARG12. 
- | ldr CARG3, TAB:CARG1->asize - | ldr CARG4, TAB:CARG1->array - | lsl CARG2, RC, #3 - | cmp RC, CARG3 - | ldrdlo CARG34, [CARG4, CARG2] - | bhs ->vmeta_tgetb - | ins_next1 // Overwrites RB! - | checktp CARG4, LJ_TNIL - | beq >5 - |1: - | ins_next2 - | strd CARG34, [BASE, RA] - | ins_next3 - | - |5: // Check for __index if table value is nil. - | ldr TAB:CARG2, TAB:CARG1->metatable - | cmp TAB:CARG2, #0 - | beq <1 // No metatable: done. - | ldrb CARG2, TAB:CARG2->nomm - | tst CARG2, #1<vmeta_tgetb - break; - case BC_TGETR: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | // RA = dst*8, RB = table*8, RC = key*8 - | ldr TAB:CARG1, [BASE, RB] - | ldr CARG2, [BASE, RC] - | ldr CARG4, TAB:CARG1->array - | ldr CARG3, TAB:CARG1->asize - | add CARG4, CARG4, CARG2, lsl #3 - | cmp CARG2, CARG3 // In array part? - | bhs ->vmeta_tgetr - | ldrd CARG12, [CARG4] - |->BC_TGETR_Z: - | ins_next1 - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - break; - - case BC_TSETV: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | // RA = src*8, RB = table*8, RC = key*8 - | ldrd TAB:CARG12, [BASE, RB] - | ldrd CARG34, [BASE, RC] - | checktab CARG2, ->vmeta_tsetv // STALL: load CARG12. - | checktp CARG4, LJ_TISNUM // Integer key? - | ldreq CARG2, TAB:CARG1->array - | ldreq CARG4, TAB:CARG1->asize - | bne >9 - | - | add CARG2, CARG2, CARG3, lsl #3 - | cmp CARG3, CARG4 // In array part? - | ldrlo INS, [CARG2, #4] - | bhs ->vmeta_tsetv - | ins_next1 // Overwrites RB! - | checktp INS, LJ_TNIL - | ldrb INS, TAB:CARG1->marked - | ldrd CARG34, [BASE, RA] - | beq >5 - |1: - | tst INS, #LJ_GC_BLACK // isblack(table) - | strd CARG34, [CARG2] - | bne >7 - |2: - | ins_next2 - | ins_next3 - | - |5: // Check for __newindex if previous value is nil. - | ldr TAB:RA, TAB:CARG1->metatable - | cmp TAB:RA, #0 - | beq <1 // No metatable: done. - | ldrb RA, TAB:RA->nomm - | tst RA, #1<vmeta_tsetv - | - |7: // Possible table write barrier for the value. Skip valiswhite check. 
- | barrierback TAB:CARG1, INS, CARG3 - | b <2 - | - |9: - | checktp CARG4, LJ_TSTR // String key? - | moveq STR:RC, CARG3 - | beq ->BC_TSETS_Z - | b ->vmeta_tsetv - break; - case BC_TSETS: - | decode_RB8 RB, INS - | and RC, RC, #255 - | // RA = src*8, RB = table*8, RC = str_const (~) - | ldrd CARG12, [BASE, RB] - | mvn RC, RC - | ldr STR:RC, [KBASE, RC, lsl #2] // STALL: early RC. - | checktab CARG2, ->vmeta_tsets1 - |->BC_TSETS_Z: - | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 - | ldr CARG3, TAB:CARG1->hmask - | ldr CARG4, STR:RC->hash - | ldr NODE:INS, TAB:CARG1->node - | mov TAB:RB, TAB:CARG1 - | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask - | add CARG3, CARG3, CARG3, lsl #1 - | mov CARG4, #0 - | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 - | strb CARG4, TAB:RB->nomm // Clear metamethod cache. - |1: - | ldrd CARG12, NODE:INS->key - | ldr CARG4, NODE:INS->val.it - | ldr NODE:CARG3, NODE:INS->next - | checktp CARG2, LJ_TSTR - | cmpeq CARG1, STR:RC - | bne >5 - | ldrb CARG2, TAB:RB->marked - | checktp CARG4, LJ_TNIL // Key found, but nil value? - | ldrd CARG34, [BASE, RA] - | beq >4 - |2: - | tst CARG2, #LJ_GC_BLACK // isblack(table) - | strd CARG34, NODE:INS->val - | bne >7 - |3: - | ins_next - | - |4: // Check for __newindex if previous value is nil. - | ldr TAB:CARG1, TAB:RB->metatable - | cmp TAB:CARG1, #0 - | beq <2 // No metatable: done. - | ldrb CARG1, TAB:CARG1->nomm - | tst CARG1, #1<vmeta_tsets - | - |5: // Follow hash chain. - | movs NODE:INS, NODE:CARG3 - | bne <1 - | // End of hash chain: key not found, add a new one. - | - | // But check for __newindex first. - | ldr TAB:CARG1, TAB:RB->metatable - | mov CARG3, TMPDp - | str PC, SAVE_PC - | cmp TAB:CARG1, #0 // No metatable: continue. - | str BASE, L->base - | ldrbne CARG2, TAB:CARG1->nomm - | mov CARG1, L - | beq >6 - | tst CARG2, #1<vmeta_tsets // 'no __newindex' flag NOT set: check. 
- |6: - | mvn CARG4, #~LJ_TSTR - | str STR:RC, TMPDlo - | mov CARG2, TAB:RB - | str CARG4, TMPDhi - | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) - | // Returns TValue *. - | ldr BASE, L->base - | ldrd CARG34, [BASE, RA] - | strd CARG34, [CRET1] - | b <3 // No 2nd write barrier needed. - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, CARG2, CARG3 - | b <3 - break; - case BC_TSETB: - | decode_RB8 RB, INS - | and RC, RC, #255 - | // RA = src*8, RB = table*8, RC = index - | ldrd CARG12, [BASE, RB] - | checktab CARG2, ->vmeta_tsetb // STALL: load CARG12. - | ldr CARG3, TAB:CARG1->asize - | ldr RB, TAB:CARG1->array - | lsl CARG2, RC, #3 - | cmp RC, CARG3 - | ldrdlo CARG34, [CARG2, RB]! - | bhs ->vmeta_tsetb - | ins_next1 // Overwrites RB! - | checktp CARG4, LJ_TNIL - | ldrb INS, TAB:CARG1->marked - | ldrd CARG34, [BASE, RA] - | beq >5 - |1: - | tst INS, #LJ_GC_BLACK // isblack(table) - | strd CARG34, [CARG2] - | bne >7 - |2: - | ins_next2 - | ins_next3 - | - |5: // Check for __newindex if previous value is nil. - | ldr TAB:RA, TAB:CARG1->metatable - | cmp TAB:RA, #0 - | beq <1 // No metatable: done. - | ldrb RA, TAB:RA->nomm - | tst RA, #1<vmeta_tsetb - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG1, INS, CARG3 - | b <2 - break; - case BC_TSETR: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | // RA = src*8, RB = table*8, RC = key*8 - | ldr TAB:CARG2, [BASE, RB] - | ldr CARG3, [BASE, RC] - | ldrb INS, TAB:CARG2->marked - | ldr CARG1, TAB:CARG2->array - | ldr CARG4, TAB:CARG2->asize - | tst INS, #LJ_GC_BLACK // isblack(table) - | add CARG1, CARG1, CARG3, lsl #3 - | bne >7 - |2: - | cmp CARG3, CARG4 // In array part? - | bhs ->vmeta_tsetr - |->BC_TSETR_Z: - | ldrd CARG34, [BASE, RA] - | ins_next1 - | ins_next2 - | strd CARG34, [CARG1] - | ins_next3 - | - |7: // Possible table write barrier for the value. Skip valiswhite check. 
- | barrierback TAB:CARG2, INS, RB - | b <2 - break; - - case BC_TSETM: - | // RA = base*8 (table at base-1), RC = num_const (start index) - | add RA, BASE, RA - |1: - | ldr RB, SAVE_MULTRES - | ldr TAB:CARG2, [RA, #-8] // Guaranteed to be a table. - | ldr CARG1, [KBASE, RC, lsl #3] // Integer constant is in lo-word. - | subs RB, RB, #8 - | ldr CARG4, TAB:CARG2->asize - | beq >4 // Nothing to copy? - | add CARG3, CARG1, RB, lsr #3 - | cmp CARG3, CARG4 - | ldr CARG4, TAB:CARG2->array - | add RB, RA, RB - | bhi >5 - | add INS, CARG4, CARG1, lsl #3 - | ldrb CARG1, TAB:CARG2->marked - |3: // Copy result slots to table. - | ldrd CARG34, [RA], #8 - | strd CARG34, [INS], #8 - | cmp RA, RB - | blo <3 - | tst CARG1, #LJ_GC_BLACK // isblack(table) - | bne >7 - |4: - | ins_next - | - |5: // Need to resize array part. - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) - | // Must not reallocate the stack. - | .IOS ldr BASE, L->base - | b <1 - | - |7: // Possible table write barrier for any value. Skip valiswhite check. - | barrierback TAB:CARG2, CARG1, CARG3 - | b <4 - break; - - /* -- Calls and vararg handling ----------------------------------------- */ - - case BC_CALLM: - | // RA = base*8, (RB = nresults+1,) RC = extra_nargs - | ldr CARG1, SAVE_MULTRES - | decode_RC8 NARGS8:RC, INS - | add NARGS8:RC, NARGS8:RC, CARG1 - | b ->BC_CALL_Z - break; - case BC_CALL: - | decode_RC8 NARGS8:RC, INS - | // RA = base*8, (RB = nresults+1,) RC = (nargs+1)*8 - |->BC_CALL_Z: - | mov RB, BASE // Save old BASE for vmeta_call. - | ldrd CARG34, [BASE, RA]! 
- | sub NARGS8:RC, NARGS8:RC, #8 - | add BASE, BASE, #8 - | checkfunc CARG4, ->vmeta_call - | ins_call - break; - - case BC_CALLMT: - | // RA = base*8, (RB = 0,) RC = extra_nargs - | ldr CARG1, SAVE_MULTRES - | add NARGS8:RC, CARG1, RC, lsl #3 - | b ->BC_CALLT1_Z - break; - case BC_CALLT: - | lsl NARGS8:RC, RC, #3 - | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 - |->BC_CALLT1_Z: - | ldrd LFUNC:CARG34, [RA, BASE]! - | sub NARGS8:RC, NARGS8:RC, #8 - | add RA, RA, #8 - | checkfunc CARG4, ->vmeta_callt - | ldr PC, [BASE, FRAME_PC] - |->BC_CALLT2_Z: - | mov RB, #0 - | ldrb CARG4, LFUNC:CARG3->ffid - | tst PC, #FRAME_TYPE - | bne >7 - |1: - | str LFUNC:CARG3, [BASE, FRAME_FUNC] // Copy function down, but keep PC. - | cmp NARGS8:RC, #0 - | beq >3 - |2: - | ldrd CARG12, [RA, RB] - | add INS, RB, #8 - | cmp INS, NARGS8:RC - | strd CARG12, [BASE, RB] - | mov RB, INS - | bne <2 - |3: - | cmp CARG4, #1 // (> FF_C) Calling a fast function? - | bhi >5 - |4: - | ins_callt - | - |5: // Tailcall to a fast function with a Lua frame below. - | ldr INS, [PC, #-4] - | decode_RA8 RA, INS - | sub CARG1, BASE, RA - | ldr LFUNC:CARG1, [CARG1, #-16] - | ldr CARG1, LFUNC:CARG1->field_pc - | ldr KBASE, [CARG1, #PC2PROTO(k)] - | b <4 - | - |7: // Tailcall from a vararg function. - | eor PC, PC, #FRAME_VARG - | tst PC, #FRAME_TYPEP // Vararg frame below? - | movne CARG4, #0 // Clear ffid if no Lua function below. - | bne <1 - | sub BASE, BASE, PC - | ldr PC, [BASE, FRAME_PC] - | tst PC, #FRAME_TYPE - | movne CARG4, #0 // Clear ffid if no Lua function below. - | b <1 - break; - - case BC_ITERC: - | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) - | add RA, BASE, RA - | mov RB, BASE // Save old BASE for vmeta_call. - | ldrd CARG34, [RA, #-16] - | ldrd CARG12, [RA, #-8] - | add BASE, RA, #8 - | strd CARG34, [RA, #8] // Copy state. - | strd CARG12, [RA, #16] // Copy control var. - | // STALL: locked CARG34. - | ldrd LFUNC:CARG34, [RA, #-24] - | mov NARGS8:RC, #16 // Iterators get 2 arguments. 
- | // STALL: load CARG34. - | strd LFUNC:CARG34, [RA] // Copy callable. - | checkfunc CARG4, ->vmeta_call - | ins_call - break; - - case BC_ITERN: - | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. - |.endif - | add RA, BASE, RA - | ldr TAB:RB, [RA, #-16] - | ldr CARG1, [RA, #-8] // Get index from control var. - | ldr INS, TAB:RB->asize - | ldr CARG2, TAB:RB->array - | add PC, PC, #4 - |1: // Traverse array part. - | subs RC, CARG1, INS - | add CARG3, CARG2, CARG1, lsl #3 - | bhs >5 // Index points after array part? - | ldrd CARG34, [CARG3] - | checktp CARG4, LJ_TNIL - | addeq CARG1, CARG1, #1 // Skip holes in array part. - | beq <1 - | ldrh RC, [PC, #-2] - | mvn CARG2, #~LJ_TISNUM - | strd CARG34, [RA, #8] - | add RC, PC, RC, lsl #2 - | add RB, CARG1, #1 - | strd CARG12, [RA] - | sub PC, RC, #0x20000 - | str RB, [RA, #-8] // Update control var. - |3: - | ins_next - | - |5: // Traverse hash part. - | ldr CARG4, TAB:RB->hmask - | ldr NODE:RB, TAB:RB->node - |6: - | add CARG1, RC, RC, lsl #1 - | cmp RC, CARG4 // End of iteration? Branch to ITERL+1. - | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8 - | bhi <3 - | ldrd CARG12, NODE:CARG3->val - | checktp CARG2, LJ_TNIL - | add RC, RC, #1 - | beq <6 // Skip holes in hash part. - | ldrh RB, [PC, #-2] - | add RC, RC, INS - | ldrd CARG34, NODE:CARG3->key - | str RC, [RA, #-8] // Update control var. 
- | strd CARG12, [RA, #8] - | add RC, PC, RB, lsl #2 - | sub PC, RC, #0x20000 - | strd CARG34, [RA] - | b <3 - break; - - case BC_ISNEXT: - | // RA = base*8, RC = target (points to ITERN) - | add RA, BASE, RA - | add RC, PC, RC, lsl #2 - | ldrd CFUNC:CARG12, [RA, #-24] - | ldr CARG3, [RA, #-12] - | ldr CARG4, [RA, #-4] - | checktp CARG2, LJ_TFUNC - | ldrbeq CARG1, CFUNC:CARG1->ffid - | checktpeq CARG3, LJ_TTAB - | checktpeq CARG4, LJ_TNIL - | cmpeq CARG1, #FF_next_N - | subeq PC, RC, #0x20000 - | bne >5 - | ins_next1 - | ins_next2 - | mov CARG1, #0 - | mvn CARG2, #0x00018000 - | strd CARG1, [RA, #-8] // Initialize control var. - |1: - | ins_next3 - |5: // Despecialize bytecode if any of the checks fail. - | mov CARG1, #BC_JMP - | mov OP, #BC_ITERC - | strb CARG1, [PC, #-4] - | sub PC, RC, #0x20000 - | strb OP, [PC] // Subsumes ins_next1. - | ins_next2 - | b <1 - break; - - case BC_VARG: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 - | ldr CARG1, [BASE, FRAME_PC] - | add RC, BASE, RC - | add RA, BASE, RA - | add RC, RC, #FRAME_VARG - | add CARG4, RA, RB - | sub CARG3, BASE, #8 // CARG3 = vtop - | sub RC, RC, CARG1 // RC = vbase - | // Note: RC may now be even _above_ BASE if nargs was < numparams. - | cmp RB, #0 - | sub CARG1, CARG3, RC - | beq >5 // Copy all varargs? - | sub CARG4, CARG4, #16 - |1: // Copy vararg slots to destination slots. - | cmp RC, CARG3 - | ldrdlo CARG12, [RC], #8 - | mvnhs CARG2, #~LJ_TNIL - | cmp RA, CARG4 - | strd CARG12, [RA], #8 - | blo <1 - |2: - | ins_next - | - |5: // Copy all varargs. - | ldr CARG4, L->maxstack - | cmp CARG1, #0 - | movle RB, #8 // MULTRES = (0+1)*8 - | addgt RB, CARG1, #8 - | add CARG2, RA, CARG1 - | str RB, SAVE_MULTRES - | ble <2 - | cmp CARG2, CARG4 - | bhi >7 - |6: - | ldrd CARG12, [RC], #8 - | strd CARG12, [RA], #8 - | cmp RC, CARG3 - | blo <6 - | b <2 - | - |7: // Grow stack for varargs. 
- | lsr CARG2, CARG1, #3 - | str RA, L->top - | mov CARG1, L - | str BASE, L->base - | sub RC, RC, BASE // Need delta, because BASE may change. - | str PC, SAVE_PC - | sub RA, RA, BASE - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldr BASE, L->base - | add RA, BASE, RA - | add RC, BASE, RC - | sub CARG3, BASE, #8 - | b <6 - break; - - /* -- Returns ----------------------------------------------------------- */ - - case BC_RETM: - | // RA = results*8, RC = extra results - | ldr CARG1, SAVE_MULTRES - | ldr PC, [BASE, FRAME_PC] - | add RA, BASE, RA - | add RC, CARG1, RC, lsl #3 - | b ->BC_RETM_Z - break; - - case BC_RET: - | // RA = results*8, RC = nresults+1 - | ldr PC, [BASE, FRAME_PC] - | lsl RC, RC, #3 - | add RA, BASE, RA - |->BC_RETM_Z: - | str RC, SAVE_MULTRES - |1: - | ands CARG1, PC, #FRAME_TYPE - | eor CARG2, PC, #FRAME_VARG - | bne ->BC_RETV2_Z - | - |->BC_RET_Z: - | // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return - | ldr INS, [PC, #-4] - | subs CARG4, RC, #8 - | sub CARG3, BASE, #8 - | beq >3 - |2: - | ldrd CARG12, [RA], #8 - | add BASE, BASE, #8 - | subs CARG4, CARG4, #8 - | strd CARG12, [BASE, #-16] - | bne <2 - |3: - | decode_RA8 RA, INS - | sub CARG4, CARG3, RA - | decode_RB8 RB, INS - | ldr LFUNC:CARG1, [CARG4, FRAME_FUNC] - |5: - | cmp RB, RC // More results expected? - | bhi >6 - | mov BASE, CARG4 - | ldr CARG2, LFUNC:CARG1->field_pc - | ins_next1 - | ins_next2 - | ldr KBASE, [CARG2, #PC2PROTO(k)] - | ins_next3 - | - |6: // Fill up results with nil. - | mvn CARG2, #~LJ_TNIL - | add BASE, BASE, #8 - | add RC, RC, #8 - | str CARG2, [BASE, #-12] - | b <5 - | - |->BC_RETV1_Z: // Non-standard return case. - | add RA, BASE, RA - |->BC_RETV2_Z: - | tst CARG2, #FRAME_TYPEP - | bne ->vm_return - | // Return from vararg function: relocate BASE down. 
- | sub BASE, BASE, CARG2 - | ldr PC, [BASE, FRAME_PC] - | b <1 - break; - - case BC_RET0: case BC_RET1: - | // RA = results*8, RC = nresults+1 - | ldr PC, [BASE, FRAME_PC] - | lsl RC, RC, #3 - | str RC, SAVE_MULTRES - | ands CARG1, PC, #FRAME_TYPE - | eor CARG2, PC, #FRAME_VARG - | ldreq INS, [PC, #-4] - | bne ->BC_RETV1_Z - if (op == BC_RET1) { - | ldrd CARG12, [BASE, RA] - } - | sub CARG4, BASE, #8 - | decode_RA8 RA, INS - if (op == BC_RET1) { - | strd CARG12, [CARG4] - } - | sub BASE, CARG4, RA - | decode_RB8 RB, INS - | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] - |5: - | cmp RB, RC - | bhi >6 - | ldr CARG2, LFUNC:CARG1->field_pc - | ins_next1 - | ins_next2 - | ldr KBASE, [CARG2, #PC2PROTO(k)] - | ins_next3 - | - |6: // Fill up results with nil. - | sub CARG2, CARG4, #4 - | mvn CARG3, #~LJ_TNIL - | str CARG3, [CARG2, RC] - | add RC, RC, #8 - | b <5 - break; - - /* -- Loops and branches ------------------------------------------------ */ - - |.define FOR_IDX, [RA]; .define FOR_TIDX, [RA, #4] - |.define FOR_STOP, [RA, #8]; .define FOR_TSTOP, [RA, #12] - |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20] - |.define FOR_EXT, [RA, #24]; .define FOR_TEXT, [RA, #28] - - case BC_FORL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IFORL follows. - break; - - case BC_JFORI: - case BC_JFORL: -#if !LJ_HASJIT - break; -#endif - case BC_FORI: - case BC_IFORL: - | // RA = base*8, RC = target (after end of loop or start of loop) - vk = (op == BC_IFORL || op == BC_JFORL); - | ldrd CARG12, [RA, BASE]! 
- if (op != BC_JFORL) { - | add RC, PC, RC, lsl #2 - } - if (!vk) { - | ldrd CARG34, FOR_STOP - | checktp CARG2, LJ_TISNUM - | ldr RB, FOR_TSTEP - | bne >5 - | checktp CARG4, LJ_TISNUM - | ldr CARG4, FOR_STEP - | checktpeq RB, LJ_TISNUM - | bne ->vmeta_for - | cmp CARG4, #0 - | blt >4 - | cmp CARG1, CARG3 - } else { - | ldrd CARG34, FOR_STEP - | checktp CARG2, LJ_TISNUM - | bne >5 - | adds CARG1, CARG1, CARG3 - | ldr CARG4, FOR_STOP - if (op == BC_IFORL) { - | addvs RC, PC, #0x20000 // Overflow: prevent branch. - } else { - | bvs >2 // Overflow: do not enter mcode. - } - | cmp CARG3, #0 - | blt >4 - | cmp CARG1, CARG4 - } - |1: - if (op == BC_FORI) { - | subgt PC, RC, #0x20000 - } else if (op == BC_JFORI) { - | sub PC, RC, #0x20000 - | ldrhle RC, [PC, #-2] - } else if (op == BC_IFORL) { - | suble PC, RC, #0x20000 - } - if (vk) { - | strd CARG12, FOR_IDX - } - |2: - | ins_next1 - | ins_next2 - | strd CARG12, FOR_EXT - if (op == BC_JFORI || op == BC_JFORL) { - | ble =>BC_JLOOP - } - |3: - | ins_next3 - | - |4: // Invert check for negative step. - if (!vk) { - | cmp CARG3, CARG1 - } else { - | cmp CARG4, CARG1 - } - | b <1 - | - |5: // FP loop. 
- if (!vk) { - | cmnlo CARG4, #-LJ_TISNUM - | cmnlo RB, #-LJ_TISNUM - | bhs ->vmeta_for - |.if FPU - | vldr d0, FOR_IDX - | vldr d1, FOR_STOP - | cmp RB, #0 - | vstr d0, FOR_EXT - |.else - | cmp RB, #0 - | strd CARG12, FOR_EXT - | blt >8 - |.endif - } else { - |.if FPU - | vldr d0, FOR_IDX - | vldr d2, FOR_STEP - | vldr d1, FOR_STOP - | cmp CARG4, #0 - | vadd.f64 d0, d0, d2 - |.else - | cmp CARG4, #0 - | blt >8 - | bl extern __aeabi_dadd - | strd CARG12, FOR_IDX - | ldrd CARG34, FOR_STOP - | strd CARG12, FOR_EXT - |.endif - } - |6: - |.if FPU - | vcmpge.f64 d0, d1 - | vcmplt.f64 d1, d0 - | vmrs - |.else - | bl extern __aeabi_cdcmple - |.endif - if (vk) { - |.if FPU - | vstr d0, FOR_IDX - | vstr d0, FOR_EXT - |.endif - } - if (op == BC_FORI) { - | subhi PC, RC, #0x20000 - } else if (op == BC_JFORI) { - | sub PC, RC, #0x20000 - | ldrhls RC, [PC, #-2] - | bls =>BC_JLOOP - } else if (op == BC_IFORL) { - | subls PC, RC, #0x20000 - } else { - | bls =>BC_JLOOP - } - | ins_next1 - | ins_next2 - | b <3 - | - |.if not FPU - |8: // Invert check for negative step. - if (vk) { - | bl extern __aeabi_dadd - | strd CARG12, FOR_IDX - | strd CARG12, FOR_EXT - } - | mov CARG3, CARG1 - | mov CARG4, CARG2 - | ldrd CARG12, FOR_STOP - | b <6 - |.endif - break; - - case BC_ITERL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IITERL follows. - break; - - case BC_JITERL: -#if !LJ_HASJIT - break; -#endif - case BC_IITERL: - | // RA = base*8, RC = target - | ldrd CARG12, [RA, BASE]! - if (op == BC_JITERL) { - | cmn CARG2, #-LJ_TNIL // Stop if iterator returned nil. - | strdne CARG12, [RA, #-8] - | bne =>BC_JLOOP - } else { - | add RC, PC, RC, lsl #2 - | // STALL: load CARG12. - | cmn CARG2, #-LJ_TNIL // Stop if iterator returned nil. - | subne PC, RC, #0x20000 // Otherwise save control var + branch. 
- | strdne CARG12, [RA, #-8] - } - | ins_next - break; - - case BC_LOOP: - | // RA = base*8, RC = target (loop extent) - | // Note: RA/RC is only used by trace recorder to determine scope/extent - | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_ILOOP follows. - break; - - case BC_ILOOP: - | // RA = base*8, RC = target (loop extent) - | ins_next - break; - - case BC_JLOOP: - |.if JIT - | // RA = base (ignored), RC = traceno - | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)] - | mov CARG2, #0 // Traces on ARM don't store the trace number, so use 0. - | ldr TRACE:RC, [CARG1, RC, lsl #2] - | st_vmstate CARG2 - | ldr RA, TRACE:RC->mcode - | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)] - | str L, [DISPATCH, #DISPATCH_GL(tmpbuf.L)] - | bx RA - |.endif - break; - - case BC_JMP: - | // RA = base*8 (only used by trace recorder), RC = target - | add RC, PC, RC, lsl #2 - | sub PC, RC, #0x20000 - | ins_next - break; - - /* -- Function headers -------------------------------------------------- */ - - case BC_FUNCF: - |.if JIT - | hotcall - |.endif - case BC_FUNCV: /* NYI: compiled vararg functions. */ - | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. - break; - - case BC_JFUNCF: -#if !LJ_HASJIT - break; -#endif - case BC_IFUNCF: - | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 - | ldr CARG1, L->maxstack - | ldrb CARG2, [PC, #-4+PC2PROTO(numparams)] - | ldr KBASE, [PC, #-4+PC2PROTO(k)] - | cmp RA, CARG1 - | bhi ->vm_growstack_l - if (op != BC_JFUNCF) { - | ins_next1 - | ins_next2 - } - |2: - | cmp NARGS8:RC, CARG2, lsl #3 // Check for missing parameters. - | mvn CARG4, #~LJ_TNIL - | blo >3 - if (op == BC_JFUNCF) { - | decode_RD RC, INS - | b =>BC_JLOOP - } else { - | ins_next3 - } - | - |3: // Clear missing parameters. 
- | strd CARG34, [BASE, NARGS8:RC] - | add NARGS8:RC, NARGS8:RC, #8 - | b <2 - break; - - case BC_JFUNCV: -#if !LJ_HASJIT - break; -#endif - | NYI // NYI: compiled vararg functions - break; /* NYI: compiled vararg functions. */ - - case BC_IFUNCV: - | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 - | ldr CARG1, L->maxstack - | add CARG4, BASE, RC - | add RA, RA, RC - | str LFUNC:CARG3, [CARG4] // Store copy of LFUNC. - | add CARG2, RC, #8+FRAME_VARG - | ldr KBASE, [PC, #-4+PC2PROTO(k)] - | cmp RA, CARG1 - | str CARG2, [CARG4, #4] // Store delta + FRAME_VARG. - | bhs ->vm_growstack_l - | ldrb RB, [PC, #-4+PC2PROTO(numparams)] - | mov RA, BASE - | mov RC, CARG4 - | cmp RB, #0 - | add BASE, CARG4, #8 - | beq >3 - | mvn CARG3, #~LJ_TNIL - |1: - | cmp RA, RC // Less args than parameters? - | ldrdlo CARG12, [RA], #8 - | movhs CARG2, CARG3 - | strlo CARG3, [RA, #-4] // Clear old fixarg slot (help the GC). - |2: - | subs RB, RB, #1 - | strd CARG12, [CARG4, #8]! - | bne <1 - |3: - | ins_next - break; - - case BC_FUNCC: - case BC_FUNCCW: - | // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8 - if (op == BC_FUNCC) { - | ldr CARG4, CFUNC:CARG3->f - } else { - | ldr CARG4, [DISPATCH, #DISPATCH_GL(wrapf)] - } - | add CARG2, RA, NARGS8:RC - | ldr CARG1, L->maxstack - | add RC, BASE, NARGS8:RC - | str BASE, L->base - | cmp CARG2, CARG1 - | str RC, L->top - if (op == BC_FUNCCW) { - | ldr CARG2, CFUNC:CARG3->f - } - | mv_vmstate CARG3, C - | mov CARG1, L - | bhi ->vm_growstack_c // Need to grow stack. - | st_vmstate CARG3 - | blx CARG4 // (lua_State *L [, lua_CFunction f]) - | // Returns nresults. 
- | ldr BASE, L->base - | mv_vmstate CARG3, INTERP - | ldr CRET2, L->top - | str L, [DISPATCH, #DISPATCH_GL(cur_L)] - | lsl RC, CRET1, #3 - | st_vmstate CARG3 - | ldr PC, [BASE, FRAME_PC] - | sub RA, CRET2, RC // RA = L->top - nresults*8 - | b ->vm_returnc - break; - - /* ---------------------------------------------------------------------- */ - - default: - fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); - exit(2); - break; - } -} - -/* Build the whole backend: call dasm_growpc() with BC__MAX, emit the shared subroutines via build_subroutines(), then switch to the .code_op section and call build_ins() once per bytecode op in the range 0..BC__MAX-1. Returns BC__MAX. */ -static int build_backend(BuildCtx *ctx) -{ - int op; - - dasm_growpc(Dst, BC__MAX); - - build_subroutines(ctx); - - |.code_op - for (op = 0; op < BC__MAX; op++) - build_ins(ctx, (BCOp)op, op); - - return BC__MAX; -} - -/* Emit pseudo frame-info for all assembler functions. Only the BUILD_elfasm mode writes anything (a .debug_frame section printed to ctx->fp); every other mode falls into the empty default case. */ -static void emit_asm_debug(BuildCtx *ctx) -{ - int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); - int i; - /* fcofs is the byte offset of the vm_ffi_call global from the start of the code. It is printed as the .long after .Lbegin in the first FDE, and the FFI-only second FDE gets the remainder (codesz - fcofs). */ - switch (ctx->mode) { - case BUILD_elfasm: - fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n"); - /* CIE (.Lframe0): referenced by both FDEs below. */ - fprintf(ctx->fp, - ".Lframe0:\n" - "\t.long .LECIE0-.LSCIE0\n" - ".LSCIE0:\n" - "\t.long 0xffffffff\n" - "\t.byte 0x1\n" - "\t.string \"\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 0xe\n" /* Return address is in lr. */ - "\t.byte 0xc\n\t.uleb128 0xd\n\t.uleb128 0\n" /* def_cfa sp */ - "\t.align 2\n" - ".LECIE0:\n\n"); - /* First FDE: starts at .Lbegin, size fcofs, CFA offset CFRAME_SIZE. */ - fprintf(ctx->fp, - ".LSFDE0:\n" - "\t.long .LEFDE0-.LASFDE0\n" - ".LASFDE0:\n" - "\t.long .Lframe0\n" - "\t.long .Lbegin\n" - "\t.long %d\n" - "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ - "\t.byte 0x8e\n\t.uleb128 1\n", /* offset lr */ - fcofs, CFRAME_SIZE); - /* Without an FPU this loop covers r11 down to r4. With an FPU it stops at r5, and r4 gets its own entry (offset 25) after the d8-d15 entries. */ - for (i = 11; i >= (LJ_ARCH_HASFPU ?
5 : 4); i--) /* offset r4-r11 */ - fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2+(11-i)); -#if LJ_ARCH_HASFPU - for (i = 15; i >= 8; i--) /* offset d8-d15 */ - fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 %d, %d\n", - 64+2*i, 10+2*(15-i)); - fprintf(ctx->fp, "\t.byte 0x84\n\t.uleb128 %d\n", 25); /* offset r4 */ -#endif - fprintf(ctx->fp, - "\t.align 2\n" - ".LEFDE0:\n\n"); -#if LJ_HASFFI - /* Second FDE: starts at lj_vm_ffi_call and spans the rest of the code (codesz - fcofs); the CFA is switched to r11 by the last opcode. */ - fprintf(ctx->fp, - ".LSFDE1:\n" - "\t.long .LEFDE1-.LASFDE1\n" - ".LASFDE1:\n" - "\t.long .Lframe0\n" - "\t.long lj_vm_ffi_call\n" - "\t.long %d\n" - "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ - "\t.byte 0x8e\n\t.uleb128 1\n" /* offset lr */ - "\t.byte 0x8b\n\t.uleb128 2\n" /* offset r11 */ - "\t.byte 0x85\n\t.uleb128 3\n" /* offset r5 */ - "\t.byte 0x84\n\t.uleb128 4\n" /* offset r4 */ - "\t.byte 0xd\n\t.uleb128 0xb\n" /* def_cfa_register r11 */ - "\t.align 2\n" - ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); -#endif - break; - default: - break; - } -} - diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc deleted file mode 100644 index bb2496ab18..0000000000 --- a/src/vm_arm64.dasc +++ /dev/null @@ -1,3964 +0,0 @@ -|// Low-level VM code for ARM64 CPUs. -|// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -| -|.arch arm64 -|.section code_op, code_sub -| -|.actionlist build_actionlist -|.globals GLOB_ -|.globalnames globnames -|.externnames extnames -| -|// Note: The ragged indentation of the instructions is intentional. -|// The starting columns indicate data dependencies. -| -|//----------------------------------------------------------------------- -| -|// ARM64 registers and the AAPCS64 ABI 1.0 at a glance: -|// -|// x0-x17 temp, x19-x28 callee-saved, x29 fp, x30 lr -|// x18 is reserved on most platforms. Don't use it, save it or restore it. -|// x31 doesn't exist. Register number 31 either means xzr/wzr (zero) or sp, -|// depending on the instruction.
-|// v0-v7 temp, v8-v15 callee-saved (only d8-d15 preserved), v16-v31 temp -|// -|// x0-x7/v0-v7 hold parameters and results. -| -|// Fixed register assignments for the interpreter. -| -|// The following must be C callee-save. -|.define BASE, x19 // Base of current Lua stack frame. -|.define KBASE, x20 // Constants of current Lua function. -|.define PC, x21 // Next PC. -|.define GLREG, x22 // Global state. -|.define LREG, x23 // Register holding lua_State (also in SAVE_L). -|.define TISNUM, x24 // Constant LJ_TISNUM << 47. -|.define TISNUMhi, x25 // Constant LJ_TISNUM << 15. -|.define TISNIL, x26 // Constant -1LL. -|.define fp, x29 // Yes, we have to maintain a frame pointer. -| -/* ST_INTERP is w26, the 32-bit view of x26 (TISNIL), so the same register supplies both constants. */ -|.define ST_INTERP, w26 // Constant -1. -| -|// The following temporaries are not saved across C calls, except for RA/RC. -|.define RA, x27 -|.define RC, x28 -|.define RB, x17 -|.define RAw, w27 -|.define RCw, w28 -|.define RBw, w17 -|.define INS, x16 -|.define INSw, w16 -|.define ITYPE, x15 -|.define TMP0, x8 -|.define TMP1, x9 -|.define TMP2, x10 -|.define TMP3, x11 -|.define TMP0w, w8 -|.define TMP1w, w9 -|.define TMP2w, w10 -|.define TMP3w, w11 -| -|// Calling conventions. Also used as temporaries. -|.define CARG1, x0 -|.define CARG2, x1 -|.define CARG3, x2 -|.define CARG4, x3 -|.define CARG5, x4 -|.define CARG1w, w0 -|.define CARG2w, w1 -|.define CARG3w, w2 -|.define CARG4w, w3 -|.define CARG5w, w4 -| -|.define FARG1, d0 -|.define FARG2, d1 -| -|.define CRET1, x0 -|.define CRET1w, w0 -| -|// Stack layout while in interpreter. Must match with lj_frame.h.
-| -|.define CFRAME_SPACE, 208 -|//----- 16 byte aligned, <-- sp entering interpreter -|// Unused [sp, #204] // 32 bit values -|.define SAVE_NRES, [sp, #200] -|.define SAVE_ERRF, [sp, #196] -|.define SAVE_MULTRES, [sp, #192] -|.define TMPD, [sp, #184] // 64 bit values -|.define SAVE_L, [sp, #176] -|.define SAVE_PC, [sp, #168] -|.define SAVE_CFRAME, [sp, #160] -|.define SAVE_FPR_, 96 // 96+8*8: 64 bit FPR saves -|.define SAVE_GPR_, 16 // 16+10*8: 64 bit GPR saves -|.define SAVE_LR, [sp, #8] -|.define SAVE_FP, [sp] -|//----- 16 byte aligned, <-- sp while in interpreter. -| -/* TMPDofs is the offset of TMPD as a bare immediate, for forming its address (add reg, sp, TMPDofs). It must stay equal to the offset in the TMPD define above. */ -|.define TMPDofs, #184 -| -/* save_ and rest_ store or load one FPR pair and one GPR pair. The slot offsets are computed from the first register of each pair: d8 maps to SAVE_FPR_ and x19 maps to SAVE_GPR_, 8 bytes per register. */ -|.macro save_, gpr1, gpr2, fpr1, fpr2 -| stp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8] -| stp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8] -|.endmacro -|.macro rest_, gpr1, gpr2, fpr1, fpr2 -| ldp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8] -| ldp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8] -|.endmacro -| -/* saveregs allocates CFRAME_SPACE bytes with a pre-indexed stp of fp and lr, sets fp = sp, then saves x19-x28 and d8-d15. restoreregs reloads the same registers and releases the frame with a post-indexed ldp of fp and lr. */ -|.macro saveregs -| stp fp, lr, [sp, #-CFRAME_SPACE]! -| add fp, sp, #0 -| stp x19, x20, [sp, # SAVE_GPR_] -| save_ 21, 22, 8, 9 -| save_ 23, 24, 10, 11 -| save_ 25, 26, 12, 13 -| save_ 27, 28, 14, 15 -|.endmacro -|.macro restoreregs -| ldp x19, x20, [sp, # SAVE_GPR_] -| rest_ 21, 22, 8, 9 -| rest_ 23, 24, 10, 11 -| rest_ 25, 26, 12, 13 -| rest_ 27, 28, 14, 15 -| ldp fp, lr, [sp], # CFRAME_SPACE -|.endmacro -| -|// Type definitions. Some of these are only used for documentation. -|.type L, lua_State, LREG -|.type GL, global_State, GLREG -|.type TVALUE, TValue -|.type GCOBJ, GCobj -|.type STR, GCstr -|.type TAB, GCtab -|.type LFUNC, GCfuncL -|.type CFUNC, GCfuncC -|.type PROTO, GCproto -|.type UPVAL, GCupval -|.type NODE, Node -|.type NARGS8, int -|.type TRACE, GCtrace -|.type SBUF, SBuf -| -|//----------------------------------------------------------------------- -| -|// Trap for not-yet-implemented parts. -|.macro NYI; brk; .endmacro -| -|//----------------------------------------------------------------------- -| -|// Access to frame relative to BASE.
-|.define FRAME_FUNC, #-16 -|.define FRAME_PC, #-8 -| -/* Field extraction from a 32-bit instruction word: decode_RA takes bits 8-15, decode_RC bits 16-23, decode_RB bits 24-31 and decode_RD bits 16-31. decode_RC8RD yields the low 8 bits of src shifted left by 3 (i.e. times 8). */ -|.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro -|.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro -|.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro -|.macro decode_RD, dst, ins; ubfx dst, ins, #16, #16; .endmacro -|.macro decode_RC8RD, dst, src; ubfiz dst, src, #3, #8; .endmacro -| -|// Instruction decode+dispatch. -/* ins_NEXT loads the next instruction word and advances PC by 4, picks the handler from the table at GL+GG_G2DISP indexed by the low byte of the word (8 bytes per entry), pre-decodes the RA field into RA and the RD field into RC, then branches to the handler. */ -|.macro ins_NEXT -| ldr INSw, [PC], #4 -| add TMP1, GL, INS, uxtb #3 -| decode_RA RA, INS -| ldr TMP0, [TMP1, #GG_G2DISP] -| decode_RD RC, INS -| br TMP0 -|.endmacro -| -|// Instruction footer. -|.if 1 -| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. -| .define ins_next, ins_NEXT -| .define ins_next_, ins_NEXT -|.else -| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. -| // Affects only certain kinds of benchmarks (and only with -j off). -| .macro ins_next -| b ->ins_next -| .endmacro -| .macro ins_next_ -| ->ins_next: -| ins_NEXT -| .endmacro -|.endif -| -|// Call decode and dispatch. -/* ins_callt loads PC from the pc field of the function in CARG3, fetches the first instruction word, turns its RA field into a pointer (BASE + RA*8) and branches to the handler. Unlike ins_NEXT it does not decode RD, so RC keeps nargs*8. */ -|.macro ins_callt -| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC -| ldr PC, LFUNC:CARG3->pc -| ldr INSw, [PC], #4 -| add TMP1, GL, INS, uxtb #3 -| decode_RA RA, INS -| ldr TMP0, [TMP1, #GG_G2DISP] -| add RA, BASE, RA, lsl #3 -| br TMP0 -|.endmacro -| -/* ins_call first stores the caller PC into the FRAME_PC slot of the new frame, then dispatches exactly like ins_callt. */ -|.macro ins_call -| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC -| str PC, [BASE, FRAME_PC] -| ins_callt -|.endmacro -| -|//----------------------------------------------------------------------- -| -|// Macros to check the TValue type and extract the GCobj. Branch on failure.
-/* checktp, three-operand form: the tag (value >> 47, arithmetic) goes to ITYPE and is compared against tp, reg is masked in place with LJ_GCVMASK, and the branch to target is taken when the tag differs. The mask is applied even when the branch is taken. */ -|.macro checktp, reg, tp, target -| asr ITYPE, reg, #47 -| cmn ITYPE, #-tp -| and reg, reg, #LJ_GCVMASK -| bne target -|.endmacro -/* checktp, four-operand form: same test, but the masked pointer is written to dst and reg is left unchanged. */ -|.macro checktp, dst, reg, tp, target -| asr ITYPE, reg, #47 -| cmn ITYPE, #-tp -| and dst, reg, #LJ_GCVMASK -| bne target -|.endmacro -|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro -|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro -|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro -/* The three number checks compare TISNUMhi with the upper 32 bits of reg and differ only in the branch condition: checkint branches to target when they are not equal, checknum when TISNUMhi is unsigned lower-or-same, checknumber when TISNUMhi is unsigned lower. */ -|.macro checkint, reg, target -| cmp TISNUMhi, reg, lsr #32 -| bne target -|.endmacro -|.macro checknum, reg, target -| cmp TISNUMhi, reg, lsr #32 -| bls target -|.endmacro -|.macro checknumber, reg, target -| cmp TISNUMhi, reg, lsr #32 -| blo target -|.endmacro -| -/* mov_false and mov_true load 64-bit constants with movn, which writes the bitwise inverse of the shifted immediate: all ones except bit 47 for false, all ones except bit 48 for true. */ -|.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro -|.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro -| -/* GL_J(field): offset of a jit_State field, biased by GG_G2J. */ -#define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field)) -| -/* PC2PROTO(field): offset of a GCproto field minus sizeof(GCproto), i.e. relative to the first byte after the GCproto struct. */ -#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) -| -/* hotcheck subtracts delta from the halfword counter at GL + GG_G2DISP+GG_DISP2HOT + ((PC >> 1) & 126) and stores it back. The flags set by subs are consumed by the blo in hotloop and hotcall. Clobbers CARG1 and CARG2. */ -|.macro hotcheck, delta -| lsr CARG1, PC, #1 -| and CARG1, CARG1, #126 -| add CARG1, CARG1, #GG_G2DISP+GG_DISP2HOT -| ldrh CARG2w, [GL, CARG1] -| subs CARG2, CARG2, #delta -| strh CARG2w, [GL, CARG1] -|.endmacro -| -|.macro hotloop -| hotcheck HOTCOUNT_LOOP -| blo ->vm_hotloop -|.endmacro -| -|.macro hotcall -| hotcheck HOTCOUNT_CALL -| blo ->vm_hotcall -|.endmacro -| -|// Set current VM state. -|.macro mv_vmstate, reg, st; movn reg, #LJ_VMST_..st; .endmacro -|.macro st_vmstate, reg; str reg, GL->vmstate; .endmacro -| -|// Move table write barrier back. Overwrites mark and tmp.
-|.macro barrierback, tab, mark, tmp -| ldr tmp, GL->gc.grayagain -| and mark, mark, #~LJ_GC_BLACK // black2gray(tab) -| str tab, GL->gc.grayagain -| strb mark, tab->marked -| str tmp, tab->gclist -|.endmacro -| -|//----------------------------------------------------------------------- - -#if !LJ_DUALNUM -#error "Only dual-number mode supported for ARM64 target" -#endif - -/* Generate subroutines used by opcodes and other parts of the VM. */ -/* The .code_sub section should be last to help static branch prediction. */ -static void build_subroutines(BuildCtx *ctx) -{ - |.code_sub - | - |//----------------------------------------------------------------------- - |//-- Return handling ---------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_returnp: - | // See vm_return. Also: RB = previous base. - | tbz PC, #2, ->cont_dispatch // (PC & FRAME_P) == 0? - | - | // Return from pcall or xpcall fast func. - | ldr PC, [RB, FRAME_PC] // Fetch PC of previous frame. - | mov_true TMP0 - | mov BASE, RB - | // Prepending may overwrite the pcall frame, so do it at the end. - | str TMP0, [RA, #-8]! // Prepend true to results. - | - |->vm_returnc: - | adds RC, RC, #8 // RC = (nresults+1)*8. - | mov CRET1, #LUA_YIELD - | beq ->vm_unwind_c_eh - | str RCw, SAVE_MULTRES - | ands CARG1, PC, #FRAME_TYPE - | beq ->BC_RET_Z // Handle regular return to Lua. - | - |->vm_return: - | // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return - | // CARG1 = PC & FRAME_TYPE - | and RB, PC, #~FRAME_TYPEP - | cmp CARG1, #FRAME_C - | sub RB, BASE, RB // RB = previous base. - | bne ->vm_returnp - | - | str RB, L->base - | ldrsw CARG2, SAVE_NRES // CARG2 = nresults+1. 
- | mv_vmstate TMP0w, C - | sub BASE, BASE, #16 - | subs TMP2, RC, #8 - | st_vmstate TMP0w - | beq >2 - |1: - | subs TMP2, TMP2, #8 - | ldr TMP0, [RA], #8 - | str TMP0, [BASE], #8 - | bne <1 - |2: - | cmp RC, CARG2, lsl #3 // More/less results wanted? - | bne >6 - |3: - | str BASE, L->top // Store new top. - | - |->vm_leave_cp: - | ldr RC, SAVE_CFRAME // Restore previous C frame. - | mov CRET1, #0 // Ok return status for vm_pcall. - | str RC, L->cframe - | - |->vm_leave_unw: - | restoreregs - | ret - | - |6: - | bgt >7 // Less results wanted? - | // More results wanted. Check stack size and fill up results with nil. - | ldr CARG3, L->maxstack - | cmp BASE, CARG3 - | bhs >8 - | str TISNIL, [BASE], #8 - | add RC, RC, #8 - | b <2 - | - |7: // Less results wanted. - | cbz CARG2, <3 // LUA_MULTRET+1 case? - | sub CARG1, RC, CARG2, lsl #3 - | sub BASE, BASE, CARG1 // Shrink top. - | b <3 - | - |8: // Corner case: need to grow stack for filling up results. - | // This can happen if: - | // - A C function grows the stack (a lot). - | // - The GC shrinks the stack in between. - | // - A return back from a lua_call() with (high) nresults adjustment. - | str BASE, L->top // Save current top held in BASE (yes). - | mov CARG1, L - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldr BASE, L->top // Need the (realloced) L->top in BASE. - | ldrsw CARG2, SAVE_NRES - | b <2 - | - |->vm_unwind_c: // Unwind C stack, return from vm_pcall. - | // (void *cframe, int errcode) - | mov sp, CARG1 - | mov CRET1, CARG2 - |->vm_unwind_c_eh: // Landing pad for external unwinder. - | ldr L, SAVE_L - | mv_vmstate TMP0w, C - | ldr GL, L->glref - | st_vmstate TMP0w - | b ->vm_leave_unw - | - |->vm_unwind_ff: // Unwind C stack, return from ff pcall. - | // (void *cframe) - | and sp, CARG1, #CFRAME_RAWMASK - |->vm_unwind_ff_eh: // Landing pad for external unwinder. 
- | ldr L, SAVE_L - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | movn TISNIL, #0 - | mov RC, #16 // 2 results: false + error message. - | ldr BASE, L->base - | ldr GL, L->glref // Setup pointer to global state. - | mov_false TMP0 - | sub RA, BASE, #8 // Results start at BASE-8. - | ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame. - | str TMP0, [BASE, #-8] // Prepend false to error message. - | st_vmstate ST_INTERP - | b ->vm_returnc - | - |//----------------------------------------------------------------------- - |//-- Grow stack for calls ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_growstack_c: // Grow stack for C function. - | // CARG1 = L - | mov CARG2, #LUA_MINSTACK - | b >2 - | - |->vm_growstack_l: // Grow stack for Lua function. - | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC - | add RC, BASE, RC - | sub RA, RA, BASE - | mov CARG1, L - | stp BASE, RC, L->base - | add PC, PC, #4 // Must point after first instruction. - | lsr CARG2, RA, #3 - |2: - | // L->base = new base, L->top = top - | str PC, SAVE_PC - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldp BASE, RC, L->base - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | sub NARGS8:RC, RC, BASE - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC - | ins_callt // Just retry the call. - | - |//----------------------------------------------------------------------- - |//-- Entry points into the assembler VM --------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_resume: // Setup C frame and resume thread. - | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) - | saveregs - | mov L, CARG1 - | ldr GL, L->glref // Setup pointer to global state. 
- | mov BASE, CARG2 - | str L, SAVE_L - | mov PC, #FRAME_CP - | str wzr, SAVE_NRES - | add TMP0, sp, #CFRAME_RESUME - | ldrb TMP1w, L->status - | str wzr, SAVE_ERRF - | str L, SAVE_PC // Any value outside of bytecode is ok. - | str xzr, SAVE_CFRAME - | str TMP0, L->cframe - | cbz TMP1w, >3 - | - | // Resume after yield (like a return). - | str L, GL->cur_L - | mov RA, BASE - | ldp BASE, CARG1, L->base - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | ldr PC, [BASE, FRAME_PC] - | strb wzr, L->status - | movn TISNIL, #0 - | sub RC, CARG1, BASE - | ands CARG1, PC, #FRAME_TYPE - | add RC, RC, #8 - | st_vmstate ST_INTERP - | str RCw, SAVE_MULTRES - | beq ->BC_RET_Z - | b ->vm_return - | - |->vm_pcall: // Setup protected C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) - | saveregs - | mov PC, #FRAME_CP - | str CARG4w, SAVE_ERRF - | b >1 - | - |->vm_call: // Setup C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1) - | saveregs - | mov PC, #FRAME_C - | - |1: // Entry point for vm_pcall above (PC = ftype). - | ldr RC, L:CARG1->cframe - | str CARG3w, SAVE_NRES - | mov L, CARG1 - | str CARG1, SAVE_L - | ldr GL, L->glref // Setup pointer to global state. - | mov BASE, CARG2 - | str CARG1, SAVE_PC // Any value outside of bytecode is ok. - | str RC, SAVE_CFRAME - | str fp, L->cframe // Add our C frame to cframe chain. - | - |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). - | str L, GL->cur_L - | ldp RB, CARG1, L->base // RB = old base (for vmeta_call). 
- | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | add PC, PC, BASE - | movn TISNIL, #0 - | sub PC, PC, RB // PC = frame delta + frame type - | sub NARGS8:RC, CARG1, BASE - | st_vmstate ST_INTERP - | - |->vm_call_dispatch: - | // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC - | ldr CARG3, [BASE, FRAME_FUNC] - | checkfunc CARG3, ->vmeta_call - | - |->vm_call_dispatch_f: - | ins_call - | // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC - | - |->vm_cpcall: // Setup protected C frame, call C. - | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) - | saveregs - | mov L, CARG1 - | ldr RA, L:CARG1->stack - | str CARG1, SAVE_L - | ldr GL, L->glref // Setup pointer to global state. - | ldr RB, L->top - | str CARG1, SAVE_PC // Any value outside of bytecode is ok. - | ldr RC, L->cframe - | sub RA, RA, RB // Compute -savestack(L, L->top). - | str RAw, SAVE_NRES // Neg. delta means cframe w/o frame. - | str wzr, SAVE_ERRF // No error function. - | str RC, SAVE_CFRAME - | str fp, L->cframe // Add our C frame to cframe chain. - | str L, GL->cur_L - | blr CARG4 // (lua_State *L, lua_CFunction func, void *ud) - | mov BASE, CRET1 - | mov PC, #FRAME_CP - | cbnz BASE, <3 // Else continue with the call. - | b ->vm_leave_cp // No base? Just remove C frame. - | - |//----------------------------------------------------------------------- - |//-- Metamethod handling ------------------------------------------------ - |//----------------------------------------------------------------------- - | - |//-- Continuation dispatch ---------------------------------------------- - | - |->cont_dispatch: - | // BASE = meta base, RA = resultptr, RC = (nresults+1)*8 - | ldr LFUNC:CARG3, [RB, FRAME_FUNC] - | ldr CARG1, [BASE, #-32] // Get continuation. - | mov CARG4, BASE - | mov BASE, RB // Restore caller BASE. 
- | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - |.if FFI - | cmp CARG1, #1 - |.endif - | ldr PC, [CARG4, #-24] // Restore PC from [cont|PC]. - | ldr CARG3, LFUNC:CARG3->pc - | add TMP0, RA, RC - | str TISNIL, [TMP0, #-8] // Ensure one valid arg. - |.if FFI - | bls >1 - |.endif - | ldr KBASE, [CARG3, #PC2PROTO(k)] - | // BASE = base, RA = resultptr, CARG4 = meta base - | br CARG1 - | - |.if FFI - |1: - | beq ->cont_ffi_callback // cont = 1: return from FFI callback. - | // cont = 0: tailcall from C function. - | sub CARG4, CARG4, #32 - | sub RC, CARG4, BASE - | b ->vm_call_tail - |.endif - | - |->cont_cat: // RA = resultptr, CARG4 = meta base - | ldr INSw, [PC, #-4] - | sub CARG2, CARG4, #32 - | ldr TMP0, [RA] - | str BASE, L->base - | decode_RB RB, INS - | decode_RA RA, INS - | add TMP1, BASE, RB, lsl #3 - | subs TMP1, CARG2, TMP1 - | beq >1 - | str TMP0, [CARG2] - | lsr CARG3, TMP1, #3 - | b ->BC_CAT_Z - | - |1: - | str TMP0, [BASE, RA, lsl #3] - | b ->cont_nop - | - |//-- Table indexing metamethods ----------------------------------------- - | - |->vmeta_tgets1: - | movn CARG4, #~LJ_TSTR - | add CARG2, BASE, RB, lsl #3 - | add CARG4, STR:RC, CARG4, lsl #47 - | b >2 - | - |->vmeta_tgets: - | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48 - | str CARG2, GL->tmptv - | add CARG2, GL, #offsetof(global_State, tmptv) - |2: - | add CARG3, sp, TMPDofs - | str CARG4, TMPD - | b >1 - | - |->vmeta_tgetb: // RB = table, RC = index - | add RC, RC, TISNUM - | add CARG2, BASE, RB, lsl #3 - | add CARG3, sp, TMPDofs - | str RC, TMPD - | b >1 - | - |->vmeta_tgetv: // RB = table, RC = key - | add CARG2, BASE, RB, lsl #3 - | add CARG3, BASE, RC, lsl #3 - |1: - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) - | // Returns TValue * (finished) or NULL (metamethod). - | cbz CRET1, >3 - | ldr TMP0, [CRET1] - | str TMP0, [BASE, RA, lsl #3] - | ins_next - | - |3: // Call __index metamethod. 
- | // BASE = base, L->top = new base, stack = cont/func/t/k - | sub TMP1, BASE, #FRAME_CONT - | ldr BASE, L->top - | mov NARGS8:RC, #16 // 2 args for func(t, k). - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. - | str PC, [BASE, #-24] // [cont|PC] - | sub PC, BASE, TMP1 - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | b ->vm_call_dispatch_f - | - |->vmeta_tgetr: - | sxtw CARG2, TMP1w - | bl extern lj_tab_getinth // (GCtab *t, int32_t key) - | // Returns cTValue * or NULL. - | mov TMP0, TISNIL - | cbz CRET1, ->BC_TGETR_Z - | ldr TMP0, [CRET1] - | b ->BC_TGETR_Z - | - |//----------------------------------------------------------------------- - | - |->vmeta_tsets1: - | movn CARG4, #~LJ_TSTR - | add CARG2, BASE, RB, lsl #3 - | add CARG4, STR:RC, CARG4, lsl #47 - | b >2 - | - |->vmeta_tsets: - | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48 - | str CARG2, GL->tmptv - | add CARG2, GL, #offsetof(global_State, tmptv) - |2: - | add CARG3, sp, TMPDofs - | str CARG4, TMPD - | b >1 - | - |->vmeta_tsetb: // RB = table, RC = index - | add RC, RC, TISNUM - | add CARG2, BASE, RB, lsl #3 - | add CARG3, sp, TMPDofs - | str RC, TMPD - | b >1 - | - |->vmeta_tsetv: - | add CARG2, BASE, RB, lsl #3 - | add CARG3, BASE, RC, lsl #3 - |1: - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) - | // Returns TValue * (finished) or NULL (metamethod). - | ldr TMP0, [BASE, RA, lsl #3] - | cbz CRET1, >3 - | // NOBARRIER: lj_meta_tset ensures the table is not black. - | str TMP0, [CRET1] - | ins_next - | - |3: // Call __newindex metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) - | sub TMP1, BASE, #FRAME_CONT - | ldr BASE, L->top - | mov NARGS8:RC, #24 // 3 args for func(t, k, v). - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. - | str TMP0, [BASE, #16] // Copy value to third argument. 
- | str PC, [BASE, #-24] // [cont|PC] - | sub PC, BASE, TMP1 - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | b ->vm_call_dispatch_f - | - |->vmeta_tsetr: - | sxtw CARG3, TMP1w - | str BASE, L->base - | str PC, SAVE_PC - | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) - | // Returns TValue *. - | b ->BC_TSETR_Z - | - |//-- Comparison metamethods --------------------------------------------- - | - |->vmeta_comp: - | add CARG2, BASE, RA, lsl #3 - | sub PC, PC, #4 - | add CARG3, BASE, RC, lsl #3 - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | uxtb CARG4w, INSw - | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) - | // Returns 0/1 or TValue * (metamethod). - |3: - | cmp CRET1, #1 - | bhi ->vmeta_binop - |4: - | ldrh RBw, [PC, #2] - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | sub RB, RB, #0x20000 - | csel PC, PC, RB, lo - |->cont_nop: - | ins_next - | - |->cont_ra: // RA = resultptr - | ldr INSw, [PC, #-4] - | ldr TMP0, [RA] - | decode_RA TMP1, INS - | str TMP0, [BASE, TMP1, lsl #3] - | b ->cont_nop - | - |->cont_condt: // RA = resultptr - | ldr TMP0, [RA] - | mov_true TMP1 - | cmp TMP1, TMP0 // Branch if result is true. - | b <4 - | - |->cont_condf: // RA = resultptr - | ldr TMP0, [RA] - | mov_false TMP1 - | cmp TMP0, TMP1 // Branch if result is false. - | b <4 - | - |->vmeta_equal: - | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. - | and TAB:CARG3, CARG3, #LJ_GCVMASK - | sub PC, PC, #4 - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) - | // Returns 0/1 or TValue * (metamethod). - | b <3 - | - |->vmeta_equal_cd: - |.if FFI - | sub PC, PC, #4 - | str BASE, L->base - | mov CARG1, L - | mov CARG2, INS - | str PC, SAVE_PC - | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op) - | // Returns 0/1 or TValue * (metamethod). 
- | b <3 - |.endif - | - |->vmeta_istype: - | sub PC, PC, #4 - | str BASE, L->base - | mov CARG1, L - | mov CARG2, RA - | mov CARG3, RC - | str PC, SAVE_PC - | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) - | b ->cont_nop - | - |//-- Arithmetic metamethods --------------------------------------------- - | - |->vmeta_arith_vn: - | add CARG3, BASE, RB, lsl #3 - | add CARG4, KBASE, RC, lsl #3 - | b >1 - | - |->vmeta_arith_nv: - | add CARG4, BASE, RB, lsl #3 - | add CARG3, KBASE, RC, lsl #3 - | b >1 - | - |->vmeta_unm: - | add CARG3, BASE, RC, lsl #3 - | mov CARG4, CARG3 - | b >1 - | - |->vmeta_arith_vv: - | add CARG3, BASE, RB, lsl #3 - | add CARG4, BASE, RC, lsl #3 - |1: - | uxtb CARG5w, INSw - | add CARG2, BASE, RA, lsl #3 - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) - | // Returns NULL (finished) or TValue * (metamethod). - | cbz CRET1, ->cont_nop - | - | // Call metamethod for binary op. - |->vmeta_binop: - | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 - | sub TMP1, CRET1, BASE - | str PC, [CRET1, #-24] // [cont|PC] - | add PC, TMP1, #FRAME_CONT - | mov BASE, CRET1 - | mov NARGS8:RC, #16 // 2 args for func(o1, o2). - | b ->vm_call_dispatch - | - |->vmeta_len: - | add CARG2, BASE, RC, lsl #3 -#if LJ_52 - | mov TAB:RC, TAB:CARG1 // Save table (ignored for other types). -#endif - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_len // (lua_State *L, TValue *o) - | // Returns NULL (retry) or TValue * (metamethod base). -#if LJ_52 - | cbnz CRET1, ->vmeta_binop // Binop call for compatibility. - | mov TAB:CARG1, TAB:RC - | b ->BC_LEN_Z -#else - | b ->vmeta_binop // Binop call for compatibility. -#endif - | - |//-- Call metamethod ---------------------------------------------------- - | - |->vmeta_call: // Resolve and call __call metamethod. 
- | // RB = old base, BASE = new base, RC = nargs*8 - | mov CARG1, L - | str RB, L->base // This is the callers base! - | sub CARG2, BASE, #16 - | str PC, SAVE_PC - | add CARG3, BASE, NARGS8:RC - | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. - | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | ins_call - | - |->vmeta_callt: // Resolve __call for BC_CALLT. - | // BASE = old base, RA = new base, RC = nargs*8 - | mov CARG1, L - | str BASE, L->base - | sub CARG2, RA, #16 - | str PC, SAVE_PC - | add CARG3, RA, NARGS8:RC - | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | ldr TMP1, [RA, FRAME_FUNC] // Guaranteed to be a function here. - | ldr PC, [BASE, FRAME_PC] - | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. - | and LFUNC:CARG3, TMP1, #LJ_GCVMASK - | b ->BC_CALLT2_Z - | - |//-- Argument coercion for 'for' statement ------------------------------ - | - |->vmeta_for: - | mov CARG1, L - | str BASE, L->base - | mov CARG2, RA - | str PC, SAVE_PC - | bl extern lj_meta_for // (lua_State *L, TValue *base) - | ldr INSw, [PC, #-4] - |.if JIT - | uxtb TMP0w, INSw - |.endif - | decode_RA RA, INS - | decode_RD RC, INS - |.if JIT - | cmp TMP0, #BC_JFORI - | beq =>BC_JFORI - |.endif - | b =>BC_FORI - | - |//----------------------------------------------------------------------- - |//-- Fast functions ----------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro .ffunc, name - |->ff_ .. name: - |.endmacro - | - |.macro .ffunc_1, name - |->ff_ .. name: - | ldr CARG1, [BASE] - | cmp NARGS8:RC, #8 - | blo ->fff_fallback - |.endmacro - | - |.macro .ffunc_2, name - |->ff_ .. 
name: - | ldp CARG1, CARG2, [BASE] - | cmp NARGS8:RC, #16 - | blo ->fff_fallback - |.endmacro - | - |.macro .ffunc_n, name - | .ffunc name - | ldr CARG1, [BASE] - | cmp NARGS8:RC, #8 - | ldr FARG1, [BASE] - | blo ->fff_fallback - | checknum CARG1, ->fff_fallback - |.endmacro - | - |.macro .ffunc_nn, name - | .ffunc name - | ldp CARG1, CARG2, [BASE] - | cmp NARGS8:RC, #16 - | ldp FARG1, FARG2, [BASE] - | blo ->fff_fallback - | checknum CARG1, ->fff_fallback - | checknum CARG2, ->fff_fallback - |.endmacro - | - |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2. - |.macro ffgccheck - | ldp CARG1, CARG2, GL->gc.total // Assumes threshold follows total. - | cmp CARG1, CARG2 - | blt >1 - | bl ->fff_gcstep - |1: - |.endmacro - | - |//-- Base library: checks ----------------------------------------------- - | - |.ffunc_1 assert - | ldr PC, [BASE, FRAME_PC] - | mov_false TMP1 - | cmp CARG1, TMP1 - | bhs ->fff_fallback - | str CARG1, [BASE, #-16] - | sub RB, BASE, #8 - | subs RA, NARGS8:RC, #8 - | add RC, NARGS8:RC, #8 // Compute (nresults+1)*8. - | cbz RA, ->fff_res // Done if exactly 1 argument. - |1: - | ldr CARG1, [RB, #16] - | sub RA, RA, #8 - | str CARG1, [RB], #8 - | cbnz RA, <1 - | b ->fff_res - | - |.ffunc_1 type - | mov TMP0, #~LJ_TISNUM - | asr ITYPE, CARG1, #47 - | cmn ITYPE, #~LJ_TISNUM - | csinv TMP1, TMP0, ITYPE, lo - | add TMP1, TMP1, #offsetof(GCfuncC, upvalue)/8 - | ldr CARG1, [CFUNC:CARG3, TMP1, lsl #3] - | b ->fff_restv - | - |//-- Base library: getters and setters --------------------------------- - | - |.ffunc_1 getmetatable - | asr ITYPE, CARG1, #47 - | cmn ITYPE, #-LJ_TTAB - | ccmn ITYPE, #-LJ_TUDATA, #4, ne - | and TAB:CARG1, CARG1, #LJ_GCVMASK - | bne >6 - |1: // Field metatable must be at same offset for GCtab and GCudata! 
- | ldr TAB:RB, TAB:CARG1->metatable - |2: - | mov CARG1, TISNIL - | ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable] - | cbz TAB:RB, ->fff_restv - | ldr TMP1w, TAB:RB->hmask - | ldr TMP2w, STR:RC->hash - | ldr NODE:CARG3, TAB:RB->node - | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask - | add TMP1, TMP1, TMP1, lsl #1 - | movn CARG4, #~LJ_TSTR - | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 - | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for. - |3: // Rearranged logic, because we expect _not_ to find the key. - | ldp CARG1, TMP0, NODE:CARG3->val - | ldr NODE:CARG3, NODE:CARG3->next - | cmp TMP0, CARG4 - | beq >5 - | cbnz NODE:CARG3, <3 - |4: - | mov CARG1, RB // Use metatable as default result. - | movk CARG1, #(LJ_TTAB>>1)&0xffff, lsl #48 - | b ->fff_restv - |5: - | cmp TMP0, TISNIL - | bne ->fff_restv - | b <4 - | - |6: - | movn TMP0, #~LJ_TISNUM - | cmp ITYPE, TMP0 - | csel ITYPE, ITYPE, TMP0, hs - | sub TMP1, GL, ITYPE, lsl #3 - | ldr TAB:RB, [TMP1, #offsetof(global_State, gcroot[GCROOT_BASEMT])-8] - | b <2 - | - |.ffunc_2 setmetatable - | // Fast path: no mt for table yet and not clearing the mt. - | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback - | ldr TAB:TMP0, TAB:TMP1->metatable - | asr ITYPE, CARG2, #47 - | ldrb TMP2w, TAB:TMP1->marked - | cmn ITYPE, #-LJ_TTAB - | and TAB:CARG2, CARG2, #LJ_GCVMASK - | ccmp TAB:TMP0, #0, #0, eq - | bne ->fff_fallback - | str TAB:CARG2, TAB:TMP1->metatable - | tbz TMP2w, #2, ->fff_restv // isblack(table) - | barrierback TAB:TMP1, TMP2w, TMP0 - | b ->fff_restv - | - |.ffunc rawget - | ldr CARG2, [BASE] - | cmp NARGS8:RC, #16 - | blo ->fff_fallback - | checktab CARG2, ->fff_fallback - | mov CARG1, L - | add CARG3, BASE, #8 - | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) - | // Returns cTValue *. 
- | ldr CARG1, [CRET1] - | b ->fff_restv - | - |//-- Base library: conversions ------------------------------------------ - | - |.ffunc tonumber - | // Only handles the number case inline (without a base argument). - | ldr CARG1, [BASE] - | cmp NARGS8:RC, #8 - | bne ->fff_fallback - | checknumber CARG1, ->fff_fallback - | b ->fff_restv - | - |.ffunc_1 tostring - | // Only handles the string or number case inline. - | asr ITYPE, CARG1, #47 - | cmn ITYPE, #-LJ_TSTR - | // A __tostring method in the string base metatable is ignored. - | beq ->fff_restv - | // Handle numbers inline, unless a number base metatable is present. - | ldr TMP1, GL->gcroot[GCROOT_BASEMT_NUM] - | str BASE, L->base - | cmn ITYPE, #-LJ_TISNUM - | ccmp TMP1, #0, #0, ls - | str PC, SAVE_PC // Redundant (but a defined value). - | bne ->fff_fallback - | ffgccheck - | mov CARG1, L - | mov CARG2, BASE - | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) - | // Returns GCstr *. - | movn TMP1, #~LJ_TSTR - | ldr BASE, L->base - | add CARG1, CARG1, TMP1, lsl #47 - | b ->fff_restv - | - |//-- Base library: iterators ------------------------------------------- - | - |.ffunc_1 next - | checktp CARG2, CARG1, LJ_TTAB, ->fff_fallback - | str TISNIL, [BASE, NARGS8:RC] // Set missing 2nd arg to nil. - | ldr PC, [BASE, FRAME_PC] - | stp BASE, BASE, L->base // Add frame since C call can throw. - | mov CARG1, L - | add CARG3, BASE, #8 - | str PC, SAVE_PC - | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - | // Returns 0 at end of traversal. - | str TISNIL, [BASE, #-16] - | cbz CRET1, ->fff_res1 // End of traversal: return nil. - | ldp CARG1, CARG2, [BASE, #8] // Copy key and value to results. 
- | mov RC, #(2+1)*8 - | stp CARG1, CARG2, [BASE, #-16] - | b ->fff_res - | - |.ffunc_1 pairs - | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback -#if LJ_52 - | ldr TAB:CARG2, TAB:TMP1->metatable -#endif - | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0] - | ldr PC, [BASE, FRAME_PC] -#if LJ_52 - | cbnz TAB:CARG2, ->fff_fallback -#endif - | mov RC, #(3+1)*8 - | stp CARG1, TISNIL, [BASE, #-8] - | str CFUNC:CARG4, [BASE, #-16] - | b ->fff_res - | - |.ffunc_2 ipairs_aux - | checktab CARG1, ->fff_fallback - | checkint CARG2, ->fff_fallback - | ldr TMP1w, TAB:CARG1->asize - | ldr CARG3, TAB:CARG1->array - | ldr TMP0w, TAB:CARG1->hmask - | add CARG2w, CARG2w, #1 - | cmp CARG2w, TMP1w - | ldr PC, [BASE, FRAME_PC] - | add TMP2, CARG2, TISNUM - | mov RC, #(0+1)*8 - | str TMP2, [BASE, #-16] - | bhs >2 // Not in array part? - | ldr TMP0, [CARG3, CARG2, lsl #3] - |1: - | mov TMP1, #(2+1)*8 - | cmp TMP0, TISNIL - | str TMP0, [BASE, #-8] - | csel RC, RC, TMP1, eq - | b ->fff_res - |2: // Check for empty hash part first. Otherwise call C function. - | cbz TMP0w, ->fff_res - | bl extern lj_tab_getinth // (GCtab *t, int32_t key) - | // Returns cTValue * or NULL. 
- | cbz CRET1, ->fff_res - | ldr TMP0, [CRET1] - | b <1 - | - |.ffunc_1 ipairs - | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback -#if LJ_52 - | ldr TAB:CARG2, TAB:TMP1->metatable -#endif - | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0] - | ldr PC, [BASE, FRAME_PC] -#if LJ_52 - | cbnz TAB:CARG2, ->fff_fallback -#endif - | mov RC, #(3+1)*8 - | stp CARG1, TISNUM, [BASE, #-8] - | str CFUNC:CARG4, [BASE, #-16] - | b ->fff_res - | - |//-- Base library: catch errors ---------------------------------------- - | - |.ffunc pcall - | ldrb TMP0w, GL->hookmask - | subs NARGS8:RC, NARGS8:RC, #8 - | blo ->fff_fallback - | mov RB, BASE - | add BASE, BASE, #16 - | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 - | add PC, TMP0, #16+FRAME_PCALL - | beq ->vm_call_dispatch - |1: - | add TMP2, BASE, NARGS8:RC - |2: - | ldr TMP0, [TMP2, #-16] - | str TMP0, [TMP2, #-8]! - | cmp TMP2, BASE - | bne <2 - | b ->vm_call_dispatch - | - |.ffunc xpcall - | ldp CARG1, CARG2, [BASE] - | ldrb TMP0w, GL->hookmask - | subs NARGS8:RC, NARGS8:RC, #16 - | blo ->fff_fallback - | mov RB, BASE - | add BASE, BASE, #24 - | asr ITYPE, CARG2, #47 - | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 - | cmn ITYPE, #-LJ_TFUNC - | add PC, TMP0, #24+FRAME_PCALL - | bne ->fff_fallback // Traceback must be a function. - | stp CARG2, CARG1, [RB] // Swap function and traceback. 
- | cbz NARGS8:RC, ->vm_call_dispatch - | b <1 - | - |//-- Coroutine library -------------------------------------------------- - | - |.macro coroutine_resume_wrap, resume - |.if resume - |.ffunc_1 coroutine_resume - | checktp CARG1, LJ_TTHREAD, ->fff_fallback - |.else - |.ffunc coroutine_wrap_aux - | ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr - | and L:CARG1, CARG1, #LJ_GCVMASK - |.endif - | ldr PC, [BASE, FRAME_PC] - | str BASE, L->base - | ldp RB, CARG2, L:CARG1->base - | ldrb TMP1w, L:CARG1->status - | add TMP0, CARG2, TMP1 - | str PC, SAVE_PC - | cmp TMP0, RB - | beq ->fff_fallback - | cmp TMP1, #LUA_YIELD - | add TMP0, CARG2, #8 - | csel CARG2, CARG2, TMP0, hs - | ldr CARG4, L:CARG1->maxstack - | add CARG3, CARG2, NARGS8:RC - | ldr RB, L:CARG1->cframe - | ccmp CARG3, CARG4, #2, ls - | ccmp RB, #0, #2, ls - | bhi ->fff_fallback - |.if resume - | sub CARG3, CARG3, #8 // Keep resumed thread in stack for GC. - | add BASE, BASE, #8 - | sub NARGS8:RC, NARGS8:RC, #8 - |.endif - | str CARG3, L:CARG1->top - | str BASE, L->top - | cbz NARGS8:RC, >3 - |2: // Move args to coroutine. - | ldr TMP0, [BASE, RB] - | cmp RB, NARGS8:RC - | str TMP0, [CARG2, RB] - | add RB, RB, #8 - | bne <2 - |3: - | mov CARG3, #0 - | mov L:RA, L:CARG1 - | mov CARG4, #0 - | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) - | // Returns thread status. - |4: - | ldp CARG3, CARG4, L:RA->base - | cmp CRET1, #LUA_YIELD - | ldr BASE, L->base - | str L, GL->cur_L - | st_vmstate ST_INTERP - | bhi >8 - | sub RC, CARG4, CARG3 - | ldr CARG1, L->maxstack - | add CARG2, BASE, RC - | cbz RC, >6 // No results? - | cmp CARG2, CARG1 - | mov RB, #0 - | bhi >9 // Need to grow stack? - | - | sub CARG4, RC, #8 - | str CARG3, L:RA->top // Clear coroutine stack. - |5: // Move results from coroutine. 
- | ldr TMP0, [CARG3, RB] - | cmp RB, CARG4 - | str TMP0, [BASE, RB] - | add RB, RB, #8 - | bne <5 - |6: - |.if resume - | mov_true TMP1 - | add RC, RC, #16 - |7: - | str TMP1, [BASE, #-8] // Prepend true/false to results. - | sub RA, BASE, #8 - |.else - | mov RA, BASE - | add RC, RC, #8 - |.endif - | ands CARG1, PC, #FRAME_TYPE - | str PC, SAVE_PC - | str RCw, SAVE_MULTRES - | beq ->BC_RET_Z - | b ->vm_return - | - |8: // Coroutine returned with error (at co->top-1). - |.if resume - | ldr TMP0, [CARG4, #-8]! - | mov_false TMP1 - | mov RC, #(2+1)*8 - | str CARG4, L:RA->top // Remove error from coroutine stack. - | str TMP0, [BASE] // Copy error message. - | b <7 - |.else - | mov CARG1, L - | mov CARG2, L:RA - | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) - | // Never returns. - |.endif - | - |9: // Handle stack expansion on return from yield. - | mov CARG1, L - | lsr CARG2, RC, #3 - | bl extern lj_state_growstack // (lua_State *L, int n) - | mov CRET1, #0 - | b <4 - |.endmacro - | - | coroutine_resume_wrap 1 // coroutine.resume - | coroutine_resume_wrap 0 // coroutine.wrap - | - |.ffunc coroutine_yield - | ldr TMP0, L->cframe - | add TMP1, BASE, NARGS8:RC - | mov CRET1, #LUA_YIELD - | stp BASE, TMP1, L->base - | tbz TMP0, #0, ->fff_fallback - | str xzr, L->cframe - | strb CRET1w, L->status - | b ->vm_leave_unw - | - |//-- Math library ------------------------------------------------------- - | - |.macro math_round, func, round - | .ffunc math_ .. func - | ldr CARG1, [BASE] - | cmp NARGS8:RC, #8 - | ldr d0, [BASE] - | blo ->fff_fallback - | cmp TISNUMhi, CARG1, lsr #32 - | beq ->fff_restv - | blo ->fff_fallback - | round d0, d0 - | b ->fff_resn - |.endmacro - | - | math_round floor, frintm - | math_round ceil, frintp - | - |.ffunc_1 math_abs - | checknumber CARG1, ->fff_fallback - | and CARG1, CARG1, #U64x(7fffffff,ffffffff) - | bne ->fff_restv - | eor CARG2w, CARG1w, CARG1w, asr #31 - | movz CARG3, #0x41e0, lsl #48 // 2^31. 
- | subs CARG1w, CARG2w, CARG1w, asr #31 - | add CARG1, CARG1, TISNUM - | csel CARG1, CARG1, CARG3, pl - | // Fallthrough. - | - |->fff_restv: - | // CARG1 = TValue result. - | ldr PC, [BASE, FRAME_PC] - | str CARG1, [BASE, #-16] - |->fff_res1: - | // PC = return. - | mov RC, #(1+1)*8 - |->fff_res: - | // RC = (nresults+1)*8, PC = return. - | ands CARG1, PC, #FRAME_TYPE - | str RCw, SAVE_MULTRES - | sub RA, BASE, #16 - | bne ->vm_return - | ldr INSw, [PC, #-4] - | decode_RB RB, INS - |5: - | cmp RC, RB, lsl #3 // More results expected? - | blo >6 - | decode_RA TMP1, INS - | // Adjust BASE. KBASE is assumed to be set for the calling frame. - | sub BASE, RA, TMP1, lsl #3 - | ins_next - | - |6: // Fill up results with nil. - | add TMP1, RA, RC - | add RC, RC, #8 - | str TISNIL, [TMP1, #-8] - | b <5 - | - |.macro math_extern, func - | .ffunc_n math_ .. func - | bl extern func - | b ->fff_resn - |.endmacro - | - |.macro math_extern2, func - | .ffunc_nn math_ .. func - | bl extern func - | b ->fff_resn - |.endmacro - | - |.ffunc_n math_sqrt - | fsqrt d0, d0 - |->fff_resn: - | ldr PC, [BASE, FRAME_PC] - | str d0, [BASE, #-16] - | b ->fff_res1 - | - |.ffunc math_log - | ldr CARG1, [BASE] - | cmp NARGS8:RC, #8 - | ldr FARG1, [BASE] - | bne ->fff_fallback // Need exactly 1 argument. 
- | checknum CARG1, ->fff_fallback - | bl extern log - | b ->fff_resn - | - | math_extern log10 - | math_extern exp - | math_extern sin - | math_extern cos - | math_extern tan - | math_extern asin - | math_extern acos - | math_extern atan - | math_extern sinh - | math_extern cosh - | math_extern tanh - | math_extern2 pow - | math_extern2 atan2 - | math_extern2 fmod - | - |.ffunc_2 math_ldexp - | ldr FARG1, [BASE] - | checknum CARG1, ->fff_fallback - | checkint CARG2, ->fff_fallback - | sxtw CARG1, CARG2w - | bl extern ldexp // (double x, int exp) - | b ->fff_resn - | - |.ffunc_n math_frexp - | add CARG1, sp, TMPDofs - | bl extern frexp - | ldr CARG2w, TMPD - | ldr PC, [BASE, FRAME_PC] - | str d0, [BASE, #-16] - | mov RC, #(2+1)*8 - | add CARG2, CARG2, TISNUM - | str CARG2, [BASE, #-8] - | b ->fff_res - | - |.ffunc_n math_modf - | sub CARG1, BASE, #16 - | ldr PC, [BASE, FRAME_PC] - | bl extern modf - | mov RC, #(2+1)*8 - | str d0, [BASE, #-8] - | b ->fff_res - | - |.macro math_minmax, name, cond, fcond - | .ffunc_1 name - | add RB, BASE, RC - | add RA, BASE, #8 - | checkint CARG1, >4 - |1: // Handle integers. - | ldr CARG2, [RA] - | cmp RA, RB - | bhs ->fff_restv - | checkint CARG2, >3 - | cmp CARG1w, CARG2w - | add RA, RA, #8 - | csel CARG1, CARG2, CARG1, cond - | b <1 - |3: // Convert intermediate result to number and continue below. - | scvtf d0, CARG1w - | blo ->fff_fallback - | ldr d1, [RA] - | b >6 - | - |4: - | ldr d0, [BASE] - | blo ->fff_fallback - |5: // Handle numbers. - | ldr CARG2, [RA] - | ldr d1, [RA] - | cmp RA, RB - | bhs ->fff_resn - | checknum CARG2, >7 - |6: - | fcmp d0, d1 - | add RA, RA, #8 - | fcsel d0, d1, d0, fcond - | b <5 - |7: // Convert integer to number and continue above. 
- | scvtf d1, CARG2w - | blo ->fff_fallback - | b <6 - |.endmacro - | - | math_minmax math_min, gt, hi - | math_minmax math_max, lt, lo - | - |//-- String library ----------------------------------------------------- - | - |.ffunc string_byte // Only handle the 1-arg case here. - | ldp PC, CARG1, [BASE, FRAME_PC] - | cmp NARGS8:RC, #8 - | asr ITYPE, CARG1, #47 - | ccmn ITYPE, #-LJ_TSTR, #0, eq - | and STR:CARG1, CARG1, #LJ_GCVMASK - | bne ->fff_fallback - | ldrb TMP0w, STR:CARG1[1] // Access is always ok (NUL at end). - | ldr CARG3w, STR:CARG1->len - | add TMP0, TMP0, TISNUM - | str TMP0, [BASE, #-16] - | mov RC, #(0+1)*8 - | cbz CARG3, ->fff_res - | b ->fff_res1 - | - |.ffunc string_char // Only handle the 1-arg case here. - | ffgccheck - | ldp PC, CARG1, [BASE, FRAME_PC] - | cmp CARG1w, #255 - | ccmp NARGS8:RC, #8, #0, ls // Need exactly 1 argument. - | bne ->fff_fallback - | checkint CARG1, ->fff_fallback - | mov CARG3, #1 - | mov CARG2, BASE // Points to stack. Little-endian. - |->fff_newstr: - | // CARG2 = str, CARG3 = len. - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_str_new // (lua_State *L, char *str, size_t l) - |->fff_resstr: - | // Returns GCstr *. 
- | ldr BASE, L->base - | movn TMP1, #~LJ_TSTR - | add CARG1, CARG1, TMP1, lsl #47 - | b ->fff_restv - | - |.ffunc string_sub - | ffgccheck - | ldr CARG1, [BASE] - | ldr CARG3, [BASE, #16] - | cmp NARGS8:RC, #16 - | movn RB, #0 - | beq >1 - | blo ->fff_fallback - | checkint CARG3, ->fff_fallback - | sxtw RB, CARG3w - |1: - | ldr CARG2, [BASE, #8] - | checkstr CARG1, ->fff_fallback - | ldr TMP1w, STR:CARG1->len - | checkint CARG2, ->fff_fallback - | sxtw CARG2, CARG2w - | // CARG1 = str, TMP1 = str->len, CARG2 = start, RB = end - | add TMP2, RB, TMP1 - | cmp RB, #0 - | add TMP0, CARG2, TMP1 - | csinc RB, RB, TMP2, ge // if (end < 0) end += len+1 - | cmp CARG2, #0 - | csinc CARG2, CARG2, TMP0, ge // if (start < 0) start += len+1 - | cmp RB, #0 - | csel RB, RB, xzr, ge // if (end < 0) end = 0 - | cmp CARG2, #1 - | csinc CARG2, CARG2, xzr, ge // if (start < 1) start = 1 - | cmp RB, TMP1 - | csel RB, RB, TMP1, le // if (end > len) end = len - | add CARG1, STR:CARG1, #sizeof(GCstr)-1 - | subs CARG3, RB, CARG2 // len = end - start - | add CARG2, CARG1, CARG2 - | add CARG3, CARG3, #1 // len += 1 - | bge ->fff_newstr - | add STR:CARG1, GL, #offsetof(global_State, strempty) - | movn TMP1, #~LJ_TSTR - | add CARG1, CARG1, TMP1, lsl #47 - | b ->fff_restv - | - |.macro ffstring_op, name - | .ffunc string_ .. name - | ffgccheck - | ldr CARG2, [BASE] - | cmp NARGS8:RC, #8 - | asr ITYPE, CARG2, #47 - | ccmn ITYPE, #-LJ_TSTR, #0, hs - | and STR:CARG2, CARG2, #LJ_GCVMASK - | bne ->fff_fallback - | ldr TMP0, GL->tmpbuf.b - | add SBUF:CARG1, GL, #offsetof(global_State, tmpbuf) - | str BASE, L->base - | str PC, SAVE_PC - | str L, GL->tmpbuf.L - | str TMP0, GL->tmpbuf.p - | bl extern lj_buf_putstr_ .. name - | bl extern lj_buf_tostr - | b ->fff_resstr - |.endmacro - | - |ffstring_op reverse - |ffstring_op lower - |ffstring_op upper - | - |//-- Bit library -------------------------------------------------------- - | - |// FP number to bit conversion for soft-float. 
Clobbers CARG1-CARG3 - |->vm_tobit_fb: - | bls ->fff_fallback - | add CARG2, CARG1, CARG1 - | mov CARG3, #1076 - | sub CARG3, CARG3, CARG2, lsr #53 - | cmp CARG3, #53 - | bhi >1 - | and CARG2, CARG2, #U64x(001fffff,ffffffff) - | orr CARG2, CARG2, #U64x(00200000,00000000) - | cmp CARG1, #0 - | lsr CARG2, CARG2, CARG3 - | cneg CARG1w, CARG2w, mi - | br lr - |1: - | mov CARG1w, #0 - | br lr - | - |.macro .ffunc_bit, name - | .ffunc_1 bit_..name - | adr lr, >1 - | checkint CARG1, ->vm_tobit_fb - |1: - |.endmacro - | - |.macro .ffunc_bit_op, name, ins - | .ffunc_bit name - | mov RA, #8 - | mov TMP0w, CARG1w - | adr lr, >2 - |1: - | ldr CARG1, [BASE, RA] - | cmp RA, NARGS8:RC - | add RA, RA, #8 - | bge >9 - | checkint CARG1, ->vm_tobit_fb - |2: - | ins TMP0w, TMP0w, CARG1w - | b <1 - |.endmacro - | - |.ffunc_bit_op band, and - |.ffunc_bit_op bor, orr - |.ffunc_bit_op bxor, eor - | - |.ffunc_bit tobit - | mov TMP0w, CARG1w - |9: // Label reused by .ffunc_bit_op users. - | add CARG1, TMP0, TISNUM - | b ->fff_restv - | - |.ffunc_bit bswap - | rev TMP0w, CARG1w - | add CARG1, TMP0, TISNUM - | b ->fff_restv - | - |.ffunc_bit bnot - | mvn TMP0w, CARG1w - | add CARG1, TMP0, TISNUM - | b ->fff_restv - | - |.macro .ffunc_bit_sh, name, ins, shmod - | .ffunc bit_..name - | ldp TMP0, CARG1, [BASE] - | cmp NARGS8:RC, #16 - | blo ->fff_fallback - | adr lr, >1 - | checkint CARG1, ->vm_tobit_fb - |1: - |.if shmod == 0 - | mov TMP1, CARG1 - |.else - | neg TMP1, CARG1 - |.endif - | mov CARG1, TMP0 - | adr lr, >2 - | checkint CARG1, ->vm_tobit_fb - |2: - | ins TMP0w, CARG1w, TMP1w - | add CARG1, TMP0, TISNUM - | b ->fff_restv - |.endmacro - | - |.ffunc_bit_sh lshift, lsl, 0 - |.ffunc_bit_sh rshift, lsr, 0 - |.ffunc_bit_sh arshift, asr, 0 - |.ffunc_bit_sh rol, ror, 1 - |.ffunc_bit_sh ror, ror, 0 - | - |//----------------------------------------------------------------------- - | - |->fff_fallback: // Call fast function fallback handler. 
- | // BASE = new base, RC = nargs*8 - | ldp CFUNC:CARG3, PC, [BASE, FRAME_FUNC] // Fallback may overwrite PC. - | ldr TMP2, L->maxstack - | add TMP1, BASE, NARGS8:RC - | stp BASE, TMP1, L->base - | and CFUNC:CARG3, CARG3, #LJ_GCVMASK - | add TMP1, TMP1, #8*LUA_MINSTACK - | ldr CARG3, CFUNC:CARG3->f - | str PC, SAVE_PC // Redundant (but a defined value). - | cmp TMP1, TMP2 - | mov CARG1, L - | bhi >5 // Need to grow stack. - | blr CARG3 // (lua_State *L) - | // Either throws an error, or recovers and returns -1, 0 or nresults+1. - | ldr BASE, L->base - | cmp CRET1w, #0 - | lsl RC, CRET1, #3 - | sub RA, BASE, #16 - | bgt ->fff_res // Returned nresults+1? - |1: // Returned 0 or -1: retry fast path. - | ldr CARG1, L->top - | ldr CFUNC:CARG3, [BASE, FRAME_FUNC] - | sub NARGS8:RC, CARG1, BASE - | bne ->vm_call_tail // Returned -1? - | and CFUNC:CARG3, CARG3, #LJ_GCVMASK - | ins_callt // Returned 0: retry fast path. - | - |// Reconstruct previous base for vmeta_call during tailcall. - |->vm_call_tail: - | ands TMP0, PC, #FRAME_TYPE - | and TMP1, PC, #~FRAME_TYPEP - | bne >3 - | ldrb RAw, [PC, #-3] - | lsl RA, RA, #3 - | add TMP1, RA, #16 - |3: - | sub RB, BASE, TMP1 - | b ->vm_call_dispatch // Resolve again for tailcall. - | - |5: // Grow stack for fallback handler. - | mov CARG2, #LUA_MINSTACK - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldr BASE, L->base - | cmp CARG1, CARG1 // Set zero-flag to force retry. - | b <1 - | - |->fff_gcstep: // Call GC step function. - | // BASE = new base, RC = nargs*8 - | add CARG2, BASE, NARGS8:RC // Calculate L->top. - | mov RA, lr - | stp BASE, CARG2, L->base - | str PC, SAVE_PC // Redundant (but a defined value). - | mov CARG1, L - | bl extern lj_gc_step // (lua_State *L) - | ldp BASE, CARG2, L->base - | ldr CFUNC:CARG3, [BASE, FRAME_FUNC] - | mov lr, RA // Help return address predictor. - | sub NARGS8:RC, CARG2, BASE // Calculate nargs*8. 
- | and CFUNC:CARG3, CARG3, #LJ_GCVMASK - | ret - | - |//----------------------------------------------------------------------- - |//-- Special dispatch targets ------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_record: // Dispatch target for recording phase. - |.if JIT - | ldrb CARG1w, GL->hookmask - | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent. - | bne >5 - | // Decrement the hookcount for consistency, but always do the call. - | ldr CARG2w, GL->hookcount - | tst CARG1, #HOOK_ACTIVE - | bne >1 - | sub CARG2w, CARG2w, #1 - | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT - | beq >1 - | str CARG2w, GL->hookcount - | b >1 - |.endif - | - |->vm_rethook: // Dispatch target for return hooks. - | ldrb TMP2w, GL->hookmask - | tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1 // Hook already active? - |5: // Re-dispatch to static ins. - | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC] - | br TMP0 - | - |->vm_inshook: // Dispatch target for instr/line hooks. - | ldrb TMP2w, GL->hookmask - | ldr TMP3w, GL->hookcount - | tbnz TMP2w, #HOOK_ACTIVE_SHIFT, <5 // Hook already active? - | tst TMP2w, #LUA_MASKLINE|LUA_MASKCOUNT - | beq <5 - | sub TMP3w, TMP3w, #1 - | str TMP3w, GL->hookcount - | cbz TMP3w, >1 - | tbz TMP2w, #LUA_HOOKLINE, <5 - |1: - | mov CARG1, L - | str BASE, L->base - | mov CARG2, PC - | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. - | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) - |3: - | ldr BASE, L->base - |4: // Re-dispatch to static ins. - | ldr INSw, [PC, #-4] - | add TMP1, GL, INS, uxtb #3 - | decode_RA RA, INS - | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC] - | decode_RD RC, INS - | br TMP0 - | - |->cont_hook: // Continue from hook yield. - | ldr CARG1, [CARG4, #-40] - | add PC, PC, #4 - | str CARG1w, SAVE_MULTRES // Restore MULTRES for *M ins. - | b <4 - | - |->vm_hotloop: // Hot loop counter underflow. 
- |.if JIT - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L). - | add CARG1, GL, #GG_G2DISP+GG_DISP2J - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | str PC, SAVE_PC - | ldr CARG3, LFUNC:CARG3->pc - | mov CARG2, PC - | str L, [GL, #GL_J(L)] - | ldrb CARG3w, [CARG3, #PC2PROTO(framesize)] - | str BASE, L->base - | add CARG3, BASE, CARG3, lsl #3 - | str CARG3, L->top - | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) - | b <3 - |.endif - | - |->vm_callhook: // Dispatch target for call hooks. - | mov CARG2, PC - |.if JIT - | b >1 - |.endif - | - |->vm_hotcall: // Hot call counter underflow. - |.if JIT - | orr CARG2, PC, #1 - |1: - |.endif - | add TMP1, BASE, NARGS8:RC - | str PC, SAVE_PC - | mov CARG1, L - | sub RA, RA, BASE - | stp BASE, TMP1, L->base - | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) - | // Returns ASMFunction. - | ldp BASE, TMP1, L->base - | str xzr, SAVE_PC // Invalidate for subsequent line hook. - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | add RA, BASE, RA - | sub NARGS8:RC, TMP1, BASE - | ldr INSw, [PC, #-4] - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | br CRET1 - | - |->cont_stitch: // Trace stitching. - |.if JIT - | // RA = resultptr, CARG4 = meta base - | ldr RBw, SAVE_MULTRES // MULTRES is stored as 32 bit; the w-load zero-extends into RB. - | ldr INSw, [PC, #-4] - | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace. - | subs RB, RB, #8 - | decode_RA RC, INS // Call base. - | and CARG3, CARG3, #LJ_GCVMASK - | beq >2 - |1: // Move results down. - | ldr CARG1, [RA] - | add RA, RA, #8 - | subs RB, RB, #8 - | str CARG1, [BASE, RC, lsl #3] - | add RC, RC, #1 - | bne <1 - |2: - | decode_RA RA, INS - | decode_RB RB, INS - | add RA, RA, RB - |3: - | cmp RA, RC - | bhi >9 // More results wanted? - | - | ldrh RAw, TRACE:CARG3->traceno - | ldrh RCw, TRACE:CARG3->link - | cmp RCw, RAw - | beq ->cont_nop // Blacklisted. - | cmp RCw, #0 - | bne =>BC_JLOOP // Jump to stitched trace. - | - | // Stitch a new trace to the previous trace.
- | mov CARG1, #GL_J(exitno) - | str RA, [GL, CARG1] - | mov CARG1, #GL_J(L) - | str L, [GL, CARG1] - | str BASE, L->base - | add CARG1, GL, #GG_G2J - | mov CARG2, PC - | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) - | ldr BASE, L->base - | b ->cont_nop - | - |9: // Fill up results with nil. - | str TISNIL, [BASE, RC, lsl #3] - | add RC, RC, #1 - | b <3 - |.endif - | - |->vm_profhook: // Dispatch target for profiler hook. -#if LJ_HASPROFILE - | mov CARG1, L - | str BASE, L->base - | mov CARG2, PC - | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) - | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. - | ldr BASE, L->base - | sub PC, PC, #4 - | b ->cont_nop -#endif - | - |//----------------------------------------------------------------------- - |//-- Trace exit handler ------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro savex_, a, b - | stp d..a, d..b, [sp, #a*8] - | stp x..a, x..b, [sp, #32*8+a*8] - |.endmacro - | - |->vm_exit_handler: - |.if JIT - | sub sp, sp, #(64*8) - | savex_, 0, 1 - | savex_, 2, 3 - | savex_, 4, 5 - | savex_, 6, 7 - | savex_, 8, 9 - | savex_, 10, 11 - | savex_, 12, 13 - | savex_, 14, 15 - | savex_, 16, 17 - | savex_, 18, 19 - | savex_, 20, 21 - | savex_, 22, 23 - | savex_, 24, 25 - | savex_, 26, 27 - | savex_, 28, 29 - | stp d30, d31, [sp, #30*8] - | ldr CARG1, [sp, #64*8] // Load original value of lr. - | add CARG3, sp, #64*8 // Recompute original value of sp. - | mv_vmstate CARG4, EXIT - | stp xzr, CARG3, [sp, #62*8] // Store 0/sp in RID_LR/RID_SP. - | sub CARG1, CARG1, lr - | ldr L, GL->cur_L - | lsr CARG1, CARG1, #2 - | ldr BASE, GL->jit_base - | sub CARG1, CARG1, #2 - | ldr CARG2w, [lr] // Load trace number. 
- | st_vmstate CARG4 - | str BASE, L->base - | ubfx CARG2w, CARG2w, #5, #16 - | str CARG1w, [GL, #GL_J(exitno)] - | str CARG2w, [GL, #GL_J(parent)] - | str L, [GL, #GL_J(L)] - | str xzr, GL->jit_base - | add CARG1, GL, #GG_G2J - | mov CARG2, sp - | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) - | // Returns MULTRES (unscaled) or negated error code. - | ldr CARG2, L->cframe - | ldr BASE, L->base - | and sp, CARG2, #CFRAME_RAWMASK - | ldr PC, SAVE_PC // Get SAVE_PC. - | str L, SAVE_L // Set SAVE_L (on-trace resume/yield). - | b >1 - |.endif - | - |->vm_exit_interp: - | // CARG1 = MULTRES or negated error code, BASE, PC and GL set. - |.if JIT - | ldr L, SAVE_L - |1: - | cmp CARG1w, #0 - | blt >9 // Check for error from exit. - | lsl RC, CARG1, #3 - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | movn TISNIL, #0 - | and LFUNC:CARG2, CARG2, #LJ_GCVMASK - | str RC, SAVE_MULTRES - | str BASE, L->base - | ldr CARG2, LFUNC:CARG2->pc - | str xzr, GL->jit_base - | mv_vmstate CARG4, INTERP - | ldr KBASE, [CARG2, #PC2PROTO(k)] - | // Modified copy of ins_next which handles function header dispatch, too. - | ldrb RBw, [PC] - | ldr INSw, [PC], #4 - | st_vmstate CARG4 - | cmp RBw, #BC_FUNCC+2 // Fast function? - | add TMP1, GL, INS, uxtb #3 - | bhs >4 - |2: - | cmp RBw, #BC_FUNCF // Function header? - | add TMP0, GL, RB, uxtb #3 - | ldr RB, [TMP0, #GG_G2DISP] - | decode_RA RA, INS - | lsr TMP0, INS, #16 - | csel RC, TMP0, RC, lo - | blo >5 - | ldr CARG3, [BASE, FRAME_FUNC] - | sub RC, RC, #8 - | add RA, BASE, RA, lsl #3 // Yes: RA = BASE+framesize*8, RC = nargs*8 - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - |5: - | br RB - | - |4: // Check frame below fast function. - | ldr CARG1, [BASE, FRAME_PC] - | ands CARG2, CARG1, #FRAME_TYPE - | bne <2 // Trace stitching continuation? - | // Otherwise set KBASE for Lua function below fast function. 
- | ldr CARG3, [CARG1, #-4] - | decode_RA CARG1, CARG3 - | sub CARG2, BASE, CARG1, lsl #3 - | ldr LFUNC:CARG3, [CARG2, #-32] - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | ldr CARG3, LFUNC:CARG3->pc - | ldr KBASE, [CARG3, #PC2PROTO(k)] - | b <2 - | - |9: // Rethrow error from the right C frame. - | neg CARG2, CARG1 - | mov CARG1, L - | bl extern lj_err_throw // (lua_State *L, int errcode) - |.endif - | - |//----------------------------------------------------------------------- - |//-- Math helper functions ---------------------------------------------- - |//----------------------------------------------------------------------- - | - | // int lj_vm_modi(int dividend, int divisor); - | // Floored modulo: a non-zero result takes the sign of the divisor. - |->vm_modi: - | eor CARG4w, CARG1w, CARG2w - | cmp CARG4w, #0 // mi: operand signs differ. - | eor CARG3w, CARG1w, CARG1w, asr #31 - | eor CARG4w, CARG2w, CARG2w, asr #31 - | sub CARG3w, CARG3w, CARG1w, asr #31 // CARG3w = abs(dividend). - | sub CARG4w, CARG4w, CARG2w, asr #31 // CARG4w = abs(divisor). - | udiv CARG1w, CARG3w, CARG4w - | msub CARG1w, CARG1w, CARG4w, CARG3w // Unsigned remainder of the absolute values. - | ccmp CARG1w, #0, #4, mi // eq if signs are equal or the remainder is zero. - | sub CARG3w, CARG1w, CARG4w - | csel CARG1w, CARG1w, CARG3w, eq // Otherwise use remainder - abs(divisor). - | eor CARG3w, CARG1w, CARG2w - | cmp CARG3w, #0 - | cneg CARG1w, CARG1w, mi // Negate if the result sign differs from the divisor sign. - | ret - | - |//----------------------------------------------------------------------- - |//-- Miscellaneous functions -------------------------------------------- - |//----------------------------------------------------------------------- - | - |//----------------------------------------------------------------------- - |//-- FFI helper functions ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Handler for callback functions. - |// Callback slot number in w9, g in x10. Registers are saved by the saveregs below.
- |->vm_ffi_callback: - |.if FFI - |.type CTSTATE, CTState, PC - | saveregs - | ldr CTSTATE, GL:x10->ctype_state - | mov GL, x10 - | add x10, sp, # CFRAME_SPACE - | str w9, CTSTATE->cb.slot - | stp x0, x1, CTSTATE->cb.gpr[0] - | stp d0, d1, CTSTATE->cb.fpr[0] - | stp x2, x3, CTSTATE->cb.gpr[2] - | stp d2, d3, CTSTATE->cb.fpr[2] - | stp x4, x5, CTSTATE->cb.gpr[4] - | stp d4, d5, CTSTATE->cb.fpr[4] - | stp x6, x7, CTSTATE->cb.gpr[6] - | stp d6, d7, CTSTATE->cb.fpr[6] - | str x10, CTSTATE->cb.stack - | mov CARG1, CTSTATE - | str CTSTATE, SAVE_PC // Any value outside of bytecode is ok. - | mov CARG2, sp - | bl extern lj_ccallback_enter // (CTState *cts, void *cf) - | // Returns lua_State *. - | ldp BASE, RC, L:CRET1->base - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | movn TISNIL, #0 - | mov L, CRET1 - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | sub RC, RC, BASE - | st_vmstate ST_INTERP - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | ins_callt - |.endif - | - |->cont_ffi_callback: // Return from FFI callback. - |.if FFI - | ldr CTSTATE, GL->ctype_state - | stp BASE, CARG4, L->base - | str L, CTSTATE->L - | mov CARG1, CTSTATE - | mov CARG2, RA - | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) - | ldp x0, x1, CTSTATE->cb.gpr[0] - | ldp d0, d1, CTSTATE->cb.fpr[0] - | b ->vm_leave_unw - |.endif - | - |->vm_ffi_call: // Call C function via FFI. - | // Caveat: needs special frame unwinding, see below. - |.if FFI - | .type CCSTATE, CCallState, x19 - | stp fp, lr, [sp, #-32]! 
- | add fp, sp, #0 - | str CCSTATE, [sp, #16] - | mov CCSTATE, x0 - | ldr TMP0w, CCSTATE:x0->spadj - | ldrb TMP1w, CCSTATE->nsp - | add TMP2, CCSTATE, #offsetof(CCallState, stack) - | subs TMP1, TMP1, #1 - | ldr TMP3, CCSTATE->func - | sub sp, fp, TMP0 - | bmi >2 - |1: // Copy stack slots - | ldr TMP0, [TMP2, TMP1, lsl #3] - | str TMP0, [sp, TMP1, lsl #3] - | subs TMP1, TMP1, #1 - | bpl <1 - |2: - | ldp x0, x1, CCSTATE->gpr[0] - | ldp d0, d1, CCSTATE->fpr[0] - | ldp x2, x3, CCSTATE->gpr[2] - | ldp d2, d3, CCSTATE->fpr[2] - | ldp x4, x5, CCSTATE->gpr[4] - | ldp d4, d5, CCSTATE->fpr[4] - | ldp x6, x7, CCSTATE->gpr[6] - | ldp d6, d7, CCSTATE->fpr[6] - | ldr x8, CCSTATE->retp - | blr TMP3 - | mov sp, fp - | stp x0, x1, CCSTATE->gpr[0] - | stp d0, d1, CCSTATE->fpr[0] - | stp d2, d3, CCSTATE->fpr[2] - | ldr CCSTATE, [sp, #16] - | ldp fp, lr, [sp], #32 - | ret - |.endif - |// Note: vm_ffi_call must be the last function in this object file! - | - |//----------------------------------------------------------------------- -} - -/* Generate the code for a single instruction. */ -static void build_ins(BuildCtx *ctx, BCOp op, int defop) -{ - int vk = 0; - |=>defop: - - switch (op) { - - /* -- Comparison ops ---------------------------------------------------- */ - - /* Remember: all ops branch for a true comparison, fall through otherwise. */ - - case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: - | // RA = src1, RC = src2, JMP with RC = target - | ldr CARG1, [BASE, RA, lsl #3] - | ldrh RBw, [PC, #2] - | ldr CARG2, [BASE, RC, lsl #3] - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | sub RB, RB, #0x20000 - | checkint CARG1, >3 - | checkint CARG2, >4 - | cmp CARG1w, CARG2w - if (op == BC_ISLT) { - | csel PC, RB, PC, lt - } else if (op == BC_ISGE) { - | csel PC, RB, PC, ge - } else if (op == BC_ISLE) { - | csel PC, RB, PC, le - } else { - | csel PC, RB, PC, gt - } - |1: - | ins_next - | - |3: // RA not int. 
- | ldr FARG1, [BASE, RA, lsl #3] - | blo ->vmeta_comp - | ldr FARG2, [BASE, RC, lsl #3] - | cmp TISNUMhi, CARG2, lsr #32 - | bhi >5 - | bne ->vmeta_comp - | // RA number, RC int. - | scvtf FARG2, CARG2w - | b >5 - | - |4: // RA int, RC not int - | ldr FARG2, [BASE, RC, lsl #3] - | blo ->vmeta_comp - | // RA int, RC number. - | scvtf FARG1, CARG1w - | - |5: // RA number, RC number - | fcmp FARG1, FARG2 - | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. - if (op == BC_ISLT) { - | csel PC, RB, PC, lo - } else if (op == BC_ISGE) { - | csel PC, RB, PC, hs - } else if (op == BC_ISLE) { - | csel PC, RB, PC, ls - } else { - | csel PC, RB, PC, hi - } - | b <1 - break; - - case BC_ISEQV: case BC_ISNEV: - vk = op == BC_ISEQV; - | // RA = src1, RC = src2, JMP with RC = target - | ldr CARG1, [BASE, RA, lsl #3] - | add RC, BASE, RC, lsl #3 - | ldrh RBw, [PC, #2] - | ldr CARG3, [RC] - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | sub RB, RB, #0x20000 - | asr ITYPE, CARG3, #47 - | cmn ITYPE, #-LJ_TISNUM - if (vk) { - | bls ->BC_ISEQN_Z - } else { - | bls ->BC_ISNEN_Z - } - | // RC is not a number. - | asr TMP0, CARG1, #47 - |.if FFI - | // Check if RC or RA is a cdata. - | cmn ITYPE, #-LJ_TCDATA - | ccmn TMP0, #-LJ_TCDATA, #4, ne - | beq ->vmeta_equal_cd - |.endif - | cmp CARG1, CARG3 - | bne >2 - | // Tag and value are equal. - if (vk) { - |->BC_ISEQV_Z: - | mov PC, RB // Perform branch. - } - |1: - | ins_next - | - |2: // Check if the tags are the same and it's a table or userdata. - | cmp ITYPE, TMP0 - | ccmn ITYPE, #-LJ_TISTABUD, #2, eq - if (vk) { - | bhi <1 - } else { - | bhi ->BC_ISEQV_Z // Reuse code from opposite instruction. - } - | // Different tables or userdatas. Need to check __eq metamethod. - | // Field metatable must be at same offset for GCtab and GCudata! - | and TAB:CARG2, CARG1, #LJ_GCVMASK - | ldr TAB:TMP2, TAB:CARG2->metatable - if (vk) { - | cbz TAB:TMP2, <1 // No metatable? 
- | ldrb TMP1w, TAB:TMP2->nomm - | mov CARG4, #0 // ne = 0 - | tbnz TMP1w, #MM_eq, <1 // 'no __eq' flag set: done. - } else { - | cbz TAB:TMP2, ->BC_ISEQV_Z // No metatable? - | ldrb TMP1w, TAB:TMP2->nomm - | mov CARG4, #1 // ne = 1. - | tbnz TMP1w, #MM_eq, ->BC_ISEQV_Z // 'no __eq' flag set: done. - } - | b ->vmeta_equal - break; - - case BC_ISEQS: case BC_ISNES: - vk = op == BC_ISEQS; - | // RA = src, RC = str_const (~), JMP with RC = target - | ldr CARG1, [BASE, RA, lsl #3] - | mvn RC, RC - | ldrh RBw, [PC, #2] - | ldr CARG2, [KBASE, RC, lsl #3] - | add PC, PC, #4 - | movn TMP0, #~LJ_TSTR - |.if FFI - | asr ITYPE, CARG1, #47 - |.endif - | add RB, PC, RB, lsl #2 - | add CARG2, CARG2, TMP0, lsl #47 - | sub RB, RB, #0x20000 - |.if FFI - | cmn ITYPE, #-LJ_TCDATA - | beq ->vmeta_equal_cd - |.endif - | cmp CARG1, CARG2 - if (vk) { - | csel PC, RB, PC, eq - } else { - | csel PC, RB, PC, ne - } - | ins_next - break; - - case BC_ISEQN: case BC_ISNEN: - vk = op == BC_ISEQN; - | // RA = src, RC = num_const (~), JMP with RC = target - | ldr CARG1, [BASE, RA, lsl #3] - | add RC, KBASE, RC, lsl #3 - | ldrh RBw, [PC, #2] - | ldr CARG3, [RC] - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | sub RB, RB, #0x20000 - if (vk) { - |->BC_ISEQN_Z: - } else { - |->BC_ISNEN_Z: - } - | checkint CARG1, >4 - | checkint CARG3, >6 - | cmp CARG1w, CARG3w - |1: - if (vk) { - | csel PC, RB, PC, eq - |2: - } else { - |2: - | csel PC, RB, PC, ne - } - |3: - | ins_next - | - |4: // RA not int. - |.if FFI - | blo >7 - |.else - | blo <2 - |.endif - | ldr FARG1, [BASE, RA, lsl #3] - | ldr FARG2, [RC] - | cmp TISNUMhi, CARG3, lsr #32 - | bne >5 - | // RA number, RC int. - | scvtf FARG2, CARG3w - |5: - | // RA number, RC number. 
- | fcmp FARG1, FARG2 - | b <1 - | - |6: // RA int, RC number - | ldr FARG2, [RC] - | scvtf FARG1, CARG1w - | fcmp FARG1, FARG2 - | b <1 - | - |.if FFI - |7: - | asr ITYPE, CARG1, #47 - | cmn ITYPE, #-LJ_TCDATA - | bne <2 - | b ->vmeta_equal_cd - |.endif - break; - - case BC_ISEQP: case BC_ISNEP: - vk = op == BC_ISEQP; - | // RA = src, RC = primitive_type (~), JMP with RC = target - | ldr TMP0, [BASE, RA, lsl #3] - | ldrh RBw, [PC, #2] - | add PC, PC, #4 - | add RC, RC, #1 - | add RB, PC, RB, lsl #2 - |.if FFI - | asr ITYPE, TMP0, #47 - | cmn ITYPE, #-LJ_TCDATA - | beq ->vmeta_equal_cd - | cmn RC, ITYPE - |.else - | cmn RC, TMP0, asr #47 - |.endif - | sub RB, RB, #0x20000 - if (vk) { - | csel PC, RB, PC, eq - } else { - | csel PC, RB, PC, ne - } - | ins_next - break; - - /* -- Unary test and copy ops ------------------------------------------- */ - - case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: - | // RA = dst or unused, RC = src, JMP with RC = target - | ldrh RBw, [PC, #2] - | ldr TMP0, [BASE, RC, lsl #3] - | add PC, PC, #4 - | mov_false TMP1 - | add RB, PC, RB, lsl #2 - | cmp TMP0, TMP1 - | sub RB, RB, #0x20000 - if (op == BC_ISTC || op == BC_IST) { - if (op == BC_ISTC) { - | csel RA, RA, RC, lo - } - | csel PC, RB, PC, lo - } else { - if (op == BC_ISFC) { - | csel RA, RA, RC, hs - } - | csel PC, RB, PC, hs - } - if (op == BC_ISTC || op == BC_ISFC) { - | str TMP0, [BASE, RA, lsl #3] - } - | ins_next - break; - - case BC_ISTYPE: - | // RA = src, RC = -type - | ldr TMP0, [BASE, RA, lsl #3] - | cmn RC, TMP0, asr #47 - | bne ->vmeta_istype - | ins_next - break; - case BC_ISNUM: - | // RA = src, RC = -(TISNUM-1) - | ldr TMP0, [BASE, RA] - | checknum TMP0, ->vmeta_istype - | ins_next - break; - - /* -- Unary ops --------------------------------------------------------- */ - - case BC_MOV: - | // RA = dst, RC = src - | ldr TMP0, [BASE, RC, lsl #3] - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_NOT: - | // RA = dst, RC = src - | ldr TMP0, 
[BASE, RC, lsl #3] - | mov_false TMP1 - | mov_true TMP2 - | cmp TMP0, TMP1 - | csel TMP0, TMP1, TMP2, lo - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_UNM: - | // RA = dst, RC = src - | ldr TMP0, [BASE, RC, lsl #3] - | asr ITYPE, TMP0, #47 - | cmn ITYPE, #-LJ_TISNUM - | bhi ->vmeta_unm - | eor TMP0, TMP0, #U64x(80000000,00000000) - | bne >5 - | negs TMP0w, TMP0w - | movz CARG3, #0x41e0, lsl #48 // 2^31. - | add TMP0, TMP0, TISNUM - | csel TMP0, TMP0, CARG3, vc - |5: - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_LEN: - | // RA = dst, RC = src - | ldr CARG1, [BASE, RC, lsl #3] - | asr ITYPE, CARG1, #47 - | cmn ITYPE, #-LJ_TSTR - | and CARG1, CARG1, #LJ_GCVMASK - | bne >2 - | ldr CARG1w, STR:CARG1->len - |1: - | add CARG1, CARG1, TISNUM - | str CARG1, [BASE, RA, lsl #3] - | ins_next - | - |2: - | cmn ITYPE, #-LJ_TTAB - | bne ->vmeta_len -#if LJ_52 - | ldr TAB:CARG2, TAB:CARG1->metatable - | cbnz TAB:CARG2, >9 - |3: -#endif - |->BC_LEN_Z: - | bl extern lj_tab_len // (GCtab *t) - | // Returns uint32_t (but less than 2^31). - | b <1 - | -#if LJ_52 - |9: - | ldrb TMP1w, TAB:CARG2->nomm - | tbnz TMP1w, #MM_len, <3 // 'no __len' flag set: done. 
- | b ->vmeta_len -#endif - break; - - /* -- Binary ops -------------------------------------------------------- */ - - |.macro ins_arithcheck_int, target - | checkint CARG1, target - | checkint CARG2, target - |.endmacro - | - |.macro ins_arithcheck_num, target - | checknum CARG1, target - | checknum CARG2, target - |.endmacro - | - |.macro ins_arithcheck_nzdiv, target - | cbz CARG2w, target - |.endmacro - | - |.macro ins_arithhead - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||if (vk == 1) { - | and RC, RC, #255 - | decode_RB RB, INS - ||} else { - | decode_RB RB, INS - | and RC, RC, #255 - ||} - |.endmacro - | - |.macro ins_arithload, reg1, reg2 - | // RA = dst, RB = src1, RC = src2 | num_const - ||switch (vk) { - ||case 0: - | ldr reg1, [BASE, RB, lsl #3] - | ldr reg2, [KBASE, RC, lsl #3] - || break; - ||case 1: - | ldr reg1, [KBASE, RC, lsl #3] - | ldr reg2, [BASE, RB, lsl #3] - || break; - ||default: - | ldr reg1, [BASE, RB, lsl #3] - | ldr reg2, [BASE, RC, lsl #3] - || break; - ||} - |.endmacro - | - |.macro ins_arithfallback, ins - ||switch (vk) { - ||case 0: - | ins ->vmeta_arith_vn - || break; - ||case 1: - | ins ->vmeta_arith_nv - || break; - ||default: - | ins ->vmeta_arith_vv - || break; - ||} - |.endmacro - | - |.macro ins_arithmod, res, reg1, reg2 - | fdiv d2, reg1, reg2 - | frintm d2, d2 - | fmsub res, d2, reg2, reg1 - |.endmacro - | - |.macro ins_arithdn, intins, fpins - | ins_arithhead - | ins_arithload CARG1, CARG2 - | ins_arithcheck_int >5 - |.if "intins" == "smull" - | smull CARG1, CARG1w, CARG2w - | cmp CARG1, CARG1, sxtw - | mov CARG1w, CARG1w - | ins_arithfallback bne - |.elif "intins" == "ins_arithmodi" - | ins_arithfallback ins_arithcheck_nzdiv - | bl ->vm_modi - |.else - | intins CARG1w, CARG1w, CARG2w - | ins_arithfallback bvs - |.endif - | add CARG1, CARG1, TISNUM - | str CARG1, [BASE, RA, lsl #3] - |4: - | ins_next - | - |5: // FP variant. 
- | ins_arithload FARG1, FARG2 - | ins_arithfallback ins_arithcheck_num - | fpins FARG1, FARG1, FARG2 - | str FARG1, [BASE, RA, lsl #3] - | b <4 - |.endmacro - | - |.macro ins_arithfp, fpins - | ins_arithhead - | ins_arithload CARG1, CARG2 - | ins_arithload FARG1, FARG2 - | ins_arithfallback ins_arithcheck_num - |.if "fpins" == "fpow" - | bl extern pow - |.else - | fpins FARG1, FARG1, FARG2 - |.endif - | str FARG1, [BASE, RA, lsl #3] - | ins_next - |.endmacro - - case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - | ins_arithdn adds, fadd - break; - case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - | ins_arithdn subs, fsub - break; - case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arithdn smull, fmul - break; - case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: - | ins_arithfp fdiv - break; - case BC_MODVN: case BC_MODNV: case BC_MODVV: - | ins_arithdn ins_arithmodi, ins_arithmod - break; - case BC_POW: - | // NYI: (partial) integer arithmetic. - | ins_arithfp fpow - break; - - case BC_CAT: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = dst, RB = src_start, RC = src_end - | str BASE, L->base - | sub CARG3, RC, RB - | add CARG2, BASE, RC, lsl #3 - |->BC_CAT_Z: - | // RA = dst, CARG2 = top-1, CARG3 = left - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) - | // Returns NULL (finished) or TValue * (metamethod). - | ldrb RBw, [PC, #-1] - | ldr BASE, L->base - | cbnz CRET1, ->vmeta_binop - | ldr TMP0, [BASE, RB, lsl #3] - | str TMP0, [BASE, RA, lsl #3] // Copy result to RA. 
- | ins_next - break; - - /* -- Constant ops ------------------------------------------------------ */ - - case BC_KSTR: - | // RA = dst, RC = str_const (~) - | mvn RC, RC - | ldr TMP0, [KBASE, RC, lsl #3] - | movn TMP1, #~LJ_TSTR - | add TMP0, TMP0, TMP1, lsl #47 - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_KCDATA: - |.if FFI - | // RA = dst, RC = cdata_const (~) - | mvn RC, RC - | ldr TMP0, [KBASE, RC, lsl #3] - | movn TMP1, #~LJ_TCDATA - | add TMP0, TMP0, TMP1, lsl #47 - | str TMP0, [BASE, RA, lsl #3] - | ins_next - |.endif - break; - case BC_KSHORT: - | // RA = dst, RC = int16_literal - | sxth RCw, RCw - | add TMP0, RC, TISNUM - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_KNUM: - | // RA = dst, RC = num_const - | ldr TMP0, [KBASE, RC, lsl #3] - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_KPRI: - | // RA = dst, RC = primitive_type (~) - | mvn TMP0, RC, lsl #47 - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_KNIL: - | // RA = base, RC = end - | add RA, BASE, RA, lsl #3 - | add RC, BASE, RC, lsl #3 - | str TISNIL, [RA], #8 - |1: - | cmp RA, RC - | str TISNIL, [RA], #8 - | blt <1 - | ins_next_ - break; - - /* -- Upvalue and function ops ------------------------------------------ */ - - case BC_UGET: - | // RA = dst, RC = uvnum - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | add RC, RC, #offsetof(GCfuncL, uvptr)/8 - | and LFUNC:CARG2, CARG2, #LJ_GCVMASK - | ldr UPVAL:CARG2, [LFUNC:CARG2, RC, lsl #3] - | ldr CARG2, UPVAL:CARG2->v - | ldr TMP0, [CARG2] - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_USETV: - | // RA = uvnum, RC = src - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | add RA, RA, #offsetof(GCfuncL, uvptr)/8 - | and LFUNC:CARG2, CARG2, #LJ_GCVMASK - | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3] - | ldr CARG3, [BASE, RC, lsl #3] - | ldr CARG2, UPVAL:CARG1->v - | ldrb TMP2w, UPVAL:CARG1->marked - | ldrb TMP0w, UPVAL:CARG1->closed - | asr ITYPE, CARG3, #47 - | str CARG3, 
[CARG2] - | add ITYPE, ITYPE, #-LJ_TISGCV - | tst TMP2w, #LJ_GC_BLACK // isblack(uv) - | ccmp TMP0w, #0, #4, ne // && uv->closed - | ccmn ITYPE, #-(LJ_TNUMX - LJ_TISGCV), #0, ne // && tvisgcv(v) - | bhi >2 - |1: - | ins_next - | - |2: // Check if new value is white. - | and GCOBJ:CARG3, CARG3, #LJ_GCVMASK - | ldrb TMP1w, GCOBJ:CARG3->gch.marked - | tst TMP1w, #LJ_GC_WHITES // iswhite(str) - | beq <1 - | // Crossed a write barrier. Move the barrier forward. - | mov CARG1, GL - | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) - | b <1 - break; - case BC_USETS: - | // RA = uvnum, RC = str_const (~) - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | add RA, RA, #offsetof(GCfuncL, uvptr)/8 - | mvn RC, RC - | and LFUNC:CARG2, CARG2, #LJ_GCVMASK - | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3] - | ldr STR:CARG3, [KBASE, RC, lsl #3] - | movn TMP0, #~LJ_TSTR - | ldr CARG2, UPVAL:CARG1->v - | ldrb TMP2w, UPVAL:CARG1->marked - | add TMP0, STR:CARG3, TMP0, lsl #47 - | ldrb TMP1w, STR:CARG3->marked - | str TMP0, [CARG2] - | tbnz TMP2w, #2, >2 // isblack(uv) - |1: - | ins_next - | - |2: // Check if string is white and ensure upvalue is closed. - | ldrb TMP0w, UPVAL:CARG1->closed - | tst TMP1w, #LJ_GC_WHITES // iswhite(str) - | ccmp TMP0w, #0, #0, ne - | beq <1 - | // Crossed a write barrier. Move the barrier forward. 
- | mov CARG1, GL - | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) - | b <1 - break; - case BC_USETN: - | // RA = uvnum, RC = num_const - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | add RA, RA, #offsetof(GCfuncL, uvptr)/8 - | and LFUNC:CARG2, CARG2, #LJ_GCVMASK - | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3] - | ldr TMP0, [KBASE, RC, lsl #3] - | ldr CARG2, UPVAL:CARG2->v - | str TMP0, [CARG2] - | ins_next - break; - case BC_USETP: - | // RA = uvnum, RC = primitive_type (~) - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | add RA, RA, #offsetof(GCfuncL, uvptr)/8 - | and LFUNC:CARG2, CARG2, #LJ_GCVMASK - | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3] - | mvn TMP0, RC, lsl #47 - | ldr CARG2, UPVAL:CARG2->v - | str TMP0, [CARG2] - | ins_next - break; - - case BC_UCLO: - | // RA = level, RC = target - | ldr CARG3, L->openupval - | add RC, PC, RC, lsl #2 - | str BASE, L->base - | sub PC, RC, #0x20000 - | cbz CARG3, >1 - | mov CARG1, L - | add CARG2, BASE, RA, lsl #3 - | bl extern lj_func_closeuv // (lua_State *L, TValue *level) - | ldr BASE, L->base - |1: - | ins_next - break; - - case BC_FNEW: - | // RA = dst, RC = proto_const (~) (holding function prototype) - | mvn RC, RC - | str BASE, L->base - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | str PC, SAVE_PC - | ldr CARG2, [KBASE, RC, lsl #3] - | mov CARG1, L - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | // (lua_State *L, GCproto *pt, GCfuncL *parent) - | bl extern lj_func_newL_gc - | // Returns GCfuncL *. - | ldr BASE, L->base - | movn TMP0, #~LJ_TFUNC - | add CRET1, CRET1, TMP0, lsl #47 - | str CRET1, [BASE, RA, lsl #3] - | ins_next - break; - - /* -- Table ops --------------------------------------------------------- */ - - case BC_TNEW: - case BC_TDUP: - | // RA = dst, RC = (hbits|asize) | tab_const (~) - | ldp CARG3, CARG4, GL->gc.total // Assumes threshold follows total. 
- | str BASE, L->base - | str PC, SAVE_PC - | mov CARG1, L - | cmp CARG3, CARG4 - | bhs >5 - |1: - if (op == BC_TNEW) { - | and CARG2, RC, #0x7ff - | lsr CARG3, RC, #11 - | cmp CARG2, #0x7ff - | mov TMP0, #0x801 - | csel CARG2, CARG2, TMP0, ne - | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) - | // Returns GCtab *. - } else { - | mvn RC, RC - | ldr CARG2, [KBASE, RC, lsl #3] - | bl extern lj_tab_dup // (lua_State *L, Table *kt) - | // Returns GCtab *. - } - | ldr BASE, L->base - | movk CRET1, #(LJ_TTAB>>1)&0xffff, lsl #48 - | str CRET1, [BASE, RA, lsl #3] - | ins_next - | - |5: - | bl extern lj_gc_step_fixtop // (lua_State *L) - | mov CARG1, L - | b <1 - break; - - case BC_GGET: - | // RA = dst, RC = str_const (~) - case BC_GSET: - | // RA = dst, RC = str_const (~) - | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] - | mvn RC, RC - | and LFUNC:CARG1, CARG1, #LJ_GCVMASK - | ldr TAB:CARG2, LFUNC:CARG1->env - | ldr STR:RC, [KBASE, RC, lsl #3] - if (op == BC_GGET) { - | b ->BC_TGETS_Z - } else { - | b ->BC_TSETS_Z - } - break; - - case BC_TGETV: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = dst, RB = table, RC = key - | ldr CARG2, [BASE, RB, lsl #3] - | ldr TMP1, [BASE, RC, lsl #3] - | checktab CARG2, ->vmeta_tgetv - | checkint TMP1, >9 // Integer key? - | ldr CARG3, TAB:CARG2->array - | ldr CARG1w, TAB:CARG2->asize - | add CARG3, CARG3, TMP1, uxtw #3 - | cmp TMP1w, CARG1w // In array part? - | bhs ->vmeta_tgetv - | ldr TMP0, [CARG3] - | cmp TMP0, TISNIL - | beq >5 - |1: - | str TMP0, [BASE, RA, lsl #3] - | ins_next - | - |5: // Check for __index if table value is nil. - | ldr TAB:CARG1, TAB:CARG2->metatable - | cbz TAB:CARG1, <1 // No metatable: done. - | ldrb TMP1w, TAB:CARG1->nomm - | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done. - | b ->vmeta_tgetv - | - |9: - | asr ITYPE, TMP1, #47 - | cmn ITYPE, #-LJ_TSTR // String key? 
- | bne ->vmeta_tgetv - | and STR:RC, TMP1, #LJ_GCVMASK - | b ->BC_TGETS_Z - break; - case BC_TGETS: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = dst, RB = table, RC = str_const (~) - | ldr CARG2, [BASE, RB, lsl #3] - | mvn RC, RC - | ldr STR:RC, [KBASE, RC, lsl #3] - | checktab CARG2, ->vmeta_tgets1 - |->BC_TGETS_Z: - | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = dst - | ldr TMP1w, TAB:CARG2->hmask - | ldr TMP2w, STR:RC->hash - | ldr NODE:CARG3, TAB:CARG2->node - | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask - | add TMP1, TMP1, TMP1, lsl #1 - | movn CARG4, #~LJ_TSTR - | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 - | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for. - |1: - | ldp TMP0, CARG1, NODE:CARG3->val - | ldr NODE:CARG3, NODE:CARG3->next - | cmp CARG1, CARG4 - | bne >4 - | cmp TMP0, TISNIL - | beq >5 - |3: - | str TMP0, [BASE, RA, lsl #3] - | ins_next - | - |4: // Follow hash chain. - | cbnz NODE:CARG3, <1 - | // End of hash chain: key not found, nil result. - | mov TMP0, TISNIL - | - |5: // Check for __index if table value is nil. - | ldr TAB:CARG1, TAB:CARG2->metatable - | cbz TAB:CARG1, <3 // No metatable: done. - | ldrb TMP1w, TAB:CARG1->nomm - | tbnz TMP1w, #MM_index, <3 // 'no __index' flag set: done. - | b ->vmeta_tgets - break; - case BC_TGETB: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = dst, RB = table, RC = index - | ldr CARG2, [BASE, RB, lsl #3] - | checktab CARG2, ->vmeta_tgetb - | ldr CARG3, TAB:CARG2->array - | ldr CARG1w, TAB:CARG2->asize - | add CARG3, CARG3, RC, lsl #3 - | cmp RCw, CARG1w // In array part? - | bhs ->vmeta_tgetb - | ldr TMP0, [CARG3] - | cmp TMP0, TISNIL - | beq >5 - |1: - | str TMP0, [BASE, RA, lsl #3] - | ins_next - | - |5: // Check for __index if table value is nil. - | ldr TAB:CARG1, TAB:CARG2->metatable - | cbz TAB:CARG1, <1 // No metatable: done. - | ldrb TMP1w, TAB:CARG1->nomm - | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done. 
- | b ->vmeta_tgetb - break; - case BC_TGETR: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = dst, RB = table, RC = key - | ldr CARG1, [BASE, RB, lsl #3] - | ldr TMP1, [BASE, RC, lsl #3] - | and TAB:CARG1, CARG1, #LJ_GCVMASK - | ldr CARG3, TAB:CARG1->array - | ldr TMP2w, TAB:CARG1->asize - | add CARG3, CARG3, TMP1w, uxtw #3 - | cmp TMP1w, TMP2w // In array part? - | bhs ->vmeta_tgetr - | ldr TMP0, [CARG3] - |->BC_TGETR_Z: - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - - case BC_TSETV: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = src, RB = table, RC = key - | ldr CARG2, [BASE, RB, lsl #3] - | ldr TMP1, [BASE, RC, lsl #3] - | checktab CARG2, ->vmeta_tsetv - | checkint TMP1, >9 // Integer key? - | ldr CARG3, TAB:CARG2->array - | ldr CARG1w, TAB:CARG2->asize - | add CARG3, CARG3, TMP1, uxtw #3 - | cmp TMP1w, CARG1w // In array part? - | bhs ->vmeta_tsetv - | ldr TMP1, [CARG3] - | ldr TMP0, [BASE, RA, lsl #3] - | ldrb TMP2w, TAB:CARG2->marked - | cmp TMP1, TISNIL // Previous value is nil? - | beq >5 - |1: - | str TMP0, [CARG3] - | tbnz TMP2w, #2, >7 // isblack(table) - |2: - | ins_next - | - |5: // Check for __newindex if previous value is nil. - | ldr TAB:CARG1, TAB:CARG2->metatable - | cbz TAB:CARG1, <1 // No metatable: done. - | ldrb TMP1w, TAB:CARG1->nomm - | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done. - | b ->vmeta_tsetv - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP2w, TMP1 - | b <2 - | - |9: - | asr ITYPE, TMP1, #47 - | cmn ITYPE, #-LJ_TSTR // String key? 
- | bne ->vmeta_tsetv - | and STR:RC, TMP1, #LJ_GCVMASK - | b ->BC_TSETS_Z - break; - case BC_TSETS: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = dst, RB = table, RC = str_const (~) - | ldr CARG2, [BASE, RB, lsl #3] - | mvn RC, RC - | ldr STR:RC, [KBASE, RC, lsl #3] - | checktab CARG2, ->vmeta_tsets1 - |->BC_TSETS_Z: - | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = src - | ldr TMP1w, TAB:CARG2->hmask - | ldr TMP2w, STR:RC->hash - | ldr NODE:CARG3, TAB:CARG2->node - | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask - | add TMP1, TMP1, TMP1, lsl #1 - | movn CARG4, #~LJ_TSTR - | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 - | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for. - | strb wzr, TAB:CARG2->nomm // Clear metamethod cache. - |1: - | ldp TMP1, CARG1, NODE:CARG3->val - | ldr NODE:TMP3, NODE:CARG3->next - | ldrb TMP2w, TAB:CARG2->marked - | cmp CARG1, CARG4 - | bne >5 - | ldr TMP0, [BASE, RA, lsl #3] - | cmp TMP1, TISNIL // Previous value is nil? - | beq >4 - |2: - | str TMP0, NODE:CARG3->val - | tbnz TMP2w, #2, >7 // isblack(table) - |3: - | ins_next - | - |4: // Check for __newindex if previous value is nil. - | ldr TAB:CARG1, TAB:CARG2->metatable - | cbz TAB:CARG1, <2 // No metatable: done. - | ldrb TMP1w, TAB:CARG1->nomm - | tbnz TMP1w, #MM_newindex, <2 // 'no __newindex' flag set: done. - | b ->vmeta_tsets - | - |5: // Follow hash chain. - | mov NODE:CARG3, NODE:TMP3 - | cbnz NODE:TMP3, <1 - | // End of hash chain: key not found, add a new one. - | - | // But check for __newindex first. - | ldr TAB:CARG1, TAB:CARG2->metatable - | cbz TAB:CARG1, >6 // No metatable: continue. - | ldrb TMP1w, TAB:CARG1->nomm - | // 'no __newindex' flag NOT set: check. 
- | tbz TMP1w, #MM_newindex, ->vmeta_tsets - |6: - | movn TMP1, #~LJ_TSTR - | str PC, SAVE_PC - | add TMP0, STR:RC, TMP1, lsl #47 - | str BASE, L->base - | mov CARG1, L - | str TMP0, TMPD - | add CARG3, sp, TMPDofs - | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) - | // Returns TValue *. - | ldr BASE, L->base - | ldr TMP0, [BASE, RA, lsl #3] - | str TMP0, [CRET1] - | b <3 // No 2nd write barrier needed. - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP2w, TMP1 - | b <3 - break; - case BC_TSETB: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = src, RB = table, RC = index - | ldr CARG2, [BASE, RB, lsl #3] - | checktab CARG2, ->vmeta_tsetb - | ldr CARG3, TAB:CARG2->array - | ldr CARG1w, TAB:CARG2->asize - | add CARG3, CARG3, RC, lsl #3 - | cmp RCw, CARG1w // In array part? - | bhs ->vmeta_tsetb - | ldr TMP1, [CARG3] - | ldr TMP0, [BASE, RA, lsl #3] - | ldrb TMP2w, TAB:CARG2->marked - | cmp TMP1, TISNIL // Previous value is nil? - | beq >5 - |1: - | str TMP0, [CARG3] - | tbnz TMP2w, #2, >7 // isblack(table) - |2: - | ins_next - | - |5: // Check for __newindex if previous value is nil. - | ldr TAB:CARG1, TAB:CARG2->metatable - | cbz TAB:CARG1, <1 // No metatable: done. - | ldrb TMP1w, TAB:CARG1->nomm - | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done. - | b ->vmeta_tsetb - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP2w, TMP1 - | b <2 - break; - case BC_TSETR: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = src, RB = table, RC = key - | ldr CARG2, [BASE, RB, lsl #3] - | ldr TMP1, [BASE, RC, lsl #3] - | and TAB:CARG2, CARG2, #LJ_GCVMASK - | ldr CARG1, TAB:CARG2->array - | ldrb TMP2w, TAB:CARG2->marked - | ldr CARG4w, TAB:CARG2->asize - | add CARG1, CARG1, TMP1, uxtw #3 - | tbnz TMP2w, #2, >7 // isblack(table) - |2: - | cmp TMP1w, CARG4w // In array part? 
- | bhs ->vmeta_tsetr - |->BC_TSETR_Z: - | ldr TMP0, [BASE, RA, lsl #3] - | str TMP0, [CARG1] - | ins_next - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP2w, TMP0 - | b <2 - break; - - case BC_TSETM: - | // RA = base (table at base-1), RC = num_const (start index) - | add RA, BASE, RA, lsl #3 - |1: - | ldr RBw, SAVE_MULTRES - | ldr TAB:CARG2, [RA, #-8] // Guaranteed to be a table. - | ldr TMP1, [KBASE, RC, lsl #3] // Integer constant is in lo-word. - | sub RB, RB, #8 - | cbz RB, >4 // Nothing to copy? - | and TAB:CARG2, CARG2, #LJ_GCVMASK - | ldr CARG1w, TAB:CARG2->asize - | add CARG3w, TMP1w, RBw, lsr #3 - | ldr CARG4, TAB:CARG2->array - | cmp CARG3, CARG1 - | add RB, RA, RB - | bhi >5 - | add TMP1, CARG4, TMP1w, uxtw #3 - | ldrb TMP2w, TAB:CARG2->marked - |3: // Copy result slots to table. - | ldr TMP0, [RA], #8 - | str TMP0, [TMP1], #8 - | cmp RA, RB - | blo <3 - | tbnz TMP2w, #2, >7 // isblack(table) - |4: - | ins_next - | - |5: // Need to resize array part. - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) - | // Must not reallocate the stack. - | b <1 - | - |7: // Possible table write barrier for any value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP2w, TMP1 - | b <4 - break; - - /* -- Calls and vararg handling ----------------------------------------- */ - - case BC_CALLM: - | // RA = base, (RB = nresults+1,) RC = extra_nargs - | ldr TMP0w, SAVE_MULTRES - | decode_RC8RD NARGS8:RC, RC - | add NARGS8:RC, NARGS8:RC, TMP0 - | b ->BC_CALL_Z - break; - case BC_CALL: - | decode_RC8RD NARGS8:RC, RC - | // RA = base, (RB = nresults+1,) RC = (nargs+1)*8 - |->BC_CALL_Z: - | mov RB, BASE // Save old BASE for vmeta_call. 
- | add BASE, BASE, RA, lsl #3 - | ldr CARG3, [BASE] - | sub NARGS8:RC, NARGS8:RC, #8 - | add BASE, BASE, #16 - | checkfunc CARG3, ->vmeta_call - | ins_call - break; - - case BC_CALLMT: - | // RA = base, (RB = 0,) RC = extra_nargs - | ldr TMP0w, SAVE_MULTRES - | add NARGS8:RC, TMP0, RC, lsl #3 - | b ->BC_CALLT1_Z - break; - case BC_CALLT: - | lsl NARGS8:RC, RC, #3 - | // RA = base, (RB = 0,) RC = (nargs+1)*8 - |->BC_CALLT1_Z: - | add RA, BASE, RA, lsl #3 - | ldr TMP1, [RA] - | sub NARGS8:RC, NARGS8:RC, #8 - | add RA, RA, #16 - | checktp CARG3, TMP1, LJ_TFUNC, ->vmeta_callt - | ldr PC, [BASE, FRAME_PC] - |->BC_CALLT2_Z: - | mov RB, #0 - | ldrb TMP2w, LFUNC:CARG3->ffid - | tst PC, #FRAME_TYPE - | bne >7 - |1: - | str TMP1, [BASE, FRAME_FUNC] // Copy function down, but keep PC. - | cbz NARGS8:RC, >3 - |2: - | ldr TMP0, [RA, RB] - | add TMP1, RB, #8 - | cmp TMP1, NARGS8:RC - | str TMP0, [BASE, RB] - | mov RB, TMP1 - | bne <2 - |3: - | cmp TMP2, #1 // (> FF_C) Calling a fast function? - | bhi >5 - |4: - | ins_callt - | - |5: // Tailcall to a fast function with a Lua frame below. - | ldrb RAw, [PC, #-3] - | sub CARG1, BASE, RA, lsl #3 - | ldr LFUNC:CARG1, [CARG1, #-32] - | and LFUNC:CARG1, CARG1, #LJ_GCVMASK - | ldr CARG1, LFUNC:CARG1->pc - | ldr KBASE, [CARG1, #PC2PROTO(k)] - | b <4 - | - |7: // Tailcall from a vararg function. - | eor PC, PC, #FRAME_VARG - | tst PC, #FRAME_TYPEP // Vararg frame below? - | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below. - | bne <1 - | sub BASE, BASE, PC - | ldr PC, [BASE, FRAME_PC] - | tst PC, #FRAME_TYPE - | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below. - | b <1 - break; - - case BC_ITERC: - | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) - | add RA, BASE, RA, lsl #3 - | ldr CARG3, [RA, #-24] - | mov RB, BASE // Save old BASE for vmeta_call. - | ldp CARG1, CARG2, [RA, #-16] - | add BASE, RA, #16 - | mov NARGS8:RC, #16 // Iterators get 2 arguments. - | str CARG3, [RA] // Copy callable. 
- | stp CARG1, CARG2, [RA, #16] // Copy state and control var. - | checkfunc CARG3, ->vmeta_call - | ins_call - break; - - case BC_ITERN: - | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. - |.endif - | add RA, BASE, RA, lsl #3 - | ldr TAB:RB, [RA, #-16] - | ldrh TMP3w, [PC, #2] - | ldr CARG1w, [RA, #-8] // Get index from control var. - | add PC, PC, #4 - | add TMP3, PC, TMP3, lsl #2 - | and TAB:RB, RB, #LJ_GCVMASK - | sub TMP3, TMP3, #0x20000 - | ldr TMP1w, TAB:RB->asize - | ldr CARG2, TAB:RB->array - |1: // Traverse array part. - | subs RC, CARG1, TMP1 - | add CARG3, CARG2, CARG1, lsl #3 - | bhs >5 // Index points after array part? - | ldr TMP0, [CARG3] - | cmp TMP0, TISNIL - | cinc CARG1, CARG1, eq // Skip holes in array part. - | beq <1 - | add CARG1, CARG1, TISNUM - | stp CARG1, TMP0, [RA] - | add CARG1, CARG1, #1 - |3: - | str CARG1w, [RA, #-8] // Update control var. - | mov PC, TMP3 - |4: - | ins_next - | - |5: // Traverse hash part. - | ldr TMP2w, TAB:RB->hmask - | ldr NODE:RB, TAB:RB->node - |6: - | add CARG1, RC, RC, lsl #1 - | cmp RC, TMP2 // End of iteration? Branch to ITERN+1. - | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8 - | bhi <4 - | ldp TMP0, CARG1, NODE:CARG3->val - | cmp TMP0, TISNIL - | add RC, RC, #1 - | beq <6 // Skip holes in hash part. - | stp CARG1, TMP0, [RA] - | add CARG1, RC, TMP1 - | b <3 - break; - - case BC_ISNEXT: - | // RA = base, RC = target (points to ITERN) - | add RA, BASE, RA, lsl #3 - | ldr CFUNC:CARG1, [RA, #-24] - | add RC, PC, RC, lsl #2 - | ldp TAB:CARG3, CARG4, [RA, #-16] - | sub RC, RC, #0x20000 - | checkfunc CFUNC:CARG1, >5 - | asr TMP0, TAB:CARG3, #47 - | ldrb TMP1w, CFUNC:CARG1->ffid - | cmn TMP0, #-LJ_TTAB - | ccmp CARG4, TISNIL, #0, eq - | ccmp TMP1w, #FF_next_N, #0, eq - | bne >5 - | mov TMP0w, #0xfffe7fff - | lsl TMP0, TMP0, #32 - | str TMP0, [RA, #-8] // Initialize control var. 
- |1: - | mov PC, RC - | ins_next - | - |5: // Despecialize bytecode if any of the checks fail. - | mov TMP0, #BC_JMP - | mov TMP1, #BC_ITERC - | strb TMP0w, [PC, #-4] - | strb TMP1w, [RC] - | b <1 - break; - - case BC_VARG: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = base, RB = (nresults+1), RC = numparams - | ldr TMP1, [BASE, FRAME_PC] - | add RC, BASE, RC, lsl #3 - | add RA, BASE, RA, lsl #3 - | add RC, RC, #FRAME_VARG - | add TMP2, RA, RB, lsl #3 - | sub RC, RC, TMP1 // RC = vbase - | // Note: RC may now be even _above_ BASE if nargs was < numparams. - | sub TMP3, BASE, #16 // TMP3 = vtop - | cbz RB, >5 - | sub TMP2, TMP2, #16 - |1: // Copy vararg slots to destination slots. - | cmp RC, TMP3 - | ldr TMP0, [RC], #8 - | csel TMP0, TMP0, TISNIL, lo - | cmp RA, TMP2 - | str TMP0, [RA], #8 - | blo <1 - |2: - | ins_next - | - |5: // Copy all varargs. - | ldr TMP0, L->maxstack - | subs TMP2, TMP3, RC - | csel RB, xzr, TMP2, le // MULTRES = (max(vtop-vbase,0)+1)*8 - | add RB, RB, #8 - | add TMP1, RA, TMP2 - | str RBw, SAVE_MULTRES - | ble <2 // Nothing to copy. - | cmp TMP1, TMP0 - | bhi >7 - |6: - | ldr TMP0, [RC], #8 - | str TMP0, [RA], #8 - | cmp RC, TMP3 - | blo <6 - | b <2 - | - |7: // Grow stack for varargs. - | lsr CARG2, TMP2, #3 - | stp BASE, RA, L->base - | mov CARG1, L - | sub RC, RC, BASE // Need delta, because BASE may change. 
- | str PC, SAVE_PC - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldp BASE, RA, L->base - | add RC, BASE, RC - | sub TMP3, BASE, #16 - | b <6 - break; - - /* -- Returns ----------------------------------------------------------- */ - - case BC_RETM: - | // RA = results, RC = extra results - | ldr TMP0w, SAVE_MULTRES - | ldr PC, [BASE, FRAME_PC] - | add RA, BASE, RA, lsl #3 - | add RC, TMP0, RC, lsl #3 - | b ->BC_RETM_Z - break; - - case BC_RET: - | // RA = results, RC = nresults+1 - | ldr PC, [BASE, FRAME_PC] - | lsl RC, RC, #3 - | add RA, BASE, RA, lsl #3 - |->BC_RETM_Z: - | str RCw, SAVE_MULTRES - |1: - | ands CARG1, PC, #FRAME_TYPE - | eor CARG2, PC, #FRAME_VARG - | bne ->BC_RETV2_Z - | - |->BC_RET_Z: - | // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return - | ldr INSw, [PC, #-4] - | subs TMP1, RC, #8 - | sub CARG3, BASE, #16 - | beq >3 - |2: - | ldr TMP0, [RA], #8 - | add BASE, BASE, #8 - | sub TMP1, TMP1, #8 - | str TMP0, [BASE, #-24] - | cbnz TMP1, <2 - |3: - | decode_RA RA, INS - | sub CARG4, CARG3, RA, lsl #3 - | decode_RB RB, INS - | ldr LFUNC:CARG1, [CARG4, FRAME_FUNC] - |5: - | cmp RC, RB, lsl #3 // More results expected? - | blo >6 - | and LFUNC:CARG1, CARG1, #LJ_GCVMASK - | mov BASE, CARG4 - | ldr CARG2, LFUNC:CARG1->pc - | ldr KBASE, [CARG2, #PC2PROTO(k)] - | ins_next - | - |6: // Fill up results with nil. - | add BASE, BASE, #8 - | add RC, RC, #8 - | str TISNIL, [BASE, #-24] - | b <5 - | - |->BC_RETV1_Z: // Non-standard return case. - | add RA, BASE, RA, lsl #3 - |->BC_RETV2_Z: - | tst CARG2, #FRAME_TYPEP - | bne ->vm_return - | // Return from vararg function: relocate BASE down. 
- | sub BASE, BASE, CARG2 - | ldr PC, [BASE, FRAME_PC] - | b <1 - break; - - case BC_RET0: case BC_RET1: - | // RA = results, RC = nresults+1 - | ldr PC, [BASE, FRAME_PC] - | lsl RC, RC, #3 - | str RCw, SAVE_MULTRES - | ands CARG1, PC, #FRAME_TYPE - | eor CARG2, PC, #FRAME_VARG - | bne ->BC_RETV1_Z - | ldr INSw, [PC, #-4] - if (op == BC_RET1) { - | ldr TMP0, [BASE, RA, lsl #3] - } - | sub CARG4, BASE, #16 - | decode_RA RA, INS - | sub BASE, CARG4, RA, lsl #3 - if (op == BC_RET1) { - | str TMP0, [CARG4], #8 - } - | decode_RB RB, INS - | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] - |5: - | cmp RC, RB, lsl #3 - | blo >6 - | and LFUNC:CARG1, CARG1, #LJ_GCVMASK - | ldr CARG2, LFUNC:CARG1->pc - | ldr KBASE, [CARG2, #PC2PROTO(k)] - | ins_next - | - |6: // Fill up results with nil. - | add RC, RC, #8 - | str TISNIL, [CARG4], #8 - | b <5 - break; - - /* -- Loops and branches ------------------------------------------------ */ - - |.define FOR_IDX, [RA]; .define FOR_TIDX, [RA, #4] - |.define FOR_STOP, [RA, #8]; .define FOR_TSTOP, [RA, #12] - |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20] - |.define FOR_EXT, [RA, #24]; .define FOR_TEXT, [RA, #28] - - case BC_FORL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IFORL follows. 
- break; - - case BC_JFORI: - case BC_JFORL: -#if !LJ_HASJIT - break; -#endif - case BC_FORI: - case BC_IFORL: - | // RA = base, RC = target (after end of loop or start of loop) - vk = (op == BC_IFORL || op == BC_JFORL); - | add RA, BASE, RA, lsl #3 - | ldp CARG1, CARG2, FOR_IDX // CARG1 = IDX, CARG2 = STOP - | ldr CARG3, FOR_STEP // CARG3 = STEP - if (op != BC_JFORL) { - | add RC, PC, RC, lsl #2 - | sub RC, RC, #0x20000 - } - | checkint CARG1, >5 - if (!vk) { - | checkint CARG2, ->vmeta_for - | checkint CARG3, ->vmeta_for - | tbnz CARG3w, #31, >4 - | cmp CARG1w, CARG2w - } else { - | adds CARG1w, CARG1w, CARG3w - | bvs >2 - | add TMP0, CARG1, TISNUM - | tbnz CARG3w, #31, >4 - | cmp CARG1w, CARG2w - } - |1: - if (op == BC_FORI) { - | csel PC, RC, PC, gt - } else if (op == BC_JFORI) { - | mov PC, RC - | ldrh RCw, [RC, #-2] - } else if (op == BC_IFORL) { - | csel PC, RC, PC, le - } - if (vk) { - | str TMP0, FOR_IDX - | str TMP0, FOR_EXT - } else { - | str CARG1, FOR_EXT - } - if (op == BC_JFORI || op == BC_JFORL) { - | ble =>BC_JLOOP - } - |2: - | ins_next - | - |4: // Invert check for negative step. - | cmp CARG2w, CARG1w - | b <1 - | - |5: // FP loop. - | ldp d0, d1, FOR_IDX - | blo ->vmeta_for - if (!vk) { - | checknum CARG2, ->vmeta_for - | checknum CARG3, ->vmeta_for - | str d0, FOR_EXT - } else { - | ldr d2, FOR_STEP - | fadd d0, d0, d2 - } - | tbnz CARG3, #63, >7 - | fcmp d0, d1 - |6: - if (vk) { - | str d0, FOR_IDX - | str d0, FOR_EXT - } - if (op == BC_FORI) { - | csel PC, RC, PC, hi - } else if (op == BC_JFORI) { - | ldrh RCw, [RC, #-2] - | bls =>BC_JLOOP - } else if (op == BC_IFORL) { - | csel PC, RC, PC, ls - } else { - | bls =>BC_JLOOP - } - | b <2 - | - |7: // Invert check for negative step. - | fcmp d1, d0 - | b <6 - break; - - case BC_ITERL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IITERL follows. 
- break; - - case BC_JITERL: -#if !LJ_HASJIT - break; -#endif - case BC_IITERL: - | // RA = base, RC = target - | ldr CARG1, [BASE, RA, lsl #3] - | add TMP1, BASE, RA, lsl #3 - | cmp CARG1, TISNIL - | beq >1 // Stop if iterator returned nil. - if (op == BC_JITERL) { - | str CARG1, [TMP1, #-8] - | b =>BC_JLOOP - } else { - | add TMP0, PC, RC, lsl #2 // Otherwise save control var + branch. - | sub PC, TMP0, #0x20000 - | str CARG1, [TMP1, #-8] - } - |1: - | ins_next - break; - - case BC_LOOP: - | // RA = base, RC = target (loop extent) - | // Note: RA/RC is only used by trace recorder to determine scope/extent - | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_ILOOP follows. - break; - - case BC_ILOOP: - | // RA = base, RC = target (loop extent) - | ins_next - break; - - case BC_JLOOP: - |.if JIT - | // RA = base (ignored), RC = traceno - | ldr CARG1, [GL, #GL_J(trace)] - | mov CARG2, #0 // Traces on ARM64 don't store the trace #, so use 0. - | ldr TRACE:RC, [CARG1, RC, lsl #3] - | st_vmstate CARG2 - | ldr RA, TRACE:RC->mcode - | str BASE, GL->jit_base - | str L, GL->tmpbuf.L - | sub sp, sp, #16 // See SPS_FIXED. Avoids sp adjust in every root trace. - | br RA - |.endif - break; - - case BC_JMP: - | // RA = base (only used by trace recorder), RC = target - | add RC, PC, RC, lsl #2 - | sub PC, RC, #0x20000 - | ins_next - break; - - /* -- Function headers -------------------------------------------------- */ - - case BC_FUNCF: - |.if JIT - | hotcall - |.endif - case BC_FUNCV: /* NYI: compiled vararg functions. */ - | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. 
- break; - - case BC_JFUNCF: -#if !LJ_HASJIT - break; -#endif - case BC_IFUNCF: - | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 - | ldr CARG1, L->maxstack - | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)] - | ldr KBASE, [PC, #-4+PC2PROTO(k)] - | cmp RA, CARG1 - | bhi ->vm_growstack_l - |2: - | cmp NARGS8:RC, TMP1, lsl #3 // Check for missing parameters. - | blo >3 - if (op == BC_JFUNCF) { - | decode_RD RC, INS - | b =>BC_JLOOP - } else { - | ins_next - } - | - |3: // Clear missing parameters. - | str TISNIL, [BASE, NARGS8:RC] - | add NARGS8:RC, NARGS8:RC, #8 - | b <2 - break; - - case BC_JFUNCV: -#if !LJ_HASJIT - break; -#endif - | NYI // NYI: compiled vararg functions - break; /* NYI: compiled vararg functions. */ - - case BC_IFUNCV: - | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 - | ldr CARG1, L->maxstack - | movn TMP0, #~LJ_TFUNC - | add TMP2, BASE, RC - | add LFUNC:CARG3, CARG3, TMP0, lsl #47 - | add RA, RA, RC - | add TMP0, RC, #16+FRAME_VARG - | str LFUNC:CARG3, [TMP2], #8 // Store (tagged) copy of LFUNC. - | ldr KBASE, [PC, #-4+PC2PROTO(k)] - | cmp RA, CARG1 - | str TMP0, [TMP2], #8 // Store delta + FRAME_VARG. - | bhs ->vm_growstack_l - | sub RC, TMP2, #16 - | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)] - | mov RA, BASE - | mov BASE, TMP2 - | cbz TMP1, >2 - |1: - | cmp RA, RC // Less args than parameters? - | bhs >3 - | ldr TMP0, [RA] - | sub TMP1, TMP1, #1 - | str TISNIL, [RA], #8 // Clear old fixarg slot (help the GC). 
- | str TMP0, [TMP2], #8 - | cbnz TMP1, <1 - |2: - | ins_next - | - |3: - | sub TMP1, TMP1, #1 - | str TISNIL, [TMP2], #8 - | cbz TMP1, <2 - | b <3 - break; - - case BC_FUNCC: - case BC_FUNCCW: - | // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8 - if (op == BC_FUNCC) { - | ldr CARG4, CFUNC:CARG3->f - } else { - | ldr CARG4, GL->wrapf - } - | add CARG2, RA, NARGS8:RC - | ldr CARG1, L->maxstack - | add RC, BASE, NARGS8:RC - | cmp CARG2, CARG1 - | stp BASE, RC, L->base - if (op == BC_FUNCCW) { - | ldr CARG2, CFUNC:CARG3->f - } - | mv_vmstate TMP0w, C - | mov CARG1, L - | bhi ->vm_growstack_c // Need to grow stack. - | st_vmstate TMP0w - | blr CARG4 // (lua_State *L [, lua_CFunction f]) - | // Returns nresults. - | ldp BASE, TMP1, L->base - | str L, GL->cur_L - | sbfiz RC, CRET1, #3, #32 - | st_vmstate ST_INTERP - | ldr PC, [BASE, FRAME_PC] - | sub RA, TMP1, RC // RA = L->top - nresults*8 - | b ->vm_returnc - break; - - /* ---------------------------------------------------------------------- */ - - default: - fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); - exit(2); - break; - } -} - -static int build_backend(BuildCtx *ctx) -{ - int op; - - dasm_growpc(Dst, BC__MAX); - - build_subroutines(ctx); - - |.code_op - for (op = 0; op < BC__MAX; op++) - build_ins(ctx, (BCOp)op, op); - - return BC__MAX; -} - -/* Emit pseudo frame-info for all assembler functions. */ -static void emit_asm_debug(BuildCtx *ctx) -{ - int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); - int i, cf = CFRAME_SIZE >> 3; - switch (ctx->mode) { - case BUILD_elfasm: - fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n"); - fprintf(ctx->fp, - ".Lframe0:\n" - "\t.long .LECIE0-.LSCIE0\n" - ".LSCIE0:\n" - "\t.long 0xffffffff\n" - "\t.byte 0x1\n" - "\t.string \"\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -8\n" - "\t.byte 30\n" /* Return address is in lr. 
*/ - "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ - "\t.align 3\n" - ".LECIE0:\n\n"); - fprintf(ctx->fp, - ".LSFDE0:\n" - "\t.long .LEFDE0-.LASFDE0\n" - ".LASFDE0:\n" - "\t.long .Lframe0\n" - "\t.quad .Lbegin\n" - "\t.quad %d\n" - "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ - "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */ - "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */ - fcofs, CFRAME_SIZE, cf, cf-1); - for (i = 19; i <= 28; i++) /* offset x19-x28 */ - fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17); - for (i = 8; i <= 15; i++) /* offset d8-d15 */ - fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", - 64+i, cf-i-4); - fprintf(ctx->fp, - "\t.align 3\n" - ".LEFDE0:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".LSFDE1:\n" - "\t.long .LEFDE1-.LASFDE1\n" - ".LASFDE1:\n" - "\t.long .Lframe0\n" - "\t.quad lj_vm_ffi_call\n" - "\t.quad %d\n" - "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ - "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ - "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ - "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */ - "\t.align 3\n" - ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); -#endif - fprintf(ctx->fp, "\t.section .eh_frame,\"a\",%%progbits\n"); - fprintf(ctx->fp, - ".Lframe1:\n" - "\t.long .LECIE1-.LSCIE1\n" - ".LSCIE1:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.string \"zPR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -8\n" - "\t.byte 30\n" /* Return address is in lr. 
*/ - "\t.uleb128 6\n" /* augmentation length */ - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.long lj_err_unwind_dwarf-.\n" - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ - "\t.align 3\n" - ".LECIE1:\n\n"); - fprintf(ctx->fp, - ".LSFDE2:\n" - "\t.long .LEFDE2-.LASFDE2\n" - ".LASFDE2:\n" - "\t.long .LASFDE2-.Lframe1\n" - "\t.long .Lbegin-.\n" - "\t.long %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ - "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */ - "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */ - fcofs, CFRAME_SIZE, cf, cf-1); - for (i = 19; i <= 28; i++) /* offset x19-x28 */ - fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17); - for (i = 8; i <= 15; i++) /* offset d8-d15 */ - fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", - 64+i, cf-i-4); - fprintf(ctx->fp, - "\t.align 3\n" - ".LEFDE2:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".Lframe2:\n" - "\t.long .LECIE2-.LSCIE2\n" - ".LSCIE2:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.string \"zR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -8\n" - "\t.byte 30\n" /* Return address is in lr. 
*/ - "\t.uleb128 1\n" /* augmentation length */ - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ - "\t.align 3\n" - ".LECIE2:\n\n"); - fprintf(ctx->fp, - ".LSFDE3:\n" - "\t.long .LEFDE3-.LASFDE3\n" - ".LASFDE3:\n" - "\t.long .LASFDE3-.Lframe2\n" - "\t.long lj_vm_ffi_call-.\n" - "\t.long %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ - "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ - "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ - "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */ - "\t.align 3\n" - ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); -#endif - break; - default: - break; - } -} - diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc deleted file mode 100644 index 1afd61187a..0000000000 --- a/src/vm_mips.dasc +++ /dev/null @@ -1,5264 +0,0 @@ -|// Low-level VM code for MIPS CPUs. -|// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -|// -|// MIPS soft-float support contributed by Djordje Kovacevic and -|// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc. -| -|.arch mips -|.section code_op, code_sub -| -|.actionlist build_actionlist -|.globals GLOB_ -|.globalnames globnames -|.externnames extnames -| -|// Note: The ragged indentation of the instructions is intentional. -|// The starting columns indicate data dependencies. -| -|//----------------------------------------------------------------------- -| -|// Fixed register assignments for the interpreter. -|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra -| -|.macro .FPU, a, b -|.if FPU -| a, b -|.endif -|.endmacro -| -|// The following must be C callee-save (but BASE is often refetched). -|.define BASE, r16 // Base of current Lua stack frame. -|.define KBASE, r17 // Constants of current Lua function. -|.define PC, r18 // Next PC. -|.define DISPATCH, r19 // Opcode dispatch table. 
-|.define LREG, r20 // Register holding lua_State (also in SAVE_L). -|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. -| -|.define JGL, r30 // On-trace: global_State + 32768. -| -|// Constants for type-comparisons, stores and conversions. C callee-save. -|.define TISNUM, r22 -|.define TISNIL, r30 -|.if FPU -|.define TOBIT, f30 // 2^52 + 2^51. -|.endif -| -|// The following temporaries are not saved across C calls, except for RA. -|.define RA, r23 // Callee-save. -|.define RB, r8 -|.define RC, r9 -|.define RD, r10 -|.define INS, r11 -| -|.define AT, r1 // Assembler temporary. -|.define TMP0, r12 -|.define TMP1, r13 -|.define TMP2, r14 -|.define TMP3, r15 -| -|// MIPS o32 calling convention. -|.define CFUNCADDR, r25 -|.define CARG1, r4 -|.define CARG2, r5 -|.define CARG3, r6 -|.define CARG4, r7 -| -|.define CRET1, r2 -|.define CRET2, r3 -| -|.if ENDIAN_LE -|.define SFRETLO, CRET1 -|.define SFRETHI, CRET2 -|.define SFARG1LO, CARG1 -|.define SFARG1HI, CARG2 -|.define SFARG2LO, CARG3 -|.define SFARG2HI, CARG4 -|.else -|.define SFRETLO, CRET2 -|.define SFRETHI, CRET1 -|.define SFARG1LO, CARG2 -|.define SFARG1HI, CARG1 -|.define SFARG2LO, CARG4 -|.define SFARG2HI, CARG3 -|.endif -| -|.if FPU -|.define FARG1, f12 -|.define FARG2, f14 -| -|.define FRET1, f0 -|.define FRET2, f2 -|.endif -| -|// Stack layout while in interpreter. Must match with lj_frame.h. -|.if FPU // MIPS32 hard-float. -| -|.define CFRAME_SPACE, 112 // Delta for sp. -| -|.define SAVE_ERRF, 124(sp) // 32 bit C frame info. -|.define SAVE_NRES, 120(sp) -|.define SAVE_CFRAME, 116(sp) -|.define SAVE_L, 112(sp) -|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. -|.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves. -|.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves. -| -|.else // MIPS32 soft-float -| -|.define CFRAME_SPACE, 64 // Delta for sp. -| -|.define SAVE_ERRF, 76(sp) // 32 bit C frame info. 
-|.define SAVE_NRES, 72(sp) -|.define SAVE_CFRAME, 68(sp) -|.define SAVE_L, 64(sp) -|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. -|.define SAVE_GPR_, 24 // .. 24+10*4: 32 bit GPR saves. -| -|.endif -| -|.define SAVE_PC, 20(sp) -|.define ARG5, 16(sp) -|.define CSAVE_4, 12(sp) -|.define CSAVE_3, 8(sp) -|.define CSAVE_2, 4(sp) -|.define CSAVE_1, 0(sp) -|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by callee. -| -|.define ARG5_OFS, 16 -|.define SAVE_MULTRES, ARG5 -| -|//----------------------------------------------------------------------- -| -|.macro saveregs -| addiu sp, sp, -CFRAME_SPACE -| sw ra, SAVE_GPR_+9*4(sp) -| sw r30, SAVE_GPR_+8*4(sp) -| .FPU sdc1 f30, SAVE_FPR_+5*8(sp) -| sw r23, SAVE_GPR_+7*4(sp) -| sw r22, SAVE_GPR_+6*4(sp) -| .FPU sdc1 f28, SAVE_FPR_+4*8(sp) -| sw r21, SAVE_GPR_+5*4(sp) -| sw r20, SAVE_GPR_+4*4(sp) -| .FPU sdc1 f26, SAVE_FPR_+3*8(sp) -| sw r19, SAVE_GPR_+3*4(sp) -| sw r18, SAVE_GPR_+2*4(sp) -| .FPU sdc1 f24, SAVE_FPR_+2*8(sp) -| sw r17, SAVE_GPR_+1*4(sp) -| sw r16, SAVE_GPR_+0*4(sp) -| .FPU sdc1 f22, SAVE_FPR_+1*8(sp) -| .FPU sdc1 f20, SAVE_FPR_+0*8(sp) -|.endmacro -| -|.macro restoreregs_ret -| lw ra, SAVE_GPR_+9*4(sp) -| lw r30, SAVE_GPR_+8*4(sp) -| .FPU ldc1 f30, SAVE_FPR_+5*8(sp) -| lw r23, SAVE_GPR_+7*4(sp) -| lw r22, SAVE_GPR_+6*4(sp) -| .FPU ldc1 f28, SAVE_FPR_+4*8(sp) -| lw r21, SAVE_GPR_+5*4(sp) -| lw r20, SAVE_GPR_+4*4(sp) -| .FPU ldc1 f26, SAVE_FPR_+3*8(sp) -| lw r19, SAVE_GPR_+3*4(sp) -| lw r18, SAVE_GPR_+2*4(sp) -| .FPU ldc1 f24, SAVE_FPR_+2*8(sp) -| lw r17, SAVE_GPR_+1*4(sp) -| lw r16, SAVE_GPR_+0*4(sp) -| .FPU ldc1 f22, SAVE_FPR_+1*8(sp) -| .FPU ldc1 f20, SAVE_FPR_+0*8(sp) -| jr ra -| addiu sp, sp, CFRAME_SPACE -|.endmacro -| -|// Type definitions. Some of these are only used for documentation. 
-|.type L, lua_State, LREG -|.type GL, global_State -|.type TVALUE, TValue -|.type GCOBJ, GCobj -|.type STR, GCstr -|.type TAB, GCtab -|.type LFUNC, GCfuncL -|.type CFUNC, GCfuncC -|.type PROTO, GCproto -|.type UPVAL, GCupval -|.type NODE, Node -|.type NARGS8, int -|.type TRACE, GCtrace -|.type SBUF, SBuf -| -|//----------------------------------------------------------------------- -| -|// Trap for not-yet-implemented parts. -|.macro NYI; .long 0xf0f0f0f0; .endmacro -| -|// Macros to mark delay slots. -|.macro ., a; a; .endmacro -|.macro ., a,b; a,b; .endmacro -|.macro ., a,b,c; a,b,c; .endmacro -| -|//----------------------------------------------------------------------- -| -|// Endian-specific defines. -|.if ENDIAN_LE -|.define FRAME_PC, -4 -|.define FRAME_FUNC, -8 -|.define HI, 4 -|.define LO, 0 -|.define OFS_RD, 2 -|.define OFS_RA, 1 -|.define OFS_OP, 0 -|.else -|.define FRAME_PC, -8 -|.define FRAME_FUNC, -4 -|.define HI, 0 -|.define LO, 4 -|.define OFS_RD, 0 -|.define OFS_RA, 2 -|.define OFS_OP, 3 -|.endif -| -|// Instruction decode. -|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro -|.macro decode_OP4a, dst, ins; andi dst, ins, 0xff; .endmacro -|.macro decode_OP4b, dst; sll dst, dst, 2; .endmacro -|.macro decode_RC4a, dst, ins; srl dst, ins, 14; .endmacro -|.macro decode_RC4b, dst; andi dst, dst, 0x3fc; .endmacro -|.macro decode_RD4b, dst; sll dst, dst, 2; .endmacro -|.macro decode_RA8a, dst, ins; srl dst, ins, 5; .endmacro -|.macro decode_RA8b, dst; andi dst, dst, 0x7f8; .endmacro -|.macro decode_RB8a, dst, ins; srl dst, ins, 21; .endmacro -|.macro decode_RB8b, dst; andi dst, dst, 0x7f8; .endmacro -|.macro decode_RD8a, dst, ins; srl dst, ins, 16; .endmacro -|.macro decode_RD8b, dst; sll dst, dst, 3; .endmacro -|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro -| -|// Instruction fetch. -|.macro ins_NEXT1 -| lw INS, 0(PC) -| addiu PC, PC, 4 -|.endmacro -|// Instruction decode+dispatch. 
-|.macro ins_NEXT2 -| decode_OP4a TMP1, INS -| decode_OP4b TMP1 -| addu TMP0, DISPATCH, TMP1 -| decode_RD8a RD, INS -| lw AT, 0(TMP0) -| decode_RA8a RA, INS -| decode_RD8b RD -| jr AT -| decode_RA8b RA -|.endmacro -|.macro ins_NEXT -| ins_NEXT1 -| ins_NEXT2 -|.endmacro -| -|// Instruction footer. -|.if 1 -| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. -| .define ins_next, ins_NEXT -| .define ins_next_, ins_NEXT -| .define ins_next1, ins_NEXT1 -| .define ins_next2, ins_NEXT2 -|.else -| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. -| // Affects only certain kinds of benchmarks (and only with -j off). -| .macro ins_next -| b ->ins_next -| .endmacro -| .macro ins_next1 -| .endmacro -| .macro ins_next2 -| b ->ins_next -| .endmacro -| .macro ins_next_ -| ->ins_next: -| ins_NEXT -| .endmacro -|.endif -| -|// Call decode and dispatch. -|.macro ins_callt -| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC -| lw PC, LFUNC:RB->pc -| lw INS, 0(PC) -| addiu PC, PC, 4 -| decode_OP4a TMP1, INS -| decode_RA8a RA, INS -| decode_OP4b TMP1 -| decode_RA8b RA -| addu TMP0, DISPATCH, TMP1 -| lw TMP0, 0(TMP0) -| jr TMP0 -| addu RA, RA, BASE -|.endmacro -| -|.macro ins_call -| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC -| sw PC, FRAME_PC(BASE) -| ins_callt -|.endmacro -| -|//----------------------------------------------------------------------- -| -|.macro branch_RD -| srl TMP0, RD, 1 -| lui AT, (-(BCBIAS_J*4 >> 16) & 65535) -| addu TMP0, TMP0, AT -| addu PC, PC, TMP0 -|.endmacro -| -|// Assumes DISPATCH is relative to GL. 
-#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) -#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) -#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch)) -#define DISPATCH_GOT(name) (GG_DISP2GOT + 4*LJ_GOT_##name) -| -#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) -| -|.macro load_got, func -| lw CFUNCADDR, DISPATCH_GOT(func)(DISPATCH) -|.endmacro -|// Much faster. Sadly, there's no easy way to force the required code layout. -|// .macro call_intern, func; bal extern func; .endmacro -|.macro call_intern, func; jalr CFUNCADDR; .endmacro -|.macro call_extern; jalr CFUNCADDR; .endmacro -|.macro jmp_extern; jr CFUNCADDR; .endmacro -| -|.macro hotcheck, delta, target -| srl TMP1, PC, 1 -| andi TMP1, TMP1, 126 -| addu TMP1, TMP1, DISPATCH -| lhu TMP2, GG_DISP2HOT(TMP1) -| addiu TMP2, TMP2, -delta -| bltz TMP2, target -|. sh TMP2, GG_DISP2HOT(TMP1) -|.endmacro -| -|.macro hotloop -| hotcheck HOTCOUNT_LOOP, ->vm_hotloop -|.endmacro -| -|.macro hotcall -| hotcheck HOTCOUNT_CALL, ->vm_hotcall -|.endmacro -| -|// Set current VM state. Uses TMP0. -|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro -|.macro st_vmstate; sw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro -| -|// Move table write barrier back. Overwrites mark and tmp. -|.macro barrierback, tab, mark, tmp, target -| lw tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) -| andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab) -| sw tab, DISPATCH_GL(gc.grayagain)(DISPATCH) -| sb mark, tab->marked -| b target -|. sw tmp, tab->gclist -|.endmacro -| -|//----------------------------------------------------------------------- - -/* Generate subroutines used by opcodes and other parts of the VM. */ -/* The .code_sub section should be last to help static branch prediction. 
*/ -static void build_subroutines(BuildCtx *ctx) -{ - |.code_sub - | - |//----------------------------------------------------------------------- - |//-- Return handling ---------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_returnp: - | // See vm_return. Also: TMP2 = previous base. - | andi AT, PC, FRAME_P - | beqz AT, ->cont_dispatch - |. li TMP1, LJ_TTRUE - | - | // Return from pcall or xpcall fast func. - | lw PC, FRAME_PC(TMP2) // Fetch PC of previous frame. - | move BASE, TMP2 // Restore caller base. - | // Prepending may overwrite the pcall frame, so do it at the end. - | sw TMP1, FRAME_PC(RA) // Prepend true to results. - | addiu RA, RA, -8 - | - |->vm_returnc: - | addiu RD, RD, 8 // RD = (nresults+1)*8. - | andi TMP0, PC, FRAME_TYPE - | beqz RD, ->vm_unwind_c_eh - |. li CRET1, LUA_YIELD - | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua. - |. move MULTRES, RD - | - |->vm_return: - | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return - | // TMP0 = PC & FRAME_TYPE - | li TMP2, -8 - | xori AT, TMP0, FRAME_C - | and TMP2, PC, TMP2 - | bnez AT, ->vm_returnp - | subu TMP2, BASE, TMP2 // TMP2 = previous base. - | - | addiu TMP1, RD, -8 - | sw TMP2, L->base - | li_vmstate C - | lw TMP2, SAVE_NRES - | addiu BASE, BASE, -8 - | st_vmstate - | beqz TMP1, >2 - |. sll TMP2, TMP2, 3 - |1: - | addiu TMP1, TMP1, -8 - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | addiu RA, RA, 8 - | sw SFRETHI, HI(BASE) - | sw SFRETLO, LO(BASE) - | bnez TMP1, <1 - |. addiu BASE, BASE, 8 - | - |2: - | bne TMP2, RD, >6 - |3: - |. sw BASE, L->top // Store new top. - | - |->vm_leave_cp: - | lw TMP0, SAVE_CFRAME // Restore previous C frame. - | move CRET1, r0 // Ok return status for vm_pcall. - | sw TMP0, L->cframe - | - |->vm_leave_unw: - | restoreregs_ret - | - |6: - | lw TMP1, L->maxstack - | slt AT, TMP2, RD - | bnez AT, >7 // Less results wanted? - | // More results wanted. 
Check stack size and fill up results with nil. - |. slt AT, BASE, TMP1 - | beqz AT, >8 - |. nop - | sw TISNIL, HI(BASE) - | addiu RD, RD, 8 - | b <2 - |. addiu BASE, BASE, 8 - | - |7: // Less results wanted. - | subu TMP0, RD, TMP2 - | subu TMP0, BASE, TMP0 // Either keep top or shrink it. - | b <3 - |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case? - | - |8: // Corner case: need to grow stack for filling up results. - | // This can happen if: - | // - A C function grows the stack (a lot). - | // - The GC shrinks the stack in between. - | // - A return back from a lua_call() with (high) nresults adjustment. - | load_got lj_state_growstack - | move MULTRES, RD - | srl CARG2, TMP2, 3 - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | lw TMP2, SAVE_NRES - | lw BASE, L->top // Need the (realloced) L->top in BASE. - | move RD, MULTRES - | b <2 - |. sll TMP2, TMP2, 3 - | - |->vm_unwind_c: // Unwind C stack, return from vm_pcall. - | // (void *cframe, int errcode) - | move sp, CARG1 - | move CRET1, CARG2 - |->vm_unwind_c_eh: // Landing pad for external unwinder. - | lw L, SAVE_L - | li TMP0, ~LJ_VMST_C - | lw GL:TMP1, L->glref - | b ->vm_leave_unw - |. sw TMP0, GL:TMP1->vmstate - | - |->vm_unwind_ff: // Unwind C stack, return from ff pcall. - | // (void *cframe) - | li AT, -4 - | and sp, CARG1, AT - |->vm_unwind_ff_eh: // Landing pad for external unwinder. - | lw L, SAVE_L - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | li TISNIL, LJ_TNIL - | lw BASE, L->base - | lw DISPATCH, L->glref // Setup pointer to dispatch table. - | .FPU mtc1 TMP3, TOBIT - | li TMP1, LJ_TFALSE - | li_vmstate INTERP - | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame. - | .FPU cvt.d.s TOBIT, TOBIT - | addiu RA, BASE, -8 // Results start at BASE-8. - | addiu DISPATCH, DISPATCH, GG_G2DISP - | sw TMP1, HI(RA) // Prepend false to error message. - | st_vmstate - | b ->vm_returnc - |. 
li RD, 16 // 2 results: false + error message. - | - |//----------------------------------------------------------------------- - |//-- Grow stack for calls ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_growstack_c: // Grow stack for C function. - | b >2 - |. li CARG2, LUA_MINSTACK - | - |->vm_growstack_l: // Grow stack for Lua function. - | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC - | addu RC, BASE, RC - | subu RA, RA, BASE - | sw BASE, L->base - | addiu PC, PC, 4 // Must point after first instruction. - | sw RC, L->top - | srl CARG2, RA, 3 - |2: - | // L->base = new base, L->top = top - | load_got lj_state_growstack - | sw PC, SAVE_PC - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | lw BASE, L->base - | lw RC, L->top - | lw LFUNC:RB, FRAME_FUNC(BASE) - | subu RC, RC, BASE - | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC - | ins_callt // Just retry the call. - | - |//----------------------------------------------------------------------- - |//-- Entry points into the assembler VM --------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_resume: // Setup C frame and resume thread. - | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) - | saveregs - | move L, CARG1 - | lw DISPATCH, L->glref // Setup pointer to dispatch table. - | move BASE, CARG2 - | lbu TMP1, L->status - | sw L, SAVE_L - | li PC, FRAME_CP - | addiu TMP0, sp, CFRAME_RESUME - | addiu DISPATCH, DISPATCH, GG_G2DISP - | sw r0, SAVE_NRES - | sw r0, SAVE_ERRF - | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. - | sw r0, SAVE_CFRAME - | beqz TMP1, >3 - |. sw TMP0, L->cframe - | - | // Resume after yield (like a return). 
- | sw L, DISPATCH_GL(cur_L)(DISPATCH) - | move RA, BASE - | lw BASE, L->base - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lw TMP1, L->top - | lw PC, FRAME_PC(BASE) - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | subu RD, TMP1, BASE - | .FPU mtc1 TMP3, TOBIT - | sb r0, L->status - | .FPU cvt.d.s TOBIT, TOBIT - | li_vmstate INTERP - | addiu RD, RD, 8 - | st_vmstate - | move MULTRES, RD - | andi TMP0, PC, FRAME_TYPE - | beqz TMP0, ->BC_RET_Z - |. li TISNIL, LJ_TNIL - | b ->vm_return - |. nop - | - |->vm_pcall: // Setup protected C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) - | saveregs - | sw CARG4, SAVE_ERRF - | b >1 - |. li PC, FRAME_CP - | - |->vm_call: // Setup C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1) - | saveregs - | li PC, FRAME_C - | - |1: // Entry point for vm_pcall above (PC = ftype). - | lw TMP1, L:CARG1->cframe - | move L, CARG1 - | sw CARG3, SAVE_NRES - | lw DISPATCH, L->glref // Setup pointer to dispatch table. - | sw CARG1, SAVE_L - | move BASE, CARG2 - | addiu DISPATCH, DISPATCH, GG_G2DISP - | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. - | sw TMP1, SAVE_CFRAME - | sw sp, L->cframe // Add our C frame to cframe chain. - | - |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). - | sw L, DISPATCH_GL(cur_L)(DISPATCH) - | lw TMP2, L->base // TMP2 = old base (used in vmeta_call). - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | lw TMP1, L->top - | .FPU mtc1 TMP3, TOBIT - | addu PC, PC, BASE - | subu NARGS8:RC, TMP1, BASE - | subu PC, PC, TMP2 // PC = frame delta + frame type - | .FPU cvt.d.s TOBIT, TOBIT - | li_vmstate INTERP - | li TISNIL, LJ_TNIL - | st_vmstate - | - |->vm_call_dispatch: - | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC - | lw TMP0, FRAME_PC(BASE) - | li AT, LJ_TFUNC - | bne TMP0, AT, ->vmeta_call - |. 
lw LFUNC:RB, FRAME_FUNC(BASE) - | - |->vm_call_dispatch_f: - | ins_call - | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC - | - |->vm_cpcall: // Setup protected C frame, call C. - | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) - | saveregs - | move L, CARG1 - | lw TMP0, L:CARG1->stack - | sw CARG1, SAVE_L - | lw TMP1, L->top - | lw DISPATCH, L->glref // Setup pointer to dispatch table. - | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. - | subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). - | lw TMP1, L->cframe - | addiu DISPATCH, DISPATCH, GG_G2DISP - | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. - | sw r0, SAVE_ERRF // No error function. - | sw TMP1, SAVE_CFRAME - | sw sp, L->cframe // Add our C frame to cframe chain. - | sw L, DISPATCH_GL(cur_L)(DISPATCH) - | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) - |. move CFUNCADDR, CARG4 - | move BASE, CRET1 - | bnez CRET1, <3 // Else continue with the call. - |. li PC, FRAME_CP - | b ->vm_leave_cp // No base? Just remove C frame. - |. nop - | - |//----------------------------------------------------------------------- - |//-- Metamethod handling ------------------------------------------------ - |//----------------------------------------------------------------------- - | - |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the - |// stack, so BASE doesn't need to be reloaded across these calls. - | - |//-- Continuation dispatch ---------------------------------------------- - | - |->cont_dispatch: - | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 - | lw TMP0, -16+LO(BASE) // Continuation. - | move RB, BASE - | move BASE, TMP2 // Restore caller BASE. - | lw LFUNC:TMP1, FRAME_FUNC(TMP2) - |.if FFI - | sltiu AT, TMP0, 2 - |.endif - | lw PC, -16+HI(RB) // Restore PC from [cont|PC]. - | addu TMP2, RA, RD - | lw TMP1, LFUNC:TMP1->pc - |.if FFI - | bnez AT, >1 - |.endif - |. 
sw TISNIL, -8+HI(TMP2) // Ensure one valid arg. - | // BASE = base, RA = resultptr, RB = meta base - | jr TMP0 // Jump to continuation. - |. lw KBASE, PC2PROTO(k)(TMP1) - | - |.if FFI - |1: - | bnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback. - | // cont = 0: tailcall from C function. - |. addiu TMP1, RB, -16 - | b ->vm_call_tail - |. subu RC, TMP1, BASE - |.endif - | - |->cont_cat: // RA = resultptr, RB = meta base - | lw INS, -4(PC) - | addiu CARG2, RB, -16 - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | decode_RB8a MULTRES, INS - | decode_RA8a RA, INS - | decode_RB8b MULTRES - | decode_RA8b RA - | addu TMP1, BASE, MULTRES - | sw BASE, L->base - | subu CARG3, CARG2, TMP1 - | sw SFRETHI, HI(CARG2) - | bne TMP1, CARG2, ->BC_CAT_Z - |. sw SFRETLO, LO(CARG2) - | addu RA, BASE, RA - | sw SFRETHI, HI(RA) - | b ->cont_nop - |. sw SFRETLO, LO(RA) - | - |//-- Table indexing metamethods ----------------------------------------- - | - |->vmeta_tgets1: - | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TSTR - | sw STR:RC, LO(CARG3) - | b >1 - |. sw TMP0, HI(CARG3) - | - |->vmeta_tgets: - | addiu CARG2, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TTAB - | sw TAB:RB, LO(CARG2) - | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv2) - | sw TMP0, HI(CARG2) - | li TMP1, LJ_TSTR - | sw STR:RC, LO(CARG3) - | b >1 - |. sw TMP1, HI(CARG3) - | - |->vmeta_tgetb: // TMP0 = index - | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | sw TMP0, LO(CARG3) - | sw TISNUM, HI(CARG3) - | - |->vmeta_tgetv: - |1: - | load_got lj_meta_tget - | sw BASE, L->base - | sw PC, SAVE_PC - | call_intern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) - |. move CARG1, L - | // Returns TValue * (finished) or NULL (metamethod). - | beqz CRET1, >3 - |. addiu TMP1, BASE, -FRAME_CONT - | lw SFARG1HI, HI(CRET1) - | lw SFARG2HI, LO(CRET1) - | ins_next1 - | sw SFARG1HI, HI(RA) - | sw SFARG2HI, LO(RA) - | ins_next2 - | - |3: // Call __index metamethod. 
- | // BASE = base, L->top = new base, stack = cont/func/t/k - | lw BASE, L->top - | sw PC, -16+HI(BASE) // [cont|PC] - | subu PC, BASE, TMP1 - | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | b ->vm_call_dispatch_f - |. li NARGS8:RC, 16 // 2 args for func(t, k). - | - |->vmeta_tgetr: - | load_got lj_tab_getinth - | call_intern lj_tab_getinth // (GCtab *t, int32_t key) - |. nop - | // Returns cTValue * or NULL. - | beqz CRET1, ->BC_TGETR_Z - |. move SFARG2HI, TISNIL - | lw SFARG2HI, HI(CRET1) - | b ->BC_TGETR_Z - |. lw SFARG2LO, LO(CRET1) - | - |//----------------------------------------------------------------------- - | - |->vmeta_tsets1: - | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TSTR - | sw STR:RC, LO(CARG3) - | b >1 - |. sw TMP0, HI(CARG3) - | - |->vmeta_tsets: - | addiu CARG2, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TTAB - | sw TAB:RB, LO(CARG2) - | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv2) - | sw TMP0, HI(CARG2) - | li TMP1, LJ_TSTR - | sw STR:RC, LO(CARG3) - | b >1 - |. sw TMP1, HI(CARG3) - | - |->vmeta_tsetb: // TMP0 = index - | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | sw TMP0, LO(CARG3) - | sw TISNUM, HI(CARG3) - | - |->vmeta_tsetv: - |1: - | load_got lj_meta_tset - | sw BASE, L->base - | sw PC, SAVE_PC - | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) - |. move CARG1, L - | // Returns TValue * (finished) or NULL (metamethod). - | lw SFARG1HI, HI(RA) - | beqz CRET1, >3 - |. lw SFARG1LO, LO(RA) - | // NOBARRIER: lj_meta_tset ensures the table is not black. - | ins_next1 - | sw SFARG1HI, HI(CRET1) - | sw SFARG1LO, LO(CRET1) - | ins_next2 - | - |3: // Call __newindex metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) - | addiu TMP1, BASE, -FRAME_CONT - | lw BASE, L->top - | sw PC, -16+HI(BASE) // [cont|PC] - | subu PC, BASE, TMP1 - | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | sw SFARG1HI, 16+HI(BASE) // Copy value to third argument. 
- | sw SFARG1LO, 16+LO(BASE) - | b ->vm_call_dispatch_f - |. li NARGS8:RC, 24 // 3 args for func(t, k, v) - | - |->vmeta_tsetr: - | load_got lj_tab_setinth - | sw BASE, L->base - | sw PC, SAVE_PC - | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) - |. move CARG1, L - | // Returns TValue *. - | b ->BC_TSETR_Z - |. nop - | - |//-- Comparison metamethods --------------------------------------------- - | - |->vmeta_comp: - | // RA/RD point to o1/o2. - | move CARG2, RA - | move CARG3, RD - | load_got lj_meta_comp - | addiu PC, PC, -4 - | sw BASE, L->base - | sw PC, SAVE_PC - | decode_OP1 CARG4, INS - | call_intern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) - |. move CARG1, L - | // Returns 0/1 or TValue * (metamethod). - |3: - | sltiu AT, CRET1, 2 - | beqz AT, ->vmeta_binop - | negu TMP2, CRET1 - |4: - | lhu RD, OFS_RD(PC) - | addiu PC, PC, 4 - | lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) - | sll RD, RD, 2 - | addu RD, RD, TMP1 - | and RD, RD, TMP2 - | addu PC, PC, RD - |->cont_nop: - | ins_next - | - |->cont_ra: // RA = resultptr - | lbu TMP1, -4+OFS_RA(PC) - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | sll TMP1, TMP1, 3 - | addu TMP1, BASE, TMP1 - | sw SFRETHI, HI(TMP1) - | b ->cont_nop - |. sw SFRETLO, LO(TMP1) - | - |->cont_condt: // RA = resultptr - | lw TMP0, HI(RA) - | sltiu AT, TMP0, LJ_TISTRUECOND - | b <4 - |. negu TMP2, AT // Branch if result is true. - | - |->cont_condf: // RA = resultptr - | lw TMP0, HI(RA) - | sltiu AT, TMP0, LJ_TISTRUECOND - | b <4 - |. addiu TMP2, AT, -1 // Branch if result is false. - | - |->vmeta_equal: - | // SFARG1LO/SFARG2LO point to o1/o2. TMP0 is set to 0/1. - | load_got lj_meta_equal - | move CARG2, SFARG1LO - | move CARG3, SFARG2LO - | move CARG4, TMP0 - | addiu PC, PC, -4 - | sw BASE, L->base - | sw PC, SAVE_PC - | call_intern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) - |. move CARG1, L - | // Returns 0/1 or TValue * (metamethod). - | b <3 - |. 
nop - | - |->vmeta_equal_cd: - |.if FFI - | load_got lj_meta_equal_cd - | move CARG2, INS - | addiu PC, PC, -4 - | sw BASE, L->base - | sw PC, SAVE_PC - | call_intern lj_meta_equal_cd // (lua_State *L, BCIns op) - |. move CARG1, L - | // Returns 0/1 or TValue * (metamethod). - | b <3 - |. nop - |.endif - | - |->vmeta_istype: - | load_got lj_meta_istype - | addiu PC, PC, -4 - | sw BASE, L->base - | srl CARG2, RA, 3 - | srl CARG3, RD, 3 - | sw PC, SAVE_PC - | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) - |. move CARG1, L - | b ->cont_nop - |. nop - | - |//-- Arithmetic metamethods --------------------------------------------- - | - |->vmeta_unm: - | move RC, RB - | - |->vmeta_arith: - | load_got lj_meta_arith - | decode_OP1 TMP0, INS - | sw BASE, L->base - | move CARG2, RA - | sw PC, SAVE_PC - | move CARG3, RB - | move CARG4, RC - | sw TMP0, ARG5 - | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) - |. move CARG1, L - | // Returns NULL (finished) or TValue * (metamethod). - | beqz CRET1, ->cont_nop - |. nop - | - | // Call metamethod for binary op. - |->vmeta_binop: - | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 - | subu TMP1, CRET1, BASE - | sw PC, -16+HI(CRET1) // [cont|PC] - | move TMP2, BASE - | addiu PC, TMP1, FRAME_CONT - | move BASE, CRET1 - | b ->vm_call_dispatch - |. li NARGS8:RC, 16 // 2 args for func(o1, o2). - | - |->vmeta_len: - | // CARG2 already set by BC_LEN. -#if LJ_52 - | move MULTRES, CARG1 -#endif - | load_got lj_meta_len - | sw BASE, L->base - | sw PC, SAVE_PC - | call_intern lj_meta_len // (lua_State *L, TValue *o) - |. move CARG1, L - | // Returns NULL (retry) or TValue * (metamethod base). -#if LJ_52 - | bnez CRET1, ->vmeta_binop // Binop call for compatibility. - |. nop - | b ->BC_LEN_Z - |. move CARG1, MULTRES -#else - | b ->vmeta_binop // Binop call for compatibility. - |. 
nop -#endif - | - |//-- Call metamethod ---------------------------------------------------- - | - |->vmeta_call: // Resolve and call __call metamethod. - | // TMP2 = old base, BASE = new base, RC = nargs*8 - | load_got lj_meta_call - | sw TMP2, L->base // This is the callers base! - | addiu CARG2, BASE, -8 - | sw PC, SAVE_PC - | addu CARG3, BASE, RC - | move MULTRES, NARGS8:RC - | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - |. move CARG1, L - | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | addiu NARGS8:RC, MULTRES, 8 // Got one more argument now. - | ins_call - | - |->vmeta_callt: // Resolve __call for BC_CALLT. - | // BASE = old base, RA = new base, RC = nargs*8 - | load_got lj_meta_call - | sw BASE, L->base - | addiu CARG2, RA, -8 - | sw PC, SAVE_PC - | addu CARG3, RA, RC - | move MULTRES, NARGS8:RC - | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - |. move CARG1, L - | lw TMP1, FRAME_PC(BASE) - | lw LFUNC:RB, FRAME_FUNC(RA) // Guaranteed to be a function here. - | b ->BC_CALLT_Z - |. addiu NARGS8:RC, MULTRES, 8 // Got one more argument now. - | - |//-- Argument coercion for 'for' statement ------------------------------ - | - |->vmeta_for: - | load_got lj_meta_for - | sw BASE, L->base - | move CARG2, RA - | sw PC, SAVE_PC - | move MULTRES, INS - | call_intern lj_meta_for // (lua_State *L, TValue *base) - |. move CARG1, L - |.if JIT - | decode_OP1 TMP0, MULTRES - | li AT, BC_JFORI - |.endif - | decode_RA8a RA, MULTRES - | decode_RD8a RD, MULTRES - | decode_RA8b RA - |.if JIT - | beq TMP0, AT, =>BC_JFORI - |. decode_RD8b RD - | b =>BC_FORI - |. nop - |.else - | b =>BC_FORI - |. decode_RD8b RD - |.endif - | - |//----------------------------------------------------------------------- - |//-- Fast functions ----------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro .ffunc, name - |->ff_ .. 
name: - |.endmacro - | - |.macro .ffunc_1, name - |->ff_ .. name: - | lw SFARG1HI, HI(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. lw SFARG1LO, LO(BASE) - |.endmacro - | - |.macro .ffunc_2, name - |->ff_ .. name: - | sltiu AT, NARGS8:RC, 16 - | lw SFARG1HI, HI(BASE) - | bnez AT, ->fff_fallback - |. lw SFARG2HI, 8+HI(BASE) - | lw SFARG1LO, LO(BASE) - | lw SFARG2LO, 8+LO(BASE) - |.endmacro - | - |.macro .ffunc_n, name // Caveat: has delay slot! - |->ff_ .. name: - | lw SFARG1HI, HI(BASE) - |.if FPU - | ldc1 FARG1, 0(BASE) - |.else - | lw SFARG1LO, LO(BASE) - |.endif - | beqz NARGS8:RC, ->fff_fallback - |. sltiu AT, SFARG1HI, LJ_TISNUM - | beqz AT, ->fff_fallback - |.endmacro - | - |.macro .ffunc_nn, name // Caveat: has delay slot! - |->ff_ .. name: - | sltiu AT, NARGS8:RC, 16 - | lw SFARG1HI, HI(BASE) - | bnez AT, ->fff_fallback - |. lw SFARG2HI, 8+HI(BASE) - | sltiu TMP0, SFARG1HI, LJ_TISNUM - |.if FPU - | ldc1 FARG1, 0(BASE) - |.else - | lw SFARG1LO, LO(BASE) - |.endif - | sltiu TMP1, SFARG2HI, LJ_TISNUM - |.if FPU - | ldc1 FARG2, 8(BASE) - |.else - | lw SFARG2LO, 8+LO(BASE) - |.endif - | and TMP0, TMP0, TMP1 - | beqz TMP0, ->fff_fallback - |.endmacro - | - |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot! - |.macro ffgccheck - | lw TMP0, DISPATCH_GL(gc.total)(DISPATCH) - | lw TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) - | subu AT, TMP0, TMP1 - | bgezal AT, ->fff_gcstep - |.endmacro - | - |//-- Base library: checks ----------------------------------------------- - | - |.ffunc_1 assert - | sltiu AT, SFARG1HI, LJ_TISTRUECOND - | beqz AT, ->fff_fallback - |. addiu RA, BASE, -8 - | lw PC, FRAME_PC(BASE) - | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. - | addu TMP2, RA, NARGS8:RC - | sw SFARG1HI, HI(RA) - | addiu TMP1, BASE, 8 - | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. - |. 
sw SFARG1LO, LO(RA) - |1: - | lw SFRETHI, HI(TMP1) - | lw SFRETLO, LO(TMP1) - | sw SFRETHI, -8+HI(TMP1) - | sw SFRETLO, -8+LO(TMP1) - | bne TMP1, TMP2, <1 - |. addiu TMP1, TMP1, 8 - | b ->fff_res - |. nop - | - |.ffunc type - | lw SFARG1HI, HI(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. sltiu TMP0, SFARG1HI, LJ_TISNUM - | movn SFARG1HI, TISNUM, TMP0 - | not TMP1, SFARG1HI - | sll TMP1, TMP1, 3 - | addu TMP1, CFUNC:RB, TMP1 - | lw SFARG1HI, CFUNC:TMP1->upvalue[0].u32.hi - | b ->fff_restv - |. lw SFARG1LO, CFUNC:TMP1->upvalue[0].u32.lo - | - |//-- Base library: getters and setters --------------------------------- - | - |.ffunc_1 getmetatable - | li AT, LJ_TTAB - | bne SFARG1HI, AT, >6 - |. li AT, LJ_TUDATA - |1: // Field metatable must be at same offset for GCtab and GCudata! - | lw TAB:SFARG1LO, TAB:SFARG1LO->metatable - |2: - | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) - | beqz TAB:SFARG1LO, ->fff_restv - |. li SFARG1HI, LJ_TNIL - | lw TMP0, TAB:SFARG1LO->hmask - | li SFARG1HI, LJ_TTAB // Use metatable as default result. - | lw TMP1, STR:RC->hash - | lw NODE:TMP2, TAB:SFARG1LO->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | sll TMP0, TMP1, 5 - | sll TMP1, TMP1, 3 - | subu TMP1, TMP0, TMP1 - | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - | li AT, LJ_TSTR - |3: // Rearranged logic, because we expect _not_ to find the key. - | lw CARG4, offsetof(Node, key)+HI(NODE:TMP2) - | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) - | lw NODE:TMP3, NODE:TMP2->next - | bne CARG4, AT, >4 - |. lw CARG3, offsetof(Node, val)+HI(NODE:TMP2) - | beq TMP0, STR:RC, >5 - |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2) - |4: - | beqz NODE:TMP3, ->fff_restv // Not found, keep default result. - |. move NODE:TMP2, NODE:TMP3 - | b <3 - |. nop - |5: - | beq CARG3, TISNIL, ->fff_restv // Ditto for nil value. - |. nop - | move SFARG1HI, CARG3 // Return value of mt.__metatable. - | b ->fff_restv - |. 
move SFARG1LO, TMP1 - | - |6: - | beq SFARG1HI, AT, <1 - |. sltu AT, TISNUM, SFARG1HI - | movz SFARG1HI, TISNUM, AT - | not TMP1, SFARG1HI - | sll TMP1, TMP1, 2 - | addu TMP1, DISPATCH, TMP1 - | b <2 - |. lw TAB:SFARG1LO, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1) - | - |.ffunc_2 setmetatable - | // Fast path: no mt for table yet and not clearing the mt. - | li AT, LJ_TTAB - | bne SFARG1HI, AT, ->fff_fallback - |. addiu SFARG2HI, SFARG2HI, -LJ_TTAB - | lw TAB:TMP1, TAB:SFARG1LO->metatable - | lbu TMP3, TAB:SFARG1LO->marked - | or AT, SFARG2HI, TAB:TMP1 - | bnez AT, ->fff_fallback - |. andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | beqz AT, ->fff_restv - |. sw TAB:SFARG2LO, TAB:SFARG1LO->metatable - | barrierback TAB:SFARG1LO, TMP3, TMP0, ->fff_restv - | - |.ffunc rawget - | lw CARG4, HI(BASE) - | sltiu AT, NARGS8:RC, 16 - | lw TAB:CARG2, LO(BASE) - | load_got lj_tab_get - | addiu CARG4, CARG4, -LJ_TTAB - | or AT, AT, CARG4 - | bnez AT, ->fff_fallback - | addiu CARG3, BASE, 8 - | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) - |. move CARG1, L - | // Returns cTValue *. - | lw SFARG1HI, HI(CRET1) - | b ->fff_restv - |. lw SFARG1LO, LO(CRET1) - | - |//-- Base library: conversions ------------------------------------------ - | - |.ffunc tonumber - | // Only handles the number case inline (without a base argument). - | lw CARG1, HI(BASE) - | xori AT, NARGS8:RC, 8 // Exactly one number argument. - | sltu TMP0, TISNUM, CARG1 - | or AT, AT, TMP0 - | bnez AT, ->fff_fallback - |. lw SFARG1HI, HI(BASE) - | b ->fff_restv - |. lw SFARG1LO, LO(BASE) - | - |.ffunc_1 tostring - | // Only handles the string or number case inline. - | li AT, LJ_TSTR - | // A __tostring method in the string base metatable is ignored. - | beq SFARG1HI, AT, ->fff_restv // String key? - | // Handle numbers inline, unless a number base metatable is present. - |. 
lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) - | sltu TMP0, TISNUM, SFARG1HI - | or TMP0, TMP0, TMP1 - | bnez TMP0, ->fff_fallback - |. sw BASE, L->base // Add frame since C call can throw. - | ffgccheck - |. sw PC, SAVE_PC // Redundant (but a defined value). - | load_got lj_strfmt_number - | move CARG1, L - | call_intern lj_strfmt_number // (lua_State *L, cTValue *o) - |. move CARG2, BASE - | // Returns GCstr *. - | li SFARG1HI, LJ_TSTR - | b ->fff_restv - |. move SFARG1LO, CRET1 - | - |//-- Base library: iterators ------------------------------------------- - | - |.ffunc next - | lw CARG1, HI(BASE) - | lw TAB:CARG2, LO(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. addu TMP2, BASE, NARGS8:RC - | li AT, LJ_TTAB - | sw TISNIL, HI(TMP2) // Set missing 2nd arg to nil. - | bne CARG1, AT, ->fff_fallback - |. lw PC, FRAME_PC(BASE) - | load_got lj_tab_next - | sw BASE, L->base // Add frame since C call can throw. - | sw BASE, L->top // Dummy frame length is ok. - | addiu CARG3, BASE, 8 - | sw PC, SAVE_PC - | call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - |. move CARG1, L - | // Returns 0 at end of traversal. - | beqz CRET1, ->fff_restv // End of traversal: return nil. - |. li SFARG1HI, LJ_TNIL - | lw TMP0, 8+HI(BASE) - | lw TMP1, 8+LO(BASE) - | addiu RA, BASE, -8 - | lw TMP2, 16+HI(BASE) - | lw TMP3, 16+LO(BASE) - | sw TMP0, HI(RA) - | sw TMP1, LO(RA) - | sw TMP2, 8+HI(RA) - | sw TMP3, 8+LO(RA) - | b ->fff_res - |. li RD, (2+1)*8 - | - |.ffunc_1 pairs - | li AT, LJ_TTAB - | bne SFARG1HI, AT, ->fff_fallback - |. lw PC, FRAME_PC(BASE) -#if LJ_52 - | lw TAB:TMP2, TAB:SFARG1LO->metatable - | lw TMP0, CFUNC:RB->upvalue[0].u32.hi - | lw TMP1, CFUNC:RB->upvalue[0].u32.lo - | bnez TAB:TMP2, ->fff_fallback -#else - | lw TMP0, CFUNC:RB->upvalue[0].u32.hi - | lw TMP1, CFUNC:RB->upvalue[0].u32.lo -#endif - |. addiu RA, BASE, -8 - | sw TISNIL, 8+HI(BASE) - | sw TMP0, HI(RA) - | sw TMP1, LO(RA) - | b ->fff_res - |. 
li RD, (3+1)*8 - | - |.ffunc ipairs_aux - | sltiu AT, NARGS8:RC, 16 - | lw CARG3, HI(BASE) - | lw TAB:CARG1, LO(BASE) - | lw CARG4, 8+HI(BASE) - | bnez AT, ->fff_fallback - |. addiu CARG3, CARG3, -LJ_TTAB - | xor CARG4, CARG4, TISNUM - | and AT, CARG3, CARG4 - | bnez AT, ->fff_fallback - |. lw PC, FRAME_PC(BASE) - | lw TMP2, 8+LO(BASE) - | lw TMP0, TAB:CARG1->asize - | lw TMP1, TAB:CARG1->array - | addiu TMP2, TMP2, 1 - | sw TISNUM, -8+HI(BASE) - | sltu AT, TMP2, TMP0 - | sw TMP2, -8+LO(BASE) - | beqz AT, >2 // Not in array part? - |. addiu RA, BASE, -8 - | sll TMP3, TMP2, 3 - | addu TMP3, TMP1, TMP3 - | lw TMP1, HI(TMP3) - | lw TMP2, LO(TMP3) - |1: - | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. - |. li RD, (0+1)*8 - | sw TMP1, 8+HI(RA) - | sw TMP2, 8+LO(RA) - | b ->fff_res - |. li RD, (2+1)*8 - | - |2: // Check for empty hash part first. Otherwise call C function. - | lw TMP0, TAB:CARG1->hmask - | load_got lj_tab_getinth - | beqz TMP0, ->fff_res - |. li RD, (0+1)*8 - | call_intern lj_tab_getinth // (GCtab *t, int32_t key) - |. move CARG2, TMP2 - | // Returns cTValue * or NULL. - | beqz CRET1, ->fff_res - |. li RD, (0+1)*8 - | lw TMP1, HI(CRET1) - | b <1 - |. lw TMP2, LO(CRET1) - | - |.ffunc_1 ipairs - | li AT, LJ_TTAB - | bne SFARG1HI, AT, ->fff_fallback - |. lw PC, FRAME_PC(BASE) -#if LJ_52 - | lw TAB:TMP2, TAB:SFARG1LO->metatable - | lw TMP0, CFUNC:RB->upvalue[0].u32.hi - | lw TMP1, CFUNC:RB->upvalue[0].u32.lo - | bnez TAB:TMP2, ->fff_fallback -#else - | lw TMP0, CFUNC:RB->upvalue[0].u32.hi - | lw TMP1, CFUNC:RB->upvalue[0].u32.lo -#endif - |. addiu RA, BASE, -8 - | sw TISNUM, 8+HI(BASE) - | sw r0, 8+LO(BASE) - | sw TMP0, HI(RA) - | sw TMP1, LO(RA) - | b ->fff_res - |. 
li RD, (3+1)*8 - | - |//-- Base library: catch errors ---------------------------------------- - | - |.ffunc pcall - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | beqz NARGS8:RC, ->fff_fallback - | move TMP2, BASE - | addiu BASE, BASE, 8 - | // Remember active hook before pcall. - | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT - | andi TMP3, TMP3, 1 - | addiu PC, TMP3, 8+FRAME_PCALL - | b ->vm_call_dispatch - |. addiu NARGS8:RC, NARGS8:RC, -8 - | - |.ffunc xpcall - | sltiu AT, NARGS8:RC, 16 - | lw CARG4, 8+HI(BASE) - | bnez AT, ->fff_fallback - |. lw CARG3, 8+LO(BASE) - | lw CARG1, LO(BASE) - | lw CARG2, HI(BASE) - | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) - | li AT, LJ_TFUNC - | move TMP2, BASE - | bne CARG4, AT, ->fff_fallback // Traceback must be a function. - | addiu BASE, BASE, 16 - | // Remember active hook before pcall. - | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT - | sw CARG3, LO(TMP2) // Swap function and traceback. - | sw CARG4, HI(TMP2) - | andi TMP3, TMP3, 1 - | sw CARG1, 8+LO(TMP2) - | sw CARG2, 8+HI(TMP2) - | addiu PC, TMP3, 16+FRAME_PCALL - | b ->vm_call_dispatch - |. addiu NARGS8:RC, NARGS8:RC, -16 - | - |//-- Coroutine library -------------------------------------------------- - | - |.macro coroutine_resume_wrap, resume - |.if resume - |.ffunc coroutine_resume - | lw CARG3, HI(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. lw CARG1, LO(BASE) - | li AT, LJ_TTHREAD - | bne CARG3, AT, ->fff_fallback - |.else - |.ffunc coroutine_wrap_aux - | lw L:CARG1, CFUNC:RB->upvalue[0].gcr - |.endif - | lbu TMP0, L:CARG1->status - | lw TMP1, L:CARG1->cframe - | lw CARG2, L:CARG1->top - | lw TMP2, L:CARG1->base - | addiu TMP3, TMP0, -LUA_YIELD - | bgtz TMP3, ->fff_fallback // st > LUA_YIELD? - |. xor TMP2, TMP2, CARG2 - | bnez TMP1, ->fff_fallback // cframe != 0? - |. or AT, TMP2, TMP0 - | lw TMP0, L:CARG1->maxstack - | beqz AT, ->fff_fallback // base == top && st == 0? - |. 
lw PC, FRAME_PC(BASE) - | addu TMP2, CARG2, NARGS8:RC - | sltu AT, TMP0, TMP2 - | bnez AT, ->fff_fallback // Stack overflow? - |. sw PC, SAVE_PC - | sw BASE, L->base - |1: - |.if resume - | addiu BASE, BASE, 8 // Keep resumed thread in stack for GC. - | addiu NARGS8:RC, NARGS8:RC, -8 - | addiu TMP2, TMP2, -8 - |.endif - | sw TMP2, L:CARG1->top - | addu TMP1, BASE, NARGS8:RC - | move CARG3, CARG2 - | sw BASE, L->top - |2: // Move args to coroutine. - | lw SFRETHI, HI(BASE) - | lw SFRETLO, LO(BASE) - | sltu AT, BASE, TMP1 - | beqz AT, >3 - |. addiu BASE, BASE, 8 - | sw SFRETHI, HI(CARG3) - | sw SFRETLO, LO(CARG3) - | b <2 - |. addiu CARG3, CARG3, 8 - |3: - | bal ->vm_resume // (lua_State *L, TValue *base, 0, 0) - |. move L:RA, L:CARG1 - | // Returns thread status. - |4: - | lw TMP2, L:RA->base - | sltiu AT, CRET1, LUA_YIELD+1 - | lw TMP3, L:RA->top - | li_vmstate INTERP - | lw BASE, L->base - | sw L, DISPATCH_GL(cur_L)(DISPATCH) - | st_vmstate - | beqz AT, >8 - |. subu RD, TMP3, TMP2 - | lw TMP0, L->maxstack - | beqz RD, >6 // No results? - |. addu TMP1, BASE, RD - | sltu AT, TMP0, TMP1 - | bnez AT, >9 // Need to grow stack? - |. addu TMP3, TMP2, RD - | sw TMP2, L:RA->top // Clear coroutine stack. - | move TMP1, BASE - |5: // Move results from coroutine. - | lw SFRETHI, HI(TMP2) - | lw SFRETLO, LO(TMP2) - | addiu TMP2, TMP2, 8 - | sltu AT, TMP2, TMP3 - | sw SFRETHI, HI(TMP1) - | sw SFRETLO, LO(TMP1) - | bnez AT, <5 - |. addiu TMP1, TMP1, 8 - |6: - | andi TMP0, PC, FRAME_TYPE - |.if resume - | li TMP1, LJ_TTRUE - | addiu RA, BASE, -8 - | sw TMP1, -8+HI(BASE) // Prepend true to results. - | addiu RD, RD, 16 - |.else - | move RA, BASE - | addiu RD, RD, 8 - |.endif - |7: - | sw PC, SAVE_PC - | beqz TMP0, ->BC_RET_Z - |. move MULTRES, RD - | b ->vm_return - |. nop - | - |8: // Coroutine returned with error (at co->top-1). 
- |.if resume - | addiu TMP3, TMP3, -8 - | li TMP1, LJ_TFALSE - | lw SFRETHI, HI(TMP3) - | lw SFRETLO, LO(TMP3) - | sw TMP3, L:RA->top // Remove error from coroutine stack. - | li RD, (2+1)*8 - | sw TMP1, -8+HI(BASE) // Prepend false to results. - | addiu RA, BASE, -8 - | sw SFRETHI, HI(BASE) // Copy error message. - | sw SFRETLO, LO(BASE) - | b <7 - |. andi TMP0, PC, FRAME_TYPE - |.else - | load_got lj_ffh_coroutine_wrap_err - | move CARG2, L:RA - | call_intern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) - |. move CARG1, L - |.endif - | - |9: // Handle stack expansion on return from yield. - | load_got lj_state_growstack - | srl CARG2, RD, 3 - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | b <4 - |. li CRET1, 0 - |.endmacro - | - | coroutine_resume_wrap 1 // coroutine.resume - | coroutine_resume_wrap 0 // coroutine.wrap - | - |.ffunc coroutine_yield - | lw TMP0, L->cframe - | addu TMP1, BASE, NARGS8:RC - | sw BASE, L->base - | andi TMP0, TMP0, CFRAME_RESUME - | sw TMP1, L->top - | beqz TMP0, ->fff_fallback - |. li CRET1, LUA_YIELD - | sw r0, L->cframe - | b ->vm_leave_unw - |. sb CRET1, L->status - | - |//-- Math library ------------------------------------------------------- - | - |.ffunc_1 math_abs - | bne SFARG1HI, TISNUM, >1 - |. sra TMP0, SFARG1LO, 31 - | xor TMP1, SFARG1LO, TMP0 - | subu SFARG1LO, TMP1, TMP0 - | bgez SFARG1LO, ->fff_restv - |. nop - | lui SFARG1HI, 0x41e0 // 2^31 as a double. - | b ->fff_restv - |. li SFARG1LO, 0 - |1: - | sltiu AT, SFARG1HI, LJ_TISNUM - | beqz AT, ->fff_fallback - |. sll SFARG1HI, SFARG1HI, 1 - | srl SFARG1HI, SFARG1HI, 1 - |// fallthrough - | - |->fff_restv: - | // SFARG1LO/SFARG1HI = TValue result. - | lw PC, FRAME_PC(BASE) - | sw SFARG1HI, -8+HI(BASE) - | addiu RA, BASE, -8 - | sw SFARG1LO, -8+LO(BASE) - |->fff_res1: - | // RA = results, PC = return. - | li RD, (1+1)*8 - |->fff_res: - | // RA = results, RD = (nresults+1)*8, PC = return. 
- | andi TMP0, PC, FRAME_TYPE - | bnez TMP0, ->vm_return - |. move MULTRES, RD - | lw INS, -4(PC) - | decode_RB8a RB, INS - | decode_RB8b RB - |5: - | sltu AT, RD, RB - | bnez AT, >6 // More results expected? - |. decode_RA8a TMP0, INS - | decode_RA8b TMP0 - | ins_next1 - | // Adjust BASE. KBASE is assumed to be set for the calling frame. - | subu BASE, RA, TMP0 - | ins_next2 - | - |6: // Fill up results with nil. - | addu TMP1, RA, RD - | addiu RD, RD, 8 - | b <5 - |. sw TISNIL, -8+HI(TMP1) - | - |.macro math_extern, func - | .ffunc math_ .. func - | lw SFARG1HI, HI(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. load_got func - | sltiu AT, SFARG1HI, LJ_TISNUM - | beqz AT, ->fff_fallback - |.if FPU - |. ldc1 FARG1, 0(BASE) - |.else - |. lw SFARG1LO, LO(BASE) - |.endif - | call_extern - |. nop - | b ->fff_resn - |. nop - |.endmacro - | - |.macro math_extern2, func - | .ffunc_nn math_ .. func - |. load_got func - | call_extern - |. nop - | b ->fff_resn - |. nop - |.endmacro - | - |// TODO: Return integer type if result is integer (own sf implementation). - |.macro math_round, func - |->ff_math_ .. func: - | lw SFARG1HI, HI(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. lw SFARG1LO, LO(BASE) - | beq SFARG1HI, TISNUM, ->fff_restv - |. sltu AT, SFARG1HI, TISNUM - | beqz AT, ->fff_fallback - |.if FPU - |. ldc1 FARG1, 0(BASE) - | bal ->vm_ .. func - |.else - |. load_got func - | call_extern - |.endif - |. nop - | b ->fff_resn - |. nop - |.endmacro - | - | math_round floor - | math_round ceil - | - |.ffunc math_log - | li AT, 8 - | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. - |. lw SFARG1HI, HI(BASE) - | sltiu AT, SFARG1HI, LJ_TISNUM - | beqz AT, ->fff_fallback - |. load_got log - |.if FPU - | call_extern - |. ldc1 FARG1, 0(BASE) - |.else - | call_extern - |. lw SFARG1LO, LO(BASE) - |.endif - | b ->fff_resn - |. 
nop - | - | math_extern log10 - | math_extern exp - | math_extern sin - | math_extern cos - | math_extern tan - | math_extern asin - | math_extern acos - | math_extern atan - | math_extern sinh - | math_extern cosh - | math_extern tanh - | math_extern2 pow - | math_extern2 atan2 - | math_extern2 fmod - | - |.if FPU - |.ffunc_n math_sqrt - |. sqrt.d FRET1, FARG1 - |// fallthrough to ->fff_resn - |.else - | math_extern sqrt - |.endif - | - |->fff_resn: - | lw PC, FRAME_PC(BASE) - | addiu RA, BASE, -8 - |.if FPU - | b ->fff_res1 - |. sdc1 FRET1, -8(BASE) - |.else - | sw SFRETHI, -8+HI(BASE) - | b ->fff_res1 - |. sw SFRETLO, -8+LO(BASE) - |.endif - | - | - |.ffunc math_ldexp - | sltiu AT, NARGS8:RC, 16 - | lw SFARG1HI, HI(BASE) - | bnez AT, ->fff_fallback - |. lw CARG4, 8+HI(BASE) - | bne CARG4, TISNUM, ->fff_fallback - | load_got ldexp - |. sltu AT, SFARG1HI, TISNUM - | beqz AT, ->fff_fallback - |.if FPU - |. ldc1 FARG1, 0(BASE) - |.else - |. lw SFARG1LO, LO(BASE) - |.endif - | call_extern - |. lw CARG3, 8+LO(BASE) - | b ->fff_resn - |. nop - | - |.ffunc_n math_frexp - | load_got frexp - | lw PC, FRAME_PC(BASE) - | call_extern - |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) - | addiu RA, BASE, -8 - |.if FPU - | mtc1 TMP1, FARG2 - | sdc1 FRET1, 0(RA) - | cvt.d.w FARG2, FARG2 - | sdc1 FARG2, 8(RA) - |.else - | sw SFRETLO, LO(RA) - | sw SFRETHI, HI(RA) - | sw TMP1, 8+LO(RA) - | sw TISNUM, 8+HI(RA) - |.endif - | b ->fff_res - |. li RD, (2+1)*8 - | - |.ffunc_n math_modf - | load_got modf - | lw PC, FRAME_PC(BASE) - | call_extern - |. addiu CARG3, BASE, -8 - | addiu RA, BASE, -8 - |.if FPU - | sdc1 FRET1, 0(BASE) - |.else - | sw SFRETLO, LO(BASE) - | sw SFRETHI, HI(BASE) - |.endif - | b ->fff_res - |. li RD, (2+1)*8 - | - |.macro math_minmax, name, intins, fpins - | .ffunc_1 name - | addu TMP3, BASE, NARGS8:RC - | bne SFARG1HI, TISNUM, >5 - |. addiu TMP2, BASE, 8 - |1: // Handle integers. - |. 
lw SFARG2HI, HI(TMP2) - | beq TMP2, TMP3, ->fff_restv - |. lw SFARG2LO, LO(TMP2) - | bne SFARG2HI, TISNUM, >3 - |. slt AT, SFARG1LO, SFARG2LO - | intins SFARG1LO, SFARG2LO, AT - | b <1 - |. addiu TMP2, TMP2, 8 - | - |3: // Convert intermediate result to number and continue with number loop. - | sltiu AT, SFARG2HI, LJ_TISNUM - | beqz AT, ->fff_fallback - |.if FPU - |. mtc1 SFARG1LO, FRET1 - | cvt.d.w FRET1, FRET1 - | b >7 - |. ldc1 FARG1, 0(TMP2) - |.else - |. nop - | bal ->vm_sfi2d_1 - |. nop - | b >7 - |. nop - |.endif - | - |5: - |. sltiu AT, SFARG1HI, LJ_TISNUM - | beqz AT, ->fff_fallback - |.if FPU - |. ldc1 FRET1, 0(BASE) - |.endif - | - |6: // Handle numbers. - |. lw SFARG2HI, HI(TMP2) - |.if FPU - | beq TMP2, TMP3, ->fff_resn - |.else - | beq TMP2, TMP3, ->fff_restv - |.endif - |. sltiu AT, SFARG2HI, LJ_TISNUM - | beqz AT, >8 - |.if FPU - |. ldc1 FARG1, 0(TMP2) - |.else - |. lw SFARG2LO, LO(TMP2) - |.endif - |7: - |.if FPU - | c.olt.d FRET1, FARG1 - | fpins FRET1, FARG1 - |.else - | bal ->vm_sfcmpolt - |. nop - | intins SFARG1LO, SFARG2LO, CRET1 - | intins SFARG1HI, SFARG2HI, CRET1 - |.endif - | b <6 - |. addiu TMP2, TMP2, 8 - | - |8: // Convert integer to number and continue with number loop. - | bne SFARG2HI, TISNUM, ->fff_fallback - |.if FPU - |. lwc1 FARG1, LO(TMP2) - | b <7 - |. cvt.d.w FARG1, FARG1 - |.else - |. nop - | bal ->vm_sfi2d_2 - |. nop - | b <7 - |. nop - |.endif - | - |.endmacro - | - | math_minmax math_min, movz, movf.d - | math_minmax math_max, movn, movt.d - | - |//-- String library ----------------------------------------------------- - | - |.ffunc string_byte // Only handle the 1-arg case here. - | lw CARG3, HI(BASE) - | lw STR:CARG1, LO(BASE) - | xori AT, NARGS8:RC, 8 - | addiu CARG3, CARG3, -LJ_TSTR - | or AT, AT, CARG3 - | bnez AT, ->fff_fallback // Need exactly 1 string argument. - |. 
nop - | lw TMP0, STR:CARG1->len - | addiu RA, BASE, -8 - | lw PC, FRAME_PC(BASE) - | sltu RD, r0, TMP0 - | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). - | addiu RD, RD, 1 - | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 - | sw TISNUM, HI(RA) - | b ->fff_res - |. sw TMP1, LO(RA) - | - |.ffunc string_char // Only handle the 1-arg case here. - | ffgccheck - |. nop - | lw CARG3, HI(BASE) - | lw CARG1, LO(BASE) - | li TMP1, 255 - | xori AT, NARGS8:RC, 8 // Exactly 1 argument. - | xor TMP0, CARG3, TISNUM // Integer. - | sltu TMP1, TMP1, CARG1 // !(255 < n). - | or AT, AT, TMP0 - | or AT, AT, TMP1 - | bnez AT, ->fff_fallback - |. li CARG3, 1 - | addiu CARG2, sp, ARG5_OFS - | sb CARG1, ARG5 - |->fff_newstr: - | load_got lj_str_new - | sw BASE, L->base - | sw PC, SAVE_PC - | call_intern lj_str_new // (lua_State *L, char *str, size_t l) - |. move CARG1, L - | // Returns GCstr *. - | lw BASE, L->base - |->fff_resstr: - | move SFARG1LO, CRET1 - | b ->fff_restv - |. li SFARG1HI, LJ_TSTR - | - |.ffunc string_sub - | ffgccheck - |. nop - | addiu AT, NARGS8:RC, -16 - | lw CARG3, 16+HI(BASE) - | lw TMP0, HI(BASE) - | lw STR:CARG1, LO(BASE) - | bltz AT, ->fff_fallback - |. lw CARG2, 8+HI(BASE) - | beqz AT, >1 - |. li CARG4, -1 - | bne CARG3, TISNUM, ->fff_fallback - |. lw CARG4, 16+LO(BASE) - |1: - | bne CARG2, TISNUM, ->fff_fallback - |. li AT, LJ_TSTR - | bne TMP0, AT, ->fff_fallback - |. 
lw CARG3, 8+LO(BASE) - | lw CARG2, STR:CARG1->len - | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end - | slt AT, CARG4, r0 - | addiu TMP0, CARG2, 1 - | addu TMP1, CARG4, TMP0 - | slt TMP3, CARG3, r0 - | movn CARG4, TMP1, AT // if (end < 0) end += len+1 - | addu TMP1, CARG3, TMP0 - | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1 - | li TMP2, 1 - | slt AT, CARG4, r0 - | slt TMP3, r0, CARG3 - | movn CARG4, r0, AT // if (end < 0) end = 0 - | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1 - | slt AT, CARG2, CARG4 - | movn CARG4, CARG2, AT // if (end > len) end = len - | addu CARG2, STR:CARG1, CARG3 - | subu CARG3, CARG4, CARG3 // len = end - start - | addiu CARG2, CARG2, sizeof(GCstr)-1 - | bgez CARG3, ->fff_newstr - |. addiu CARG3, CARG3, 1 // len++ - |->fff_emptystr: // Return empty string. - | addiu STR:SFARG1LO, DISPATCH, DISPATCH_GL(strempty) - | b ->fff_restv - |. li SFARG1HI, LJ_TSTR - | - |.macro ffstring_op, name - | .ffunc string_ .. name - | ffgccheck - |. nop - | lw CARG3, HI(BASE) - | lw STR:CARG2, LO(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. li AT, LJ_TSTR - | bne CARG3, AT, ->fff_fallback - |. addiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf) - | load_got lj_buf_putstr_ .. name - | lw TMP0, SBUF:CARG1->b - | sw L, SBUF:CARG1->L - | sw BASE, L->base - | sw TMP0, SBUF:CARG1->p - | call_intern extern lj_buf_putstr_ .. name - |. sw PC, SAVE_PC - | load_got lj_buf_tostr - | call_intern lj_buf_tostr - |. move SBUF:CARG1, SBUF:CRET1 - | b ->fff_resstr - |. lw BASE, L->base - |.endmacro - | - |ffstring_op reverse - |ffstring_op lower - |ffstring_op upper - | - |//-- Bit library -------------------------------------------------------- - | - |->vm_tobit_fb: - | beqz TMP1, ->fff_fallback - |.if FPU - |. ldc1 FARG1, 0(BASE) - | add.d FARG1, FARG1, TOBIT - | jr ra - |. mfc1 CRET1, FARG1 - |.else - |// FP number to bit conversion for soft-float. 
- |->vm_tobit: - | sll TMP0, SFARG1HI, 1 - | lui AT, 0x0020 - | addu TMP0, TMP0, AT - | slt AT, TMP0, r0 - | movz SFARG1LO, r0, AT - | beqz AT, >2 - |. li TMP1, 0x3e0 - | not TMP1, TMP1 - | sra TMP0, TMP0, 21 - | subu TMP0, TMP1, TMP0 - | slt AT, TMP0, r0 - | bnez AT, >1 - |. sll TMP1, SFARG1HI, 11 - | lui AT, 0x8000 - | or TMP1, TMP1, AT - | srl AT, SFARG1LO, 21 - | or TMP1, TMP1, AT - | slt AT, SFARG1HI, r0 - | beqz AT, >2 - |. srlv SFARG1LO, TMP1, TMP0 - | subu SFARG1LO, r0, SFARG1LO - |2: - | jr ra - |. move CRET1, SFARG1LO - |1: - | addiu TMP0, TMP0, 21 - | srlv TMP1, SFARG1LO, TMP0 - | li AT, 20 - | subu TMP0, AT, TMP0 - | sll SFARG1LO, SFARG1HI, 12 - | sllv AT, SFARG1LO, TMP0 - | or SFARG1LO, TMP1, AT - | slt AT, SFARG1HI, r0 - | beqz AT, <2 - |. nop - | jr ra - |. subu CRET1, r0, SFARG1LO - |.endif - | - |.macro .ffunc_bit, name - | .ffunc_1 bit_..name - | beq SFARG1HI, TISNUM, >6 - |. move CRET1, SFARG1LO - | bal ->vm_tobit_fb - |. sltu TMP1, SFARG1HI, TISNUM - |6: - |.endmacro - | - |.macro .ffunc_bit_op, name, ins - | .ffunc_bit name - | addiu TMP2, BASE, 8 - | addu TMP3, BASE, NARGS8:RC - |1: - | lw SFARG1HI, HI(TMP2) - | beq TMP2, TMP3, ->fff_resi - |. lw SFARG1LO, LO(TMP2) - |.if FPU - | bne SFARG1HI, TISNUM, >2 - |. addiu TMP2, TMP2, 8 - | b <1 - |. ins CRET1, CRET1, SFARG1LO - |2: - | ldc1 FARG1, -8(TMP2) - | sltu TMP1, SFARG1HI, TISNUM - | beqz TMP1, ->fff_fallback - |. add.d FARG1, FARG1, TOBIT - | mfc1 SFARG1LO, FARG1 - | b <1 - |. ins CRET1, CRET1, SFARG1LO - |.else - | beq SFARG1HI, TISNUM, >2 - |. move CRET2, CRET1 - | bal ->vm_tobit_fb - |. sltu TMP1, SFARG1HI, TISNUM - | move SFARG1LO, CRET2 - |2: - | ins CRET1, CRET1, SFARG1LO - | b <1 - |. 
addiu TMP2, TMP2, 8 - |.endif - |.endmacro - | - |.ffunc_bit_op band, and - |.ffunc_bit_op bor, or - |.ffunc_bit_op bxor, xor - | - |.ffunc_bit bswap - | srl TMP0, CRET1, 24 - | srl TMP2, CRET1, 8 - | sll TMP1, CRET1, 24 - | andi TMP2, TMP2, 0xff00 - | or TMP0, TMP0, TMP1 - | andi CRET1, CRET1, 0xff00 - | or TMP0, TMP0, TMP2 - | sll CRET1, CRET1, 8 - | b ->fff_resi - |. or CRET1, TMP0, CRET1 - | - |.ffunc_bit bnot - | b ->fff_resi - |. not CRET1, CRET1 - | - |.macro .ffunc_bit_sh, name, ins, shmod - | .ffunc_2 bit_..name - | beq SFARG1HI, TISNUM, >1 - |. nop - | bal ->vm_tobit_fb - |. sltu TMP1, SFARG1HI, TISNUM - | move SFARG1LO, CRET1 - |1: - | bne SFARG2HI, TISNUM, ->fff_fallback - |. nop - |.if shmod == 1 - | li AT, 32 - | subu TMP0, AT, SFARG2LO - | sllv SFARG2LO, SFARG1LO, SFARG2LO - | srlv SFARG1LO, SFARG1LO, TMP0 - |.elif shmod == 2 - | li AT, 32 - | subu TMP0, AT, SFARG2LO - | srlv SFARG2LO, SFARG1LO, SFARG2LO - | sllv SFARG1LO, SFARG1LO, TMP0 - |.endif - | b ->fff_resi - |. ins CRET1, SFARG1LO, SFARG2LO - |.endmacro - | - |.ffunc_bit_sh lshift, sllv, 0 - |.ffunc_bit_sh rshift, srlv, 0 - |.ffunc_bit_sh arshift, srav, 0 - |// Can't use rotrv, since it's only in MIPS32R2. - |.ffunc_bit_sh rol, or, 1 - |.ffunc_bit_sh ror, or, 2 - | - |.ffunc_bit tobit - |->fff_resi: - | lw PC, FRAME_PC(BASE) - | addiu RA, BASE, -8 - | sw TISNUM, -8+HI(BASE) - | b ->fff_res1 - |. sw CRET1, -8+LO(BASE) - | - |//----------------------------------------------------------------------- - | - |->fff_fallback: // Call fast function fallback handler. - | // BASE = new base, RB = CFUNC, RC = nargs*8 - | lw TMP3, CFUNC:RB->f - | addu TMP1, BASE, NARGS8:RC - | lw PC, FRAME_PC(BASE) // Fallback may overwrite PC. - | addiu TMP0, TMP1, 8*LUA_MINSTACK - | lw TMP2, L->maxstack - | sw PC, SAVE_PC // Redundant (but a defined value). - | sltu AT, TMP2, TMP0 - | sw BASE, L->base - | sw TMP1, L->top - | bnez AT, >5 // Need to grow stack. - |. 
move CFUNCADDR, TMP3 - | jalr TMP3 // (lua_State *L) - |. move CARG1, L - | // Either throws an error, or recovers and returns -1, 0 or nresults+1. - | lw BASE, L->base - | sll RD, CRET1, 3 - | bgtz CRET1, ->fff_res // Returned nresults+1? - |. addiu RA, BASE, -8 - |1: // Returned 0 or -1: retry fast path. - | lw TMP0, L->top - | lw LFUNC:RB, FRAME_FUNC(BASE) - | bnez CRET1, ->vm_call_tail // Returned -1? - |. subu NARGS8:RC, TMP0, BASE - | ins_callt // Returned 0: retry fast path. - | - |// Reconstruct previous base for vmeta_call during tailcall. - |->vm_call_tail: - | andi TMP0, PC, FRAME_TYPE - | li AT, -4 - | bnez TMP0, >3 - |. and TMP1, PC, AT - | lbu TMP1, OFS_RA(PC) - | sll TMP1, TMP1, 3 - | addiu TMP1, TMP1, 8 - |3: - | b ->vm_call_dispatch // Resolve again for tailcall. - |. subu TMP2, BASE, TMP1 - | - |5: // Grow stack for fallback handler. - | load_got lj_state_growstack - | li CARG2, LUA_MINSTACK - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | lw BASE, L->base - | b <1 - |. li CRET1, 0 // Force retry. - | - |->fff_gcstep: // Call GC step function. - | // BASE = new base, RC = nargs*8 - | move MULTRES, ra - | load_got lj_gc_step - | sw BASE, L->base - | addu TMP0, BASE, NARGS8:RC - | sw PC, SAVE_PC // Redundant (but a defined value). - | sw TMP0, L->top - | call_intern lj_gc_step // (lua_State *L) - |. move CARG1, L - | lw BASE, L->base - | move ra, MULTRES - | lw TMP0, L->top - | lw CFUNC:RB, FRAME_FUNC(BASE) - | jr ra - |. subu NARGS8:RC, TMP0, BASE - | - |//----------------------------------------------------------------------- - |//-- Special dispatch targets ------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_record: // Dispatch target for recording phase. - |.if JIT - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | andi AT, TMP3, HOOK_VMEVENT // No recording while in vmevent. 
- | bnez AT, >5 - | // Decrement the hookcount for consistency, but always do the call. - |. lw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andi AT, TMP3, HOOK_ACTIVE - | bnez AT, >1 - |. addiu TMP2, TMP2, -1 - | andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT - | beqz AT, >1 - |. nop - | b >1 - |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - |.endif - | - |->vm_rethook: // Dispatch target for return hooks. - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | andi AT, TMP3, HOOK_ACTIVE // Hook already active? - | beqz AT, >1 - |5: // Re-dispatch to static ins. - |. lw AT, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4. - | jr AT - |. nop - | - |->vm_inshook: // Dispatch target for instr/line hooks. - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | lw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andi AT, TMP3, HOOK_ACTIVE // Hook already active? - | bnez AT, <5 - |. andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT - | beqz AT, <5 - |. addiu TMP2, TMP2, -1 - | beqz TMP2, >1 - |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andi AT, TMP3, LUA_MASKLINE - | beqz AT, <5 - |1: - |. load_got lj_dispatch_ins - | sw MULTRES, SAVE_MULTRES - | move CARG2, PC - | sw BASE, L->base - | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. - | call_intern lj_dispatch_ins // (lua_State *L, const BCIns *pc) - |. move CARG1, L - |3: - | lw BASE, L->base - |4: // Re-dispatch to static ins. - | lw INS, -4(PC) - | decode_OP4a TMP1, INS - | decode_OP4b TMP1 - | addu TMP0, DISPATCH, TMP1 - | decode_RD8a RD, INS - | lw AT, GG_DISP2STATIC(TMP0) - | decode_RA8a RA, INS - | decode_RD8b RD - | jr AT - | decode_RA8b RA - | - |->cont_hook: // Continue from hook yield. - | addiu PC, PC, 4 - | b <4 - |. lw MULTRES, -24+LO(RB) // Restore MULTRES for *M ins. - | - |->vm_hotloop: // Hot loop counter underflow. 
- |.if JIT - | lw LFUNC:TMP1, FRAME_FUNC(BASE) - | addiu CARG1, DISPATCH, GG_DISP2J - | sw PC, SAVE_PC - | lw TMP1, LFUNC:TMP1->pc - | move CARG2, PC - | sw L, DISPATCH_J(L)(DISPATCH) - | lbu TMP1, PC2PROTO(framesize)(TMP1) - | load_got lj_trace_hot - | sw BASE, L->base - | sll TMP1, TMP1, 3 - | addu TMP1, BASE, TMP1 - | call_intern lj_trace_hot // (jit_State *J, const BCIns *pc) - |. sw TMP1, L->top - | b <3 - |. nop - |.endif - | - |->vm_callhook: // Dispatch target for call hooks. - |.if JIT - | b >1 - |.endif - |. move CARG2, PC - | - |->vm_hotcall: // Hot call counter underflow. - |.if JIT - | ori CARG2, PC, 1 - |1: - |.endif - | load_got lj_dispatch_call - | addu TMP0, BASE, RC - | sw PC, SAVE_PC - | sw BASE, L->base - | subu RA, RA, BASE - | sw TMP0, L->top - | call_intern lj_dispatch_call // (lua_State *L, const BCIns *pc) - |. move CARG1, L - | // Returns ASMFunction. - | lw BASE, L->base - | lw TMP0, L->top - | sw r0, SAVE_PC // Invalidate for subsequent line hook. - | subu NARGS8:RC, TMP0, BASE - | addu RA, BASE, RA - | lw LFUNC:RB, FRAME_FUNC(BASE) - | jr CRET1 - |. lw INS, -4(PC) - | - |->cont_stitch: // Trace stitching. - |.if JIT - | // RA = resultptr, RB = meta base - | lw INS, -4(PC) - | lw TMP2, -24+LO(RB) // Save previous trace. - | decode_RA8a RC, INS - | addiu AT, MULTRES, -8 - | decode_RA8b RC - | beqz AT, >2 - |. addu RC, BASE, RC // Call base. - |1: // Move results down. - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | addiu AT, AT, -8 - | addiu RA, RA, 8 - | sw SFRETHI, HI(RC) - | sw SFRETLO, LO(RC) - | bnez AT, <1 - |. addiu RC, RC, 8 - |2: - | decode_RA8a RA, INS - | decode_RB8a RB, INS - | decode_RA8b RA - | decode_RB8b RB - | addu RA, RA, RB - | addu RA, BASE, RA - |3: - | sltu AT, RC, RA - | bnez AT, >9 // More results wanted? - |. nop - | - | lhu TMP3, TRACE:TMP2->traceno - | lhu RD, TRACE:TMP2->link - | beq RD, TMP3, ->cont_nop // Blacklisted. - |. load_got lj_dispatch_stitch - | bnez RD, =>BC_JLOOP // Jump to stitched trace. - |. 
sll RD, RD, 3 - | - | // Stitch a new trace to the previous trace. - | sw TMP3, DISPATCH_J(exitno)(DISPATCH) - | sw L, DISPATCH_J(L)(DISPATCH) - | sw BASE, L->base - | addiu CARG1, DISPATCH, GG_DISP2J - | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) - |. move CARG2, PC - | b ->cont_nop - |. lw BASE, L->base - | - |9: - | sw TISNIL, HI(RC) - | b <3 - |. addiu RC, RC, 8 - |.endif - | - |->vm_profhook: // Dispatch target for profiler hook. -#if LJ_HASPROFILE - | load_got lj_dispatch_profile - | sw MULTRES, SAVE_MULTRES - | move CARG2, PC - | sw BASE, L->base - | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc) - |. move CARG1, L - | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. - | addiu PC, PC, -4 - | b ->cont_nop - |. lw BASE, L->base -#endif - | - |//----------------------------------------------------------------------- - |//-- Trace exit handler ------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro savex_, a, b - |.if FPU - | sdc1 f..a, 16+a*8(sp) - | sw r..a, 16+32*8+a*4(sp) - | sw r..b, 16+32*8+b*4(sp) - |.else - | sw r..a, 16+a*4(sp) - | sw r..b, 16+b*4(sp) - |.endif - |.endmacro - | - |->vm_exit_handler: - |.if JIT - |.if FPU - | addiu sp, sp, -(16+32*8+32*4) - |.else - | addiu sp, sp, -(16+32*4) - |.endif - | savex_ 0, 1 - | savex_ 2, 3 - | savex_ 4, 5 - | savex_ 6, 7 - | savex_ 8, 9 - | savex_ 10, 11 - | savex_ 12, 13 - | savex_ 14, 15 - | savex_ 16, 17 - | savex_ 18, 19 - | savex_ 20, 21 - | savex_ 22, 23 - | savex_ 24, 25 - | savex_ 26, 27 - |.if FPU - | sdc1 f28, 16+28*8(sp) - | sdc1 f30, 16+30*8(sp) - | sw r28, 16+32*8+28*4(sp) - | sw r30, 16+32*8+30*4(sp) - | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP. - | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp. 
- | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP - |.else - | sw r28, 16+28*4(sp) - | sw r30, 16+30*4(sp) - | sw r0, 16+31*4(sp) // Clear RID_TMP. - | addiu TMP2, sp, 16+32*4 // Recompute original value of sp. - | sw TMP2, 16+29*4(sp) // Store sp in RID_SP - |.endif - | li_vmstate EXIT - | addiu DISPATCH, JGL, -GG_DISP2G-32768 - | lw TMP1, 0(TMP2) // Load exit number. - | st_vmstate - | lw L, DISPATCH_GL(cur_L)(DISPATCH) - | lw BASE, DISPATCH_GL(jit_base)(DISPATCH) - | load_got lj_trace_exit - | sw L, DISPATCH_J(L)(DISPATCH) - | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. - | sw BASE, L->base - | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. - | addiu CARG1, DISPATCH, GG_DISP2J - | sw r0, DISPATCH_GL(jit_base)(DISPATCH) - | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) - |. addiu CARG2, sp, 16 - | // Returns MULTRES (unscaled) or negated error code. - | lw TMP1, L->cframe - | li AT, -4 - | lw BASE, L->base - | and sp, TMP1, AT - | lw PC, SAVE_PC // Get SAVE_PC. - | b >1 - |. sw L, SAVE_L // Set SAVE_L (on-trace resume/yield). - |.endif - |->vm_exit_interp: - |.if JIT - | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. - | lw L, SAVE_L - | addiu DISPATCH, JGL, -GG_DISP2G-32768 - | sw BASE, L->base - |1: - | bltz CRET1, >9 // Check for error from exit. - |. lw LFUNC:RB, FRAME_FUNC(BASE) - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | sll MULTRES, CRET1, 3 - | li TISNIL, LJ_TNIL - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | sw MULTRES, SAVE_MULTRES - | .FPU mtc1 TMP3, TOBIT - | lw TMP1, LFUNC:RB->pc - | sw r0, DISPATCH_GL(jit_base)(DISPATCH) - | lw KBASE, PC2PROTO(k)(TMP1) - | .FPU cvt.d.s TOBIT, TOBIT - | // Modified copy of ins_next which handles function header dispatch, too. 
- | lw INS, 0(PC) - | addiu PC, PC, 4 - | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 - | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) - | decode_OP4a TMP1, INS - | decode_OP4b TMP1 - | sltiu TMP2, TMP1, BC_FUNCF*4 - | addu TMP0, DISPATCH, TMP1 - | decode_RD8a RD, INS - | lw AT, 0(TMP0) - | decode_RA8a RA, INS - | beqz TMP2, >2 - |. decode_RA8b RA - | jr AT - |. decode_RD8b RD - |2: - | sltiu TMP2, TMP1, (BC_FUNCC+2)*4 // Fast function? - | bnez TMP2, >3 - |. lw TMP1, FRAME_PC(BASE) - | // Check frame below fast function. - | andi TMP0, TMP1, FRAME_TYPE - | bnez TMP0, >3 // Trace stitching continuation? - |. nop - | // Otherwise set KBASE for Lua function below fast function. - | lw TMP2, -4(TMP1) - | decode_RA8a TMP0, TMP2 - | decode_RA8b TMP0 - | subu TMP1, BASE, TMP0 - | lw LFUNC:TMP2, -8+FRAME_FUNC(TMP1) - | lw TMP1, LFUNC:TMP2->pc - | lw KBASE, PC2PROTO(k)(TMP1) - |3: - | addiu RC, MULTRES, -8 - | jr AT - |. addu RA, RA, BASE - | - |9: // Rethrow error from the right C frame. - | load_got lj_err_throw - | negu CARG2, CRET1 - | call_intern lj_err_throw // (lua_State *L, int errcode) - |. move CARG1, L - |.endif - | - |//----------------------------------------------------------------------- - |//-- Math helper functions ---------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Hard-float round to integer. - |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. - |.macro vm_round_hf, func - | lui TMP0, 0x4330 // Hiword of 2^52 (double). - | mtc1 r0, f4 - | mtc1 TMP0, f5 - | abs.d FRET2, FARG1 // |x| - | mfc1 AT, f13 - | c.olt.d 0, FRET2, f4 - | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 - | bc1f 0, >1 // Truncate only if |x| < 2^52. - |. sub.d FRET1, FRET1, f4 - | slt AT, AT, r0 - |.if "func" == "ceil" - | lui TMP0, 0xbff0 // Hiword of -1 (double). Preserves -0. - |.else - | lui TMP0, 0x3ff0 // Hiword of +1 (double). 
- |.endif - |.if "func" == "trunc" - | mtc1 TMP0, f5 - | c.olt.d 0, FRET2, FRET1 // |x| < result? - | sub.d FRET2, FRET1, f4 - | movt.d FRET1, FRET2, 0 // If yes, subtract +1. - | neg.d FRET2, FRET1 - | jr ra - |. movn.d FRET1, FRET2, AT // Merge sign bit back in. - |.else - | neg.d FRET2, FRET1 - | mtc1 TMP0, f5 - | movn.d FRET1, FRET2, AT // Merge sign bit back in. - |.if "func" == "ceil" - | c.olt.d 0, FRET1, FARG1 // x > result? - |.else - | c.olt.d 0, FARG1, FRET1 // x < result? - |.endif - | sub.d FRET2, FRET1, f4 // If yes, subtract +-1. - | jr ra - |. movt.d FRET1, FRET2, 0 - |.endif - |1: - | jr ra - |. mov.d FRET1, FARG1 - |.endmacro - | - |.macro vm_round, func - |.if FPU - | vm_round_hf, func - |.endif - |.endmacro - | - |->vm_floor: - | vm_round floor - |->vm_ceil: - | vm_round ceil - |->vm_trunc: - |.if JIT - | vm_round trunc - |.endif - | - |// Soft-float integer to number conversion. - |.macro sfi2d, AHI, ALO - |.if not FPU - | beqz ALO, >9 // Handle zero first. - |. sra TMP0, ALO, 31 - | xor TMP1, ALO, TMP0 - | subu TMP1, TMP1, TMP0 // Absolute value in TMP1. - | clz AHI, TMP1 - | andi TMP0, TMP0, 0x800 // Mask sign bit. - | li AT, 0x3ff+31-1 - | sllv TMP1, TMP1, AHI // Align mantissa left with leading 1. - | subu AHI, AT, AHI // Exponent - 1 in AHI. - | sll ALO, TMP1, 21 - | or AHI, AHI, TMP0 // Sign | Exponent. - | srl TMP1, TMP1, 11 - | sll AHI, AHI, 20 // Align left. - | jr ra - |. addu AHI, AHI, TMP1 // Add mantissa, increment exponent. - |9: - | jr ra - |. li AHI, 0 - |.endif - |.endmacro - | - |// Input SFARG1LO. Output: SFARG1*. Temporaries: AT, TMP0, TMP1. - |->vm_sfi2d_1: - | sfi2d SFARG1HI, SFARG1LO - | - |// Input SFARG2LO. Output: SFARG2*. Temporaries: AT, TMP0, TMP1. - |->vm_sfi2d_2: - | sfi2d SFARG2HI, SFARG2LO - | - |// Soft-float comparison. Equivalent to c.eq.d. - |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1. 
- |->vm_sfcmpeq: - |.if not FPU - | sll AT, SFARG1HI, 1 - | sll TMP0, SFARG2HI, 1 - | or CRET1, SFARG1LO, SFARG2LO - | or TMP1, AT, TMP0 - | or TMP1, TMP1, CRET1 - | beqz TMP1, >8 // Both args +-0: return 1. - |. sltu CRET1, r0, SFARG1LO - | lui TMP1, 0xffe0 - | addu AT, AT, CRET1 - | sltu CRET1, r0, SFARG2LO - | sltu AT, TMP1, AT - | addu TMP0, TMP0, CRET1 - | sltu TMP0, TMP1, TMP0 - | or TMP1, AT, TMP0 - | bnez TMP1, >9 // Either arg is NaN: return 0; - |. xor TMP0, SFARG1HI, SFARG2HI - | xor TMP1, SFARG1LO, SFARG2LO - | or AT, TMP0, TMP1 - | jr ra - |. sltiu CRET1, AT, 1 // Same values: return 1. - |8: - | jr ra - |. li CRET1, 1 - |9: - | jr ra - |. li CRET1, 0 - |.endif - | - |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d. - |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2. - |->vm_sfcmpult: - |.if not FPU - | b >1 - |. li CRET2, 1 - |.endif - | - |->vm_sfcmpolt: - |.if not FPU - | li CRET2, 0 - |1: - | sll AT, SFARG1HI, 1 - | sll TMP0, SFARG2HI, 1 - | or CRET1, SFARG1LO, SFARG2LO - | or TMP1, AT, TMP0 - | or TMP1, TMP1, CRET1 - | beqz TMP1, >8 // Both args +-0: return 0. - |. sltu CRET1, r0, SFARG1LO - | lui TMP1, 0xffe0 - | addu AT, AT, CRET1 - | sltu CRET1, r0, SFARG2LO - | sltu AT, TMP1, AT - | addu TMP0, TMP0, CRET1 - | sltu TMP0, TMP1, TMP0 - | or TMP1, AT, TMP0 - | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; - |. and AT, SFARG1HI, SFARG2HI - | bltz AT, >5 // Both args negative? - |. nop - | beq SFARG1HI, SFARG2HI, >8 - |. sltu CRET1, SFARG1LO, SFARG2LO - | jr ra - |. slt CRET1, SFARG1HI, SFARG2HI - |5: // Swap conditions if both operands are negative. - | beq SFARG1HI, SFARG2HI, >8 - |. sltu CRET1, SFARG2LO, SFARG1LO - | jr ra - |. slt CRET1, SFARG2HI, SFARG1HI - |8: - | jr ra - |. nop - |9: - | jr ra - |. move CRET1, CRET2 - |.endif - | - |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. - |// Input: SFARG*, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. 
- |->vm_sfcmpolex: - |.if not FPU - | sll AT, SFARG1HI, 1 - | sll TMP0, SFARG2HI, 1 - | or CRET1, SFARG1LO, SFARG2LO - | or TMP1, AT, TMP0 - | or TMP1, TMP1, CRET1 - | beqz TMP1, >8 // Both args +-0: return 1. - |. sltu CRET1, r0, SFARG1LO - | lui TMP1, 0xffe0 - | addu AT, AT, CRET1 - | sltu CRET1, r0, SFARG2LO - | sltu AT, TMP1, AT - | addu TMP0, TMP0, CRET1 - | sltu TMP0, TMP1, TMP0 - | or TMP1, AT, TMP0 - | bnez TMP1, >9 // Either arg is NaN: return 0; - |. and AT, SFARG1HI, SFARG2HI - | xor AT, AT, TMP3 - | bltz AT, >5 // Both args negative? - |. nop - | beq SFARG1HI, SFARG2HI, >6 - |. sltu CRET1, SFARG2LO, SFARG1LO - | jr ra - |. slt CRET1, SFARG2HI, SFARG1HI - |5: // Swap conditions if both operands are negative. - | beq SFARG1HI, SFARG2HI, >6 - |. sltu CRET1, SFARG1LO, SFARG2LO - | slt CRET1, SFARG1HI, SFARG2HI - |6: - | jr ra - |. nop - |8: - | jr ra - |. li CRET1, 1 - |9: - | jr ra - |. li CRET1, 0 - |.endif - | - |.macro sfmin_max, name, intins - |->vm_sf .. name: - |.if JIT and not FPU - | move TMP2, ra - | bal ->vm_sfcmpolt - |. nop - | move TMP0, CRET1 - | move SFRETHI, SFARG1HI - | move SFRETLO, SFARG1LO - | move ra, TMP2 - | intins SFRETHI, SFARG2HI, TMP0 - | jr ra - |. intins SFRETLO, SFARG2LO, TMP0 - |.endif - |.endmacro - | - | sfmin_max min, movz - | sfmin_max max, movn - | - |//----------------------------------------------------------------------- - |//-- Miscellaneous functions -------------------------------------------- - |//----------------------------------------------------------------------- - | - |//----------------------------------------------------------------------- - |//-- FFI helper functions ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Handler for callback functions. Callback slot number in r1, g in r2. 
- |->vm_ffi_callback: - |.if FFI - |.type CTSTATE, CTState, PC - | saveregs - | lw CTSTATE, GL:r2->ctype_state - | addiu DISPATCH, r2, GG_G2DISP - | load_got lj_ccallback_enter - | sw r1, CTSTATE->cb.slot - | sw CARG1, CTSTATE->cb.gpr[0] - | sw CARG2, CTSTATE->cb.gpr[1] - | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0] - | sw CARG3, CTSTATE->cb.gpr[2] - | sw CARG4, CTSTATE->cb.gpr[3] - | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1] - | addiu TMP0, sp, CFRAME_SPACE+16 - | sw TMP0, CTSTATE->cb.stack - | sw r0, SAVE_PC // Any value outside of bytecode is ok. - | move CARG2, sp - | call_intern lj_ccallback_enter // (CTState *cts, void *cf) - |. move CARG1, CTSTATE - | // Returns lua_State *. - | lw BASE, L:CRET1->base - | lw RC, L:CRET1->top - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | move L, CRET1 - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | lw LFUNC:RB, FRAME_FUNC(BASE) - | .FPU mtc1 TMP3, TOBIT - | li_vmstate INTERP - | li TISNIL, LJ_TNIL - | subu RC, RC, BASE - | st_vmstate - | .FPU cvt.d.s TOBIT, TOBIT - | ins_callt - |.endif - | - |->cont_ffi_callback: // Return from FFI callback. - |.if FFI - | load_got lj_ccallback_leave - | lw CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH) - | sw BASE, L->base - | sw RB, L->top - | sw L, CTSTATE->L - | move CARG2, RA - | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) - |. move CARG1, CTSTATE - | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0] - | lw CRET1, CTSTATE->cb.gpr[0] - | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1] - | b ->vm_leave_unw - |. lw CRET2, CTSTATE->cb.gpr[1] - |.endif - | - |->vm_ffi_call: // Call C function via FFI. - | // Caveat: needs special frame unwinding, see below. 
- |.if FFI - | .type CCSTATE, CCallState, CARG1 - | lw TMP1, CCSTATE->spadj - | lbu CARG2, CCSTATE->nsp - | move TMP2, sp - | subu sp, sp, TMP1 - | sw ra, -4(TMP2) - | sll CARG2, CARG2, 2 - | sw r16, -8(TMP2) - | sw CCSTATE, -12(TMP2) - | move r16, TMP2 - | addiu TMP1, CCSTATE, offsetof(CCallState, stack) - | addiu TMP2, sp, 16 - | beqz CARG2, >2 - |. addu TMP3, TMP1, CARG2 - |1: - | lw TMP0, 0(TMP1) - | addiu TMP1, TMP1, 4 - | sltu AT, TMP1, TMP3 - | sw TMP0, 0(TMP2) - | bnez AT, <1 - |. addiu TMP2, TMP2, 4 - |2: - | lw CFUNCADDR, CCSTATE->func - | lw CARG2, CCSTATE->gpr[1] - | lw CARG3, CCSTATE->gpr[2] - | lw CARG4, CCSTATE->gpr[3] - | .FPU ldc1 FARG1, CCSTATE->fpr[0] - | .FPU ldc1 FARG2, CCSTATE->fpr[1] - | jalr CFUNCADDR - |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. - | lw CCSTATE:TMP1, -12(r16) - | lw TMP2, -8(r16) - | lw ra, -4(r16) - | sw CRET1, CCSTATE:TMP1->gpr[0] - | sw CRET2, CCSTATE:TMP1->gpr[1] - |.if FPU - | sdc1 FRET1, CCSTATE:TMP1->fpr[0] - | sdc1 FRET2, CCSTATE:TMP1->fpr[1] - |.else - | sw CARG1, CCSTATE:TMP1->gpr[2] // Soft-float: complex double .im part. - | sw CARG2, CCSTATE:TMP1->gpr[3] - |.endif - | move sp, r16 - | jr ra - |. move r16, TMP2 - |.endif - |// Note: vm_ffi_call must be the last function in this object file! - | - |//----------------------------------------------------------------------- -} - -/* Generate the code for a single instruction. */ -static void build_ins(BuildCtx *ctx, BCOp op, int defop) -{ - int vk = 0; - |=>defop: - - switch (op) { - - /* -- Comparison ops ---------------------------------------------------- */ - - /* Remember: all ops branch for a true comparison, fall through otherwise. 
*/ - - case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: - | // RA = src1*8, RD = src2*8, JMP with RD = target - |.macro bc_comp, FRA, FRD, RAHI, RALO, RDHI, RDLO, movop, fmovop, fcomp, sfcomp - | addu RA, BASE, RA - | addu RD, BASE, RD - | lw RAHI, HI(RA) - | lw RDHI, HI(RD) - | lhu TMP2, OFS_RD(PC) - | addiu PC, PC, 4 - | bne RAHI, TISNUM, >2 - |. lw RALO, LO(RA) - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | lw RDLO, LO(RD) - | bne RDHI, TISNUM, >5 - |. decode_RD4b TMP2 - | slt AT, SFARG1LO, SFARG2LO - | addu TMP2, TMP2, TMP3 - | movop TMP2, r0, AT - |1: - | addu PC, PC, TMP2 - | ins_next - | - |2: // RA is not an integer. - | sltiu AT, RAHI, LJ_TISNUM - | beqz AT, ->vmeta_comp - |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sltiu AT, RDHI, LJ_TISNUM - |.if FPU - | ldc1 FRA, 0(RA) - | ldc1 FRD, 0(RD) - |.else - | lw RDLO, LO(RD) - |.endif - | beqz AT, >4 - |. decode_RD4b TMP2 - |3: // RA and RD are both numbers. - |.if FPU - | fcomp f20, f22 - | addu TMP2, TMP2, TMP3 - | b <1 - |. fmovop TMP2, r0 - |.else - | bal sfcomp - |. addu TMP2, TMP2, TMP3 - | b <1 - |. movop TMP2, r0, CRET1 - |.endif - | - |4: // RA is a number, RD is not a number. - | bne RDHI, TISNUM, ->vmeta_comp - | // RA is a number, RD is an integer. Convert RD to a number. - |.if FPU - |. lwc1 FRD, LO(RD) - | b <3 - |. cvt.d.w FRD, FRD - |.else - |. nop - |.if "RDHI" == "SFARG1HI" - | bal ->vm_sfi2d_1 - |.else - | bal ->vm_sfi2d_2 - |.endif - |. nop - | b <3 - |. nop - |.endif - | - |5: // RA is an integer, RD is not an integer - | sltiu AT, RDHI, LJ_TISNUM - | beqz AT, ->vmeta_comp - | // RA is an integer, RD is a number. Convert RA to a number. - |.if FPU - |. mtc1 RALO, FRA - | ldc1 FRD, 0(RD) - | b <3 - | cvt.d.w FRA, FRA - |.else - |. nop - |.if "RAHI" == "SFARG1HI" - | bal ->vm_sfi2d_1 - |.else - | bal ->vm_sfi2d_2 - |.endif - |. nop - | b <3 - |. 
nop - |.endif - |.endmacro - | - if (op == BC_ISLT) { - | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movz, movf, c.olt.d, ->vm_sfcmpolt - } else if (op == BC_ISGE) { - | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movn, movt, c.olt.d, ->vm_sfcmpolt - } else if (op == BC_ISLE) { - | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movn, movt, c.ult.d, ->vm_sfcmpult - } else { - | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movz, movf, c.ult.d, ->vm_sfcmpult - } - break; - - case BC_ISEQV: case BC_ISNEV: - vk = op == BC_ISEQV; - | // RA = src1*8, RD = src2*8, JMP with RD = target - | addu RA, BASE, RA - | addiu PC, PC, 4 - | addu RD, BASE, RD - | lw SFARG1HI, HI(RA) - | lhu TMP2, -4+OFS_RD(PC) - | lw SFARG2HI, HI(RD) - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sltu AT, TISNUM, SFARG1HI - | sltu TMP0, TISNUM, SFARG2HI - | or AT, AT, TMP0 - if (vk) { - | beqz AT, ->BC_ISEQN_Z - } else { - | beqz AT, ->BC_ISNEN_Z - } - |. decode_RD4b TMP2 - | // Either or both types are not numbers. - | lw SFARG1LO, LO(RA) - | lw SFARG2LO, LO(RD) - | addu TMP2, TMP2, TMP3 - |.if FFI - | li TMP3, LJ_TCDATA - | beq SFARG1HI, TMP3, ->vmeta_equal_cd - |.endif - |. sltiu AT, SFARG1HI, LJ_TISPRI // Not a primitive? - |.if FFI - | beq SFARG2HI, TMP3, ->vmeta_equal_cd - |.endif - |. xor TMP3, SFARG1LO, SFARG2LO // Same tv? - | xor SFARG2HI, SFARG2HI, SFARG1HI // Same type? - | sltiu TMP0, SFARG1HI, LJ_TISTABUD+1 // Table or userdata? - | movz TMP3, r0, AT // Ignore tv if primitive. - | movn TMP0, r0, SFARG2HI // Tab/ud and same type? - | or AT, SFARG2HI, TMP3 // Same type && (pri||same tv). - | movz TMP0, r0, AT - | beqz TMP0, >1 // Done if not tab/ud or not same type or same tv. - if (vk) { - |. movn TMP2, r0, AT - } else { - |. movz TMP2, r0, AT - } - | // Different tables or userdatas. Need to check __eq metamethod. - | // Field metatable must be at same offset for GCtab and GCudata! 
- | lw TAB:TMP1, TAB:SFARG1LO->metatable - | beqz TAB:TMP1, >1 // No metatable? - |. nop - | lbu TMP1, TAB:TMP1->nomm - | andi TMP1, TMP1, 1<1 // Or 'no __eq' flag set? - |. nop - | b ->vmeta_equal // Handle __eq metamethod. - |. li TMP0, 1-vk // ne = 0 or 1. - |1: - | addu PC, PC, TMP2 - | ins_next - break; - - case BC_ISEQS: case BC_ISNES: - vk = op == BC_ISEQS; - | // RA = src*8, RD = str_const*8 (~), JMP with RD = target - | addu RA, BASE, RA - | addiu PC, PC, 4 - | lw TMP0, HI(RA) - | srl RD, RD, 1 - | lw STR:TMP3, LO(RA) - | subu RD, KBASE, RD - | lhu TMP2, -4+OFS_RD(PC) - |.if FFI - | li AT, LJ_TCDATA - | beq TMP0, AT, ->vmeta_equal_cd - |.endif - |. lw STR:TMP1, -4(RD) // KBASE-4-str_const*4 - | addiu TMP0, TMP0, -LJ_TSTR - | decode_RD4b TMP2 - | xor TMP1, STR:TMP1, STR:TMP3 - | or TMP0, TMP0, TMP1 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, TMP2, TMP3 - if (vk) { - | movn TMP2, r0, TMP0 - } else { - | movz TMP2, r0, TMP0 - } - | addu PC, PC, TMP2 - | ins_next - break; - - case BC_ISEQN: case BC_ISNEN: - vk = op == BC_ISEQN; - | // RA = src*8, RD = num_const*8, JMP with RD = target - | addu RA, BASE, RA - | addu RD, KBASE, RD - | lw SFARG1HI, HI(RA) - | lw SFARG2HI, HI(RD) - | lhu TMP2, OFS_RD(PC) - | addiu PC, PC, 4 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | decode_RD4b TMP2 - if (vk) { - |->BC_ISEQN_Z: - } else { - |->BC_ISNEN_Z: - } - | bne SFARG1HI, TISNUM, >3 - |. lw SFARG1LO, LO(RA) - | lw SFARG2LO, LO(RD) - | addu TMP2, TMP2, TMP3 - | bne SFARG2HI, TISNUM, >6 - |. xor AT, SFARG1LO, SFARG2LO - if (vk) { - | movn TMP2, r0, AT - |1: - | addu PC, PC, TMP2 - |2: - } else { - | movz TMP2, r0, AT - |1: - |2: - | addu PC, PC, TMP2 - } - | ins_next - | - |3: // RA is not an integer. - | sltiu AT, SFARG1HI, LJ_TISNUM - |.if FFI - | beqz AT, >8 - |.else - | beqz AT, <2 - |.endif - |. addu TMP2, TMP2, TMP3 - | sltiu AT, SFARG2HI, LJ_TISNUM - |.if FPU - | ldc1 f20, 0(RA) - | ldc1 f22, 0(RD) - |.endif - | beqz AT, >5 - |. 
lw SFARG2LO, LO(RD) - |4: // RA and RD are both numbers. - |.if FPU - | c.eq.d f20, f22 - | b <1 - if (vk) { - |. movf TMP2, r0 - } else { - |. movt TMP2, r0 - } - |.else - | bal ->vm_sfcmpeq - |. nop - | b <1 - if (vk) { - |. movz TMP2, r0, CRET1 - } else { - |. movn TMP2, r0, CRET1 - } - |.endif - | - |5: // RA is a number, RD is not a number. - |.if FFI - | bne SFARG2HI, TISNUM, >9 - |.else - | bne SFARG2HI, TISNUM, <2 - |.endif - | // RA is a number, RD is an integer. Convert RD to a number. - |.if FPU - |. lwc1 f22, LO(RD) - | b <4 - |. cvt.d.w f22, f22 - |.else - |. nop - | bal ->vm_sfi2d_2 - |. nop - | b <4 - |. nop - |.endif - | - |6: // RA is an integer, RD is not an integer - | sltiu AT, SFARG2HI, LJ_TISNUM - |.if FFI - | beqz AT, >9 - |.else - | beqz AT, <2 - |.endif - | // RA is an integer, RD is a number. Convert RA to a number. - |.if FPU - |. mtc1 SFARG1LO, f20 - | ldc1 f22, 0(RD) - | b <4 - | cvt.d.w f20, f20 - |.else - |. nop - | bal ->vm_sfi2d_1 - |. nop - | b <4 - |. nop - |.endif - | - |.if FFI - |8: - | li AT, LJ_TCDATA - | bne SFARG1HI, AT, <2 - |. nop - | b ->vmeta_equal_cd - |. nop - |9: - | li AT, LJ_TCDATA - | bne SFARG2HI, AT, <2 - |. nop - | b ->vmeta_equal_cd - |. nop - |.endif - break; - - case BC_ISEQP: case BC_ISNEP: - vk = op == BC_ISEQP; - | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target - | addu RA, BASE, RA - | srl TMP1, RD, 3 - | lw TMP0, HI(RA) - | lhu TMP2, OFS_RD(PC) - | not TMP1, TMP1 - | addiu PC, PC, 4 - |.if FFI - | li AT, LJ_TCDATA - | beq TMP0, AT, ->vmeta_equal_cd - |.endif - |. 
xor TMP0, TMP0, TMP1 - | decode_RD4b TMP2 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, TMP2, TMP3 - if (vk) { - | movn TMP2, r0, TMP0 - } else { - | movz TMP2, r0, TMP0 - } - | addu PC, PC, TMP2 - | ins_next - break; - - /* -- Unary test and copy ops ------------------------------------------- */ - - case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: - | // RA = dst*8 or unused, RD = src*8, JMP with RD = target - | addu RD, BASE, RD - | lhu TMP2, OFS_RD(PC) - | lw TMP0, HI(RD) - | addiu PC, PC, 4 - if (op == BC_IST || op == BC_ISF) { - | sltiu TMP0, TMP0, LJ_TISTRUECOND - | decode_RD4b TMP2 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, TMP2, TMP3 - if (op == BC_IST) { - | movz TMP2, r0, TMP0 - } else { - | movn TMP2, r0, TMP0 - } - | addu PC, PC, TMP2 - } else { - | sltiu TMP0, TMP0, LJ_TISTRUECOND - | lw SFRETHI, HI(RD) - | lw SFRETLO, LO(RD) - if (op == BC_ISTC) { - | beqz TMP0, >1 - } else { - | bnez TMP0, >1 - } - |. addu RA, BASE, RA - | decode_RD4b TMP2 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, TMP2, TMP3 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | addu PC, PC, TMP2 - |1: - } - | ins_next - break; - - case BC_ISTYPE: - | // RA = src*8, RD = -type*8 - | addu TMP2, BASE, RA - | srl TMP1, RD, 3 - | lw TMP0, HI(TMP2) - | ins_next1 - | addu AT, TMP0, TMP1 - | bnez AT, ->vmeta_istype - |. ins_next2 - break; - case BC_ISNUM: - | // RA = src*8, RD = -(TISNUM-1)*8 - | addu TMP2, BASE, RA - | lw TMP0, HI(TMP2) - | ins_next1 - | sltiu AT, TMP0, LJ_TISNUM - | beqz AT, ->vmeta_istype - |. 
ins_next2 - break; - - /* -- Unary ops --------------------------------------------------------- */ - - case BC_MOV: - | // RA = dst*8, RD = src*8 - | addu RD, BASE, RD - | addu RA, BASE, RA - | lw SFRETHI, HI(RD) - | lw SFRETLO, LO(RD) - | ins_next1 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | ins_next2 - break; - case BC_NOT: - | // RA = dst*8, RD = src*8 - | addu RD, BASE, RD - | addu RA, BASE, RA - | lw TMP0, HI(RD) - | li TMP1, LJ_TFALSE - | sltiu TMP0, TMP0, LJ_TISTRUECOND - | addiu TMP1, TMP0, LJ_TTRUE - | ins_next1 - | sw TMP1, HI(RA) - | ins_next2 - break; - case BC_UNM: - | // RA = dst*8, RD = src*8 - | addu RB, BASE, RD - | lw SFARG1HI, HI(RB) - | addu RA, BASE, RA - | bne SFARG1HI, TISNUM, >2 - |. lw SFARG1LO, LO(RB) - | lui TMP1, 0x8000 - | beq SFARG1LO, TMP1, ->vmeta_unm // Meta handler deals with -2^31. - |. negu SFARG1LO, SFARG1LO - |1: - | ins_next1 - | sw SFARG1HI, HI(RA) - | sw SFARG1LO, LO(RA) - | ins_next2 - |2: - | sltiu AT, SFARG1HI, LJ_TISNUM - | beqz AT, ->vmeta_unm - |. lui TMP1, 0x8000 - | b <1 - |. xor SFARG1HI, SFARG1HI, TMP1 - break; - case BC_LEN: - | // RA = dst*8, RD = src*8 - | addu CARG2, BASE, RD - | addu RA, BASE, RA - | lw TMP0, HI(CARG2) - | lw CARG1, LO(CARG2) - | li AT, LJ_TSTR - | bne TMP0, AT, >2 - |. li AT, LJ_TTAB - | lw CRET1, STR:CARG1->len - |1: - | ins_next1 - | sw TISNUM, HI(RA) - | sw CRET1, LO(RA) - | ins_next2 - |2: - | bne TMP0, AT, ->vmeta_len - |. nop -#if LJ_52 - | lw TAB:TMP2, TAB:CARG1->metatable - | bnez TAB:TMP2, >9 - |. nop - |3: -#endif - |->BC_LEN_Z: - | load_got lj_tab_len - | call_intern lj_tab_len // (GCtab *t) - |. nop - | // Returns uint32_t (but less than 2^31). - | b <1 - |. nop -#if LJ_52 - |9: - | lbu TMP0, TAB:TMP2->nomm - | andi TMP0, TMP0, 1<vmeta_len - |. nop -#endif - break; - - /* -- Binary ops -------------------------------------------------------- */ - - |.macro fpmod, a, b, c - | bal ->vm_floor // floor(b/c) - |. 
div.d FARG1, b, c - | mul.d a, FRET1, c - | sub.d a, b, a // b - floor(b/c)*c - |.endmacro - - |.macro sfpmod - | addiu sp, sp, -16 - | - | load_got __divdf3 - | sw SFARG1HI, HI(sp) - | sw SFARG1LO, LO(sp) - | sw SFARG2HI, 8+HI(sp) - | call_extern - |. sw SFARG2LO, 8+LO(sp) - | - | load_got floor - | move SFARG1HI, SFRETHI - | call_extern - |. move SFARG1LO, SFRETLO - | - | load_got __muldf3 - | move SFARG1HI, SFRETHI - | move SFARG1LO, SFRETLO - | lw SFARG2HI, 8+HI(sp) - | call_extern - |. lw SFARG2LO, 8+LO(sp) - | - | load_got __subdf3 - | lw SFARG1HI, HI(sp) - | lw SFARG1LO, LO(sp) - | move SFARG2HI, SFRETHI - | call_extern - |. move SFARG2LO, SFRETLO - | - | addiu sp, sp, 16 - |.endmacro - - |.macro ins_arithpre, label - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 - ||switch (vk) { - ||case 0: - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | // RA = dst*8, RB = src1*8, RC = num_const*8 - | addu RB, BASE, RB - |.if "label" ~= "none" - | b label - |.endif - |. addu RC, KBASE, RC - || break; - ||case 1: - | decode_RB8a RC, INS - | decode_RB8b RC - | decode_RDtoRC8 RB, RD - | // RA = dst*8, RB = num_const*8, RC = src1*8 - | addu RC, BASE, RC - |.if "label" ~= "none" - | b label - |.endif - |. addu RB, KBASE, RB - || break; - ||default: - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | // RA = dst*8, RB = src1*8, RC = src2*8 - | addu RB, BASE, RB - |.if "label" ~= "none" - | b label - |.endif - |. addu RC, BASE, RC - || break; - ||} - |.endmacro - | - |.macro ins_arith, intins, fpins, fpcall, label - | ins_arithpre none - | - |.if "label" ~= "none" - |label: - |.endif - | - | lw SFARG1HI, HI(RB) - | lw SFARG2HI, HI(RC) - | - |.if "intins" ~= "div" - | - | // Check for two integers. - | lw SFARG1LO, LO(RB) - | bne SFARG1HI, TISNUM, >5 - |. lw SFARG2LO, LO(RC) - | bne SFARG2HI, TISNUM, >5 - | - |.if "intins" == "addu" - |. 
intins CRET1, SFARG1LO, SFARG2LO - | xor TMP1, CRET1, SFARG1LO // ((y^a) & (y^b)) < 0: overflow. - | xor TMP2, CRET1, SFARG2LO - | and TMP1, TMP1, TMP2 - | bltz TMP1, ->vmeta_arith - |. addu RA, BASE, RA - |.elif "intins" == "subu" - |. intins CRET1, SFARG1LO, SFARG2LO - | xor TMP1, CRET1, SFARG1LO // ((y^a) & (a^b)) < 0: overflow. - | xor TMP2, SFARG1LO, SFARG2LO - | and TMP1, TMP1, TMP2 - | bltz TMP1, ->vmeta_arith - |. addu RA, BASE, RA - |.elif "intins" == "mult" - |. intins SFARG1LO, SFARG2LO - | mflo CRET1 - | mfhi TMP2 - | sra TMP1, CRET1, 31 - | bne TMP1, TMP2, ->vmeta_arith - |. addu RA, BASE, RA - |.else - |. load_got lj_vm_modi - | beqz SFARG2LO, ->vmeta_arith - |. addu RA, BASE, RA - |.if ENDIAN_BE - | move CARG1, SFARG1LO - |.endif - | call_extern - |. move CARG2, SFARG2LO - |.endif - | - | ins_next1 - | sw TISNUM, HI(RA) - | sw CRET1, LO(RA) - |3: - | ins_next2 - | - |.elif not FPU - | - | lw SFARG1LO, LO(RB) - | lw SFARG2LO, LO(RC) - | - |.endif - | - |5: // Check for two numbers. - | .FPU ldc1 f20, 0(RB) - | sltiu AT, SFARG1HI, LJ_TISNUM - | sltiu TMP0, SFARG2HI, LJ_TISNUM - | .FPU ldc1 f22, 0(RC) - | and AT, AT, TMP0 - | beqz AT, ->vmeta_arith - |. addu RA, BASE, RA - | - |.if FPU - | fpins FRET1, f20, f22 - |.elif "fpcall" == "sfpmod" - | sfpmod - |.else - | load_got fpcall - | call_extern - |. nop - |.endif - | - | ins_next1 - |.if not FPU - | sw SFRETHI, HI(RA) - |.endif - |.if "intins" ~= "div" - | b <3 - |.endif - |.if FPU - |. sdc1 FRET1, 0(RA) - |.else - |. 
sw SFRETLO, LO(RA) - |.endif - |.if "intins" == "div" - | ins_next2 - |.endif - | - |.endmacro - - case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - | ins_arith addu, add.d, __adddf3, none - break; - case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - | ins_arith subu, sub.d, __subdf3, none - break; - case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arith mult, mul.d, __muldf3, none - break; - case BC_DIVVN: - | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z - break; - case BC_DIVNV: case BC_DIVVV: - | ins_arithpre ->BC_DIVVN_Z - break; - case BC_MODVN: - | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z - break; - case BC_MODNV: case BC_MODVV: - | ins_arithpre ->BC_MODVN_Z - break; - case BC_POW: - | ins_arithpre none - | lw SFARG1HI, HI(RB) - | lw SFARG2HI, HI(RC) - | sltiu AT, SFARG1HI, LJ_TISNUM - | sltiu TMP0, SFARG2HI, LJ_TISNUM - | and AT, AT, TMP0 - | load_got pow - | beqz AT, ->vmeta_arith - |. addu RA, BASE, RA - |.if FPU - | ldc1 FARG1, 0(RB) - | ldc1 FARG2, 0(RC) - |.else - | lw SFARG1LO, LO(RB) - | lw SFARG2LO, LO(RC) - |.endif - | call_extern - |. nop - | ins_next1 - |.if FPU - | sdc1 FRET1, 0(RA) - |.else - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - |.endif - | ins_next2 - break; - - case BC_CAT: - | // RA = dst*8, RB = src_start*8, RC = src_end*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | subu CARG3, RC, RB - | sw BASE, L->base - | addu CARG2, BASE, RC - | move MULTRES, RB - |->BC_CAT_Z: - | load_got lj_meta_cat - | srl CARG3, CARG3, 3 - | sw PC, SAVE_PC - | call_intern lj_meta_cat // (lua_State *L, TValue *top, int left) - |. move CARG1, L - | // Returns NULL (finished) or TValue * (metamethod). - | bnez CRET1, ->vmeta_binop - |. 
lw BASE, L->base - | addu RB, BASE, MULTRES - | lw SFRETHI, HI(RB) - | lw SFRETLO, LO(RB) - | addu RA, BASE, RA - | ins_next1 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | ins_next2 - break; - - /* -- Constant ops ------------------------------------------------------ */ - - case BC_KSTR: - | // RA = dst*8, RD = str_const*8 (~) - | srl TMP1, RD, 1 - | subu TMP1, KBASE, TMP1 - | ins_next1 - | lw TMP0, -4(TMP1) // KBASE-4-str_const*4 - | addu RA, BASE, RA - | li TMP2, LJ_TSTR - | sw TMP0, LO(RA) - | sw TMP2, HI(RA) - | ins_next2 - break; - case BC_KCDATA: - |.if FFI - | // RA = dst*8, RD = cdata_const*8 (~) - | srl TMP1, RD, 1 - | subu TMP1, KBASE, TMP1 - | ins_next1 - | lw TMP0, -4(TMP1) // KBASE-4-cdata_const*4 - | addu RA, BASE, RA - | li TMP2, LJ_TCDATA - | sw TMP0, LO(RA) - | sw TMP2, HI(RA) - | ins_next2 - |.endif - break; - case BC_KSHORT: - | // RA = dst*8, RD = int16_literal*8 - | sra RD, INS, 16 - | addu RA, BASE, RA - | ins_next1 - | sw TISNUM, HI(RA) - | sw RD, LO(RA) - | ins_next2 - break; - case BC_KNUM: - | // RA = dst*8, RD = num_const*8 - | addu RD, KBASE, RD - | addu RA, BASE, RA - | lw SFRETHI, HI(RD) - | lw SFRETLO, LO(RD) - | ins_next1 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | ins_next2 - break; - case BC_KPRI: - | // RA = dst*8, RD = primitive_type*8 (~) - | srl TMP1, RD, 3 - | addu RA, BASE, RA - | not TMP0, TMP1 - | ins_next1 - | sw TMP0, HI(RA) - | ins_next2 - break; - case BC_KNIL: - | // RA = base*8, RD = end*8 - | addu RA, BASE, RA - | sw TISNIL, HI(RA) - | addiu RA, RA, 8 - | addu RD, BASE, RD - |1: - | sw TISNIL, HI(RA) - | slt AT, RA, RD - | bnez AT, <1 - |. 
addiu RA, RA, 8 - | ins_next_ - break; - - /* -- Upvalue and function ops ------------------------------------------ */ - - case BC_UGET: - | // RA = dst*8, RD = uvnum*8 - | lw LFUNC:RB, FRAME_FUNC(BASE) - | srl RD, RD, 1 - | addu RD, RD, LFUNC:RB - | lw UPVAL:RB, LFUNC:RD->uvptr - | ins_next1 - | lw TMP1, UPVAL:RB->v - | lw SFRETHI, HI(TMP1) - | lw SFRETLO, LO(TMP1) - | addu RA, BASE, RA - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | ins_next2 - break; - case BC_USETV: - | // RA = uvnum*8, RD = src*8 - | lw LFUNC:RB, FRAME_FUNC(BASE) - | srl RA, RA, 1 - | addu RD, BASE, RD - | addu RA, RA, LFUNC:RB - | lw UPVAL:RB, LFUNC:RA->uvptr - | lw SFRETHI, HI(RD) - | lw SFRETLO, LO(RD) - | lbu TMP3, UPVAL:RB->marked - | lw CARG2, UPVAL:RB->v - | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) - | lbu TMP0, UPVAL:RB->closed - | sw SFRETHI, HI(CARG2) - | sw SFRETLO, LO(CARG2) - | li AT, LJ_GC_BLACK|1 - | or TMP3, TMP3, TMP0 - | beq TMP3, AT, >2 // Upvalue is closed and black? - |. addiu TMP2, SFRETHI, -(LJ_TNUMX+1) - |1: - | ins_next - | - |2: // Check if new value is collectable. - | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) - | beqz AT, <1 // tvisgcv(v) - |. nop - | lbu TMP3, GCOBJ:SFRETLO->gch.marked - | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) - | beqz TMP3, <1 - |. load_got lj_gc_barrieruv - | // Crossed a write barrier. Move the barrier forward. - | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) - |. addiu CARG1, DISPATCH, GG_DISP2G - | b <1 - |. nop - break; - case BC_USETS: - | // RA = uvnum*8, RD = str_const*8 (~) - | lw LFUNC:RB, FRAME_FUNC(BASE) - | srl RA, RA, 1 - | srl TMP1, RD, 1 - | addu RA, RA, LFUNC:RB - | subu TMP1, KBASE, TMP1 - | lw UPVAL:RB, LFUNC:RA->uvptr - | lw STR:TMP1, -4(TMP1) // KBASE-4-str_const*4 - | lbu TMP2, UPVAL:RB->marked - | lw CARG2, UPVAL:RB->v - | lbu TMP3, STR:TMP1->marked - | andi AT, TMP2, LJ_GC_BLACK // isblack(uv) - | lbu TMP2, UPVAL:RB->closed - | li TMP0, LJ_TSTR - | sw STR:TMP1, LO(CARG2) - | bnez AT, >2 - |. 
sw TMP0, HI(CARG2) - |1: - | ins_next - | - |2: // Check if string is white and ensure upvalue is closed. - | beqz TMP2, <1 - |. andi AT, TMP3, LJ_GC_WHITES // iswhite(str) - | beqz AT, <1 - |. load_got lj_gc_barrieruv - | // Crossed a write barrier. Move the barrier forward. - | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) - |. addiu CARG1, DISPATCH, GG_DISP2G - | b <1 - |. nop - break; - case BC_USETN: - | // RA = uvnum*8, RD = num_const*8 - | lw LFUNC:RB, FRAME_FUNC(BASE) - | srl RA, RA, 1 - | addu RD, KBASE, RD - | addu RA, RA, LFUNC:RB - | lw UPVAL:RB, LFUNC:RA->uvptr - | lw SFRETHI, HI(RD) - | lw SFRETLO, LO(RD) - | lw TMP1, UPVAL:RB->v - | ins_next1 - | sw SFRETHI, HI(TMP1) - | sw SFRETLO, LO(TMP1) - | ins_next2 - break; - case BC_USETP: - | // RA = uvnum*8, RD = primitive_type*8 (~) - | lw LFUNC:RB, FRAME_FUNC(BASE) - | srl RA, RA, 1 - | srl TMP0, RD, 3 - | addu RA, RA, LFUNC:RB - | not TMP0, TMP0 - | lw UPVAL:RB, LFUNC:RA->uvptr - | ins_next1 - | lw TMP1, UPVAL:RB->v - | sw TMP0, HI(TMP1) - | ins_next2 - break; - - case BC_UCLO: - | // RA = level*8, RD = target - | lw TMP2, L->openupval - | branch_RD // Do this first since RD is not saved. - | load_got lj_func_closeuv - | sw BASE, L->base - | beqz TMP2, >1 - |. move CARG1, L - | call_intern lj_func_closeuv // (lua_State *L, TValue *level) - |. addu CARG2, BASE, RA - | lw BASE, L->base - |1: - | ins_next - break; - - case BC_FNEW: - | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) - | srl TMP1, RD, 1 - | load_got lj_func_newL_gc - | subu TMP1, KBASE, TMP1 - | lw CARG3, FRAME_FUNC(BASE) - | lw CARG2, -4(TMP1) // KBASE-4-tab_const*4 - | sw BASE, L->base - | sw PC, SAVE_PC - | // (lua_State *L, GCproto *pt, GCfuncL *parent) - | call_intern lj_func_newL_gc - |. move CARG1, L - | // Returns GCfuncL *. 
- | lw BASE, L->base - | li TMP0, LJ_TFUNC - | ins_next1 - | addu RA, BASE, RA - | sw LFUNC:CRET1, LO(RA) - | sw TMP0, HI(RA) - | ins_next2 - break; - - /* -- Table ops --------------------------------------------------------- */ - - case BC_TNEW: - case BC_TDUP: - | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) - | lw TMP0, DISPATCH_GL(gc.total)(DISPATCH) - | lw TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) - | sw BASE, L->base - | sw PC, SAVE_PC - | sltu AT, TMP0, TMP1 - | beqz AT, >5 - |1: - if (op == BC_TNEW) { - | load_got lj_tab_new - | srl CARG2, RD, 3 - | andi CARG2, CARG2, 0x7ff - | li TMP0, 0x801 - | addiu AT, CARG2, -0x7ff - | srl CARG3, RD, 14 - | movz CARG2, TMP0, AT - | // (lua_State *L, int32_t asize, uint32_t hbits) - | call_intern lj_tab_new - |. move CARG1, L - | // Returns Table *. - } else { - | load_got lj_tab_dup - | srl TMP1, RD, 1 - | subu TMP1, KBASE, TMP1 - | move CARG1, L - | call_intern lj_tab_dup // (lua_State *L, Table *kt) - |. lw CARG2, -4(TMP1) // KBASE-4-str_const*4 - | // Returns Table *. - } - | lw BASE, L->base - | ins_next1 - | addu RA, BASE, RA - | li TMP0, LJ_TTAB - | sw TAB:CRET1, LO(RA) - | sw TMP0, HI(RA) - | ins_next2 - |5: - | load_got lj_gc_step_fixtop - | move MULTRES, RD - | call_intern lj_gc_step_fixtop // (lua_State *L) - |. move CARG1, L - | b <1 - |. move RD, MULTRES - break; - - case BC_GGET: - | // RA = dst*8, RD = str_const*8 (~) - case BC_GSET: - | // RA = src*8, RD = str_const*8 (~) - | lw LFUNC:TMP2, FRAME_FUNC(BASE) - | srl TMP1, RD, 1 - | subu TMP1, KBASE, TMP1 - | lw TAB:RB, LFUNC:TMP2->env - | lw STR:RC, -4(TMP1) // KBASE-4-str_const*4 - if (op == BC_GGET) { - | b ->BC_TGETS_Z - } else { - | b ->BC_TSETS_Z - } - |. 
addu RA, BASE, RA - break; - - case BC_TGETV: - | // RA = dst*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | addu CARG2, BASE, RB - | addu CARG3, BASE, RC - | lw TMP1, HI(CARG2) - | lw TMP2, HI(CARG3) - | lw TAB:RB, LO(CARG2) - | li AT, LJ_TTAB - | bne TMP1, AT, ->vmeta_tgetv - |. addu RA, BASE, RA - | bne TMP2, TISNUM, >5 - |. lw RC, LO(CARG3) - | lw TMP0, TAB:RB->asize - | lw TMP1, TAB:RB->array - | sltu AT, RC, TMP0 - | sll TMP2, RC, 3 - | beqz AT, ->vmeta_tgetv // Integer key and in array part? - |. addu TMP2, TMP1, TMP2 - | lw SFRETHI, HI(TMP2) - | beq SFRETHI, TISNIL, >2 - |. lw SFRETLO, LO(TMP2) - |1: - | ins_next1 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | ins_next2 - | - |2: // Check for __index if table value is nil. - | lw TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. nop - | lbu TMP0, TAB:TMP2->nomm - | andi TMP0, TMP0, 1<vmeta_tgetv - |. nop - | - |5: - | li AT, LJ_TSTR - | bne TMP2, AT, ->vmeta_tgetv - |. nop - | b ->BC_TGETS_Z // String key? - |. nop - break; - case BC_TGETS: - | // RA = dst*8, RB = table*8, RC = str_const*4 (~) - | decode_RB8a RB, INS - | decode_RB8b RB - | addu CARG2, BASE, RB - | decode_RC4a RC, INS - | lw TMP0, HI(CARG2) - | decode_RC4b RC - | li AT, LJ_TTAB - | lw TAB:RB, LO(CARG2) - | subu CARG3, KBASE, RC - | lw STR:RC, -4(CARG3) // KBASE-4-str_const*4 - | bne TMP0, AT, ->vmeta_tgets1 - |. 
addu RA, BASE, RA - |->BC_TGETS_Z: - | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 - | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash - | lw NODE:TMP2, TAB:RB->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | sll TMP0, TMP1, 5 - | sll TMP1, TMP1, 3 - | subu TMP1, TMP0, TMP1 - | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - |1: - | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) - | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) - | lw NODE:TMP1, NODE:TMP2->next - | lw SFRETHI, offsetof(Node, val)+HI(NODE:TMP2) - | addiu CARG1, CARG1, -LJ_TSTR - | xor TMP0, TMP0, STR:RC - | or AT, CARG1, TMP0 - | bnez AT, >4 - |. lw TAB:TMP3, TAB:RB->metatable - | beq SFRETHI, TISNIL, >5 // Key found, but nil value? - |. lw SFRETLO, offsetof(Node, val)+LO(NODE:TMP2) - |3: - | ins_next1 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | ins_next2 - | - |4: // Follow hash chain. - | bnez NODE:TMP1, <1 - |. move NODE:TMP2, NODE:TMP1 - | // End of hash chain: key not found, nil result. - | - |5: // Check for __index if table value is nil. - | beqz TAB:TMP3, <3 // No metatable: done. - |. li SFRETHI, LJ_TNIL - | lbu TMP0, TAB:TMP3->nomm - | andi TMP0, TMP0, 1<vmeta_tgets - |. nop - break; - case BC_TGETB: - | // RA = dst*8, RB = table*8, RC = index*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | addu CARG2, BASE, RB - | decode_RDtoRC8 RC, RD - | lw CARG1, HI(CARG2) - | li AT, LJ_TTAB - | lw TAB:RB, LO(CARG2) - | addu RA, BASE, RA - | bne CARG1, AT, ->vmeta_tgetb - |. srl TMP0, RC, 3 - | lw TMP1, TAB:RB->asize - | lw TMP2, TAB:RB->array - | sltu AT, TMP0, TMP1 - | beqz AT, ->vmeta_tgetb - |. addu RC, TMP2, RC - | lw SFRETHI, HI(RC) - | beq SFRETHI, TISNIL, >5 - |. lw SFRETLO, LO(RC) - |1: - | ins_next1 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | ins_next2 - | - |5: // Check for __index if table value is nil. - | lw TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. 
nop - | lbu TMP1, TAB:TMP2->nomm - | andi TMP1, TMP1, 1<vmeta_tgetb // Caveat: preserve TMP0 and CARG2! - |. nop - break; - case BC_TGETR: - | // RA = dst*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | addu RB, BASE, RB - | addu RC, BASE, RC - | lw TAB:CARG1, LO(RB) - | lw CARG2, LO(RC) - | addu RA, BASE, RA - | lw TMP0, TAB:CARG1->asize - | lw TMP1, TAB:CARG1->array - | sltu AT, CARG2, TMP0 - | sll TMP2, CARG2, 3 - | beqz AT, ->vmeta_tgetr // In array part? - |. addu CRET1, TMP1, TMP2 - | lw SFARG2HI, HI(CRET1) - | lw SFARG2LO, LO(CRET1) - |->BC_TGETR_Z: - | ins_next1 - | sw SFARG2HI, HI(RA) - | sw SFARG2LO, LO(RA) - | ins_next2 - break; - - case BC_TSETV: - | // RA = src*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | addu CARG2, BASE, RB - | addu CARG3, BASE, RC - | lw TMP1, HI(CARG2) - | lw TMP2, HI(CARG3) - | lw TAB:RB, LO(CARG2) - | li AT, LJ_TTAB - | bne TMP1, AT, ->vmeta_tsetv - |. addu RA, BASE, RA - | bne TMP2, TISNUM, >5 - |. lw RC, LO(CARG3) - | lw TMP0, TAB:RB->asize - | lw TMP1, TAB:RB->array - | sltu AT, RC, TMP0 - | sll TMP2, RC, 3 - | beqz AT, ->vmeta_tsetv // Integer key and in array part? - |. addu TMP1, TMP1, TMP2 - | lw TMP0, HI(TMP1) - | lbu TMP3, TAB:RB->marked - | lw SFRETHI, HI(RA) - | beq TMP0, TISNIL, >3 - |. lw SFRETLO, LO(RA) - |1: - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | sw SFRETHI, HI(TMP1) - | bnez AT, >7 - |. sw SFRETLO, LO(TMP1) - |2: - | ins_next - | - |3: // Check for __newindex if previous value is nil. - | lw TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. nop - | lbu TMP2, TAB:TMP2->nomm - | andi TMP2, TMP2, 1<vmeta_tsetv - |. nop - | - |5: - | li AT, LJ_TSTR - | bne TMP2, AT, ->vmeta_tsetv - |. nop - | b ->BC_TSETS_Z // String key? - |. nop - | - |7: // Possible table write barrier for the value. Skip valiswhite check. 
- | barrierback TAB:RB, TMP3, TMP0, <2 - break; - case BC_TSETS: - | // RA = src*8, RB = table*8, RC = str_const*8 (~) - | decode_RB8a RB, INS - | decode_RB8b RB - | addu CARG2, BASE, RB - | decode_RC4a RC, INS - | lw TMP0, HI(CARG2) - | decode_RC4b RC - | li AT, LJ_TTAB - | subu CARG3, KBASE, RC - | lw TAB:RB, LO(CARG2) - | lw STR:RC, -4(CARG3) // KBASE-4-str_const*4 - | bne TMP0, AT, ->vmeta_tsets1 - |. addu RA, BASE, RA - |->BC_TSETS_Z: - | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 - | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash - | lw NODE:TMP2, TAB:RB->node - | sb r0, TAB:RB->nomm // Clear metamethod cache. - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | sll TMP0, TMP1, 5 - | sll TMP1, TMP1, 3 - | subu TMP1, TMP0, TMP1 - | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - |.if FPU - | ldc1 f20, 0(RA) - |.else - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - |.endif - |1: - | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) - | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) - | li AT, LJ_TSTR - | lw NODE:TMP1, NODE:TMP2->next - | bne CARG1, AT, >5 - |. lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) - | bne TMP0, STR:RC, >5 - |. lbu TMP3, TAB:RB->marked - | beq CARG2, TISNIL, >4 // Key found, but nil value? - |. lw TAB:TMP0, TAB:RB->metatable - |2: - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - |.if FPU - | bnez AT, >7 - |. sdc1 f20, NODE:TMP2->val - |.else - | sw SFRETHI, NODE:TMP2->val.u32.hi - | bnez AT, >7 - |. sw SFRETLO, NODE:TMP2->val.u32.lo - |.endif - |3: - | ins_next - | - |4: // Check for __newindex if previous value is nil. - | beqz TAB:TMP0, <2 // No metatable: done. - |. nop - | lbu TMP0, TAB:TMP0->nomm - | andi TMP0, TMP0, 1<vmeta_tsets - |. nop - | - |5: // Follow hash chain. - | bnez NODE:TMP1, <1 - |. move NODE:TMP2, NODE:TMP1 - | // End of hash chain: key not found, add a new one - | - | // But check for __newindex first. 
- | lw TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, >6 // No metatable: continue. - |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | lbu TMP0, TAB:TMP2->nomm - | andi TMP0, TMP0, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. - |. li AT, LJ_TSTR - |6: - | load_got lj_tab_newkey - | sw STR:RC, LO(CARG3) - | sw AT, HI(CARG3) - | sw BASE, L->base - | move CARG2, TAB:RB - | sw PC, SAVE_PC - | call_intern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k - |. move CARG1, L - | // Returns TValue *. - | lw BASE, L->base - |.if FPU - | b <3 // No 2nd write barrier needed. - |. sdc1 f20, 0(CRET1) - |.else - | lw SFARG1HI, HI(RA) - | lw SFARG1LO, LO(RA) - | sw SFARG1HI, HI(CRET1) - | b <3 // No 2nd write barrier needed. - |. sw SFARG1LO, LO(CRET1) - |.endif - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0, <3 - break; - case BC_TSETB: - | // RA = src*8, RB = table*8, RC = index*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | addu CARG2, BASE, RB - | decode_RDtoRC8 RC, RD - | lw CARG1, HI(CARG2) - | li AT, LJ_TTAB - | lw TAB:RB, LO(CARG2) - | addu RA, BASE, RA - | bne CARG1, AT, ->vmeta_tsetb - |. srl TMP0, RC, 3 - | lw TMP1, TAB:RB->asize - | lw TMP2, TAB:RB->array - | sltu AT, TMP0, TMP1 - | beqz AT, ->vmeta_tsetb - |. addu RC, TMP2, RC - | lw TMP1, HI(RC) - | lbu TMP3, TAB:RB->marked - | beq TMP1, TISNIL, >5 - |1: - |. lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | sw SFRETHI, HI(RC) - | bnez AT, >7 - |. sw SFRETLO, LO(RC) - |2: - | ins_next - | - |5: // Check for __newindex if previous value is nil. - | lw TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. nop - | lbu TMP1, TAB:TMP2->nomm - | andi TMP1, TMP1, 1<vmeta_tsetb // Caveat: preserve TMP0 and CARG2! - |. nop - | - |7: // Possible table write barrier for the value. Skip valiswhite check. 
- | barrierback TAB:RB, TMP3, TMP0, <2 - break; - case BC_TSETR: - | // RA = dst*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | addu CARG1, BASE, RB - | addu CARG3, BASE, RC - | lw TAB:CARG2, LO(CARG1) - | lw CARG3, LO(CARG3) - | lbu TMP3, TAB:CARG2->marked - | lw TMP0, TAB:CARG2->asize - | lw TMP1, TAB:CARG2->array - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | bnez AT, >7 - |. addu RA, BASE, RA - |2: - | sltu AT, CARG3, TMP0 - | sll TMP2, CARG3, 3 - | beqz AT, ->vmeta_tsetr // In array part? - |. addu CRET1, TMP1, TMP2 - |->BC_TSETR_Z: - | lw SFARG1HI, HI(RA) - | lw SFARG1LO, LO(RA) - | ins_next1 - | sw SFARG1HI, HI(CRET1) - | sw SFARG1LO, LO(CRET1) - | ins_next2 - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP3, TMP0, <2 - break; - - case BC_TSETM: - | // RA = base*8 (table at base-1), RD = num_const*8 (start index) - | addu RA, BASE, RA - |1: - | addu TMP3, KBASE, RD - | lw TAB:CARG2, -8+LO(RA) // Guaranteed to be a table. - | addiu TMP0, MULTRES, -8 - | lw TMP3, LO(TMP3) // Integer constant is in lo-word. - | beqz TMP0, >4 // Nothing to copy? - |. srl CARG3, TMP0, 3 - | addu CARG3, CARG3, TMP3 - | lw TMP2, TAB:CARG2->asize - | sll TMP1, TMP3, 3 - | lbu TMP3, TAB:CARG2->marked - | lw CARG1, TAB:CARG2->array - | sltu AT, TMP2, CARG3 - | bnez AT, >5 - |. addu TMP2, RA, TMP0 - | addu TMP1, TMP1, CARG1 - | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) - |3: // Copy result slots to table. - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | addiu RA, RA, 8 - | sltu AT, RA, TMP2 - | sw SFRETHI, HI(TMP1) - | sw SFRETLO, LO(TMP1) - | bnez AT, <3 - |. addiu TMP1, TMP1, 8 - | bnez TMP0, >7 - |. nop - |4: - | ins_next - | - |5: // Need to resize array part. - | load_got lj_tab_reasize - | sw BASE, L->base - | sw PC, SAVE_PC - | move BASE, RD - | call_intern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) - |. 
move CARG1, L - | // Must not reallocate the stack. - | move RD, BASE - | b <1 - |. lw BASE, L->base // Reload BASE for lack of a saved register. - | - |7: // Possible table write barrier for any value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP3, TMP0, <4 - break; - - /* -- Calls and vararg handling ----------------------------------------- */ - - case BC_CALLM: - | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 - | decode_RDtoRC8 NARGS8:RC, RD - | b ->BC_CALL_Z - |. addu NARGS8:RC, NARGS8:RC, MULTRES - break; - case BC_CALL: - | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 - | decode_RDtoRC8 NARGS8:RC, RD - |->BC_CALL_Z: - | move TMP2, BASE - | addu BASE, BASE, RA - | li AT, LJ_TFUNC - | lw TMP0, HI(BASE) - | lw LFUNC:RB, LO(BASE) - | addiu BASE, BASE, 8 - | bne TMP0, AT, ->vmeta_call - |. addiu NARGS8:RC, NARGS8:RC, -8 - | ins_call - break; - - case BC_CALLMT: - | // RA = base*8, (RB = 0,) RC = extra_nargs*8 - | addu NARGS8:RD, NARGS8:RD, MULTRES // BC_CALLT gets RC from RD. - | // Fall through. Assumes BC_CALLT follows. - break; - case BC_CALLT: - | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 - | addu RA, BASE, RA - | li AT, LJ_TFUNC - | lw TMP0, HI(RA) - | lw LFUNC:RB, LO(RA) - | move NARGS8:RC, RD - | lw TMP1, FRAME_PC(BASE) - | addiu RA, RA, 8 - | bne TMP0, AT, ->vmeta_callt - |. addiu NARGS8:RC, NARGS8:RC, -8 - |->BC_CALLT_Z: - | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'. - | lbu TMP3, LFUNC:RB->ffid - | bnez TMP0, >7 - |. xori TMP2, TMP1, FRAME_VARG - |1: - | sw LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. - | sltiu AT, TMP3, 2 // (> FF_C) Calling a fast function? - | move TMP2, BASE - | beqz NARGS8:RC, >3 - |. move TMP3, NARGS8:RC - |2: - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | addiu RA, RA, 8 - | addiu TMP3, TMP3, -8 - | sw SFRETHI, HI(TMP2) - | sw SFRETLO, LO(TMP2) - | bnez TMP3, <2 - |. addiu TMP2, TMP2, 8 - |3: - | or TMP0, TMP0, AT - | beqz TMP0, >5 - |. 
nop - |4: - | ins_callt - | - |5: // Tailcall to a fast function with a Lua frame below. - | lw INS, -4(TMP1) - | decode_RA8a RA, INS - | decode_RA8b RA - | subu TMP1, BASE, RA - | lw LFUNC:TMP1, -8+FRAME_FUNC(TMP1) - | lw TMP1, LFUNC:TMP1->pc - | b <4 - |. lw KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. - | - |7: // Tailcall from a vararg function. - | andi AT, TMP2, FRAME_TYPEP - | bnez AT, <1 // Vararg frame below? - |. subu TMP2, BASE, TMP2 // Relocate BASE down. - | move BASE, TMP2 - | lw TMP1, FRAME_PC(TMP2) - | b <1 - |. andi TMP0, TMP1, FRAME_TYPE - break; - - case BC_ITERC: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) - | move TMP2, BASE - | addu BASE, BASE, RA - | li AT, LJ_TFUNC - | lw TMP1, -24+HI(BASE) - | lw LFUNC:RB, -24+LO(BASE) - | lw SFARG1HI, -16+HI(BASE) - | lw SFARG1LO, -16+LO(BASE) - | lw SFARG2HI, -8+HI(BASE) - | lw SFARG2LO, -8+LO(BASE) - | sw TMP1, HI(BASE) // Copy callable. - | sw LFUNC:RB, LO(BASE) - | sw SFARG1HI, 8+HI(BASE) // Copy state. - | sw SFARG1LO, 8+LO(BASE) - | sw SFARG2HI, 16+HI(BASE) // Copy control var. - | sw SFARG2LO, 16+LO(BASE) - | addiu BASE, BASE, 8 - | bne TMP1, AT, ->vmeta_call - |. li NARGS8:RC, 16 // Iterators get 2 arguments. - | ins_call - break; - - case BC_ITERN: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. - |.endif - | addu RA, BASE, RA - | lw TAB:RB, -16+LO(RA) - | lw RC, -8+LO(RA) // Get index from control var. - | lw TMP0, TAB:RB->asize - | lw TMP1, TAB:RB->array - | addiu PC, PC, 4 - |1: // Traverse array part. - | sltu AT, RC, TMP0 - | beqz AT, >5 // Index points after array part? - |. sll TMP3, RC, 3 - | addu TMP3, TMP1, TMP3 - | lw SFARG1HI, HI(TMP3) - | lw SFARG1LO, LO(TMP3) - | lhu RD, -4+OFS_RD(PC) - | sw TISNUM, HI(RA) - | sw RC, LO(RA) - | beq SFARG1HI, TISNIL, <1 // Skip holes in array part. - |. 
addiu RC, RC, 1 - | sw SFARG1HI, 8+HI(RA) - | sw SFARG1LO, 8+LO(RA) - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | decode_RD4b RD - | addu RD, RD, TMP3 - | sw RC, -8+LO(RA) // Update control var. - | addu PC, PC, RD - |3: - | ins_next - | - |5: // Traverse hash part. - | lw TMP1, TAB:RB->hmask - | subu RC, RC, TMP0 - | lw TMP2, TAB:RB->node - |6: - | sltu AT, TMP1, RC // End of iteration? Branch to ITERL+1. - | bnez AT, <3 - |. sll TMP3, RC, 5 - | sll RB, RC, 3 - | subu TMP3, TMP3, RB - | addu NODE:TMP3, TMP3, TMP2 - | lw SFARG1HI, NODE:TMP3->val.u32.hi - | lw SFARG1LO, NODE:TMP3->val.u32.lo - | lhu RD, -4+OFS_RD(PC) - | beq SFARG1HI, TISNIL, <6 // Skip holes in hash part. - |. addiu RC, RC, 1 - | lw SFARG2HI, NODE:TMP3->key.u32.hi - | lw SFARG2LO, NODE:TMP3->key.u32.lo - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sw SFARG1HI, 8+HI(RA) - | sw SFARG1LO, 8+LO(RA) - | addu RC, RC, TMP0 - | decode_RD4b RD - | addu RD, RD, TMP3 - | sw SFARG2HI, HI(RA) - | sw SFARG2LO, LO(RA) - | addu PC, PC, RD - | b <3 - |. sw RC, -8+LO(RA) // Update control var. - break; - - case BC_ISNEXT: - | // RA = base*8, RD = target (points to ITERN) - | addu RA, BASE, RA - | srl TMP0, RD, 1 - | lw CARG1, -24+HI(RA) - | lw CFUNC:CARG2, -24+LO(RA) - | addu TMP0, PC, TMP0 - | lw CARG3, -16+HI(RA) - | lw CARG4, -8+HI(RA) - | li AT, LJ_TFUNC - | bne CARG1, AT, >5 - |. lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) - | lbu CARG2, CFUNC:CARG2->ffid - | addiu CARG3, CARG3, -LJ_TTAB - | addiu CARG4, CARG4, -LJ_TNIL - | or CARG3, CARG3, CARG4 - | addiu CARG2, CARG2, -FF_next_N - | or CARG2, CARG2, CARG3 - | bnez CARG2, >5 - |. lui TMP1, 0xfffe - | addu PC, TMP0, TMP2 - | ori TMP1, TMP1, 0x7fff - | sw r0, -8+LO(RA) // Initialize control var. - | sw TMP1, -8+HI(RA) - |1: - | ins_next - |5: // Despecialize bytecode if any of the checks fail. - | li TMP3, BC_JMP - | li TMP1, BC_ITERC - | sb TMP3, -4+OFS_OP(PC) - | addu PC, TMP0, TMP2 - | b <1 - |. 
sb TMP1, OFS_OP(PC) - break; - - case BC_VARG: - | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 - | lw TMP0, FRAME_PC(BASE) - | decode_RDtoRC8 RC, RD - | decode_RB8a RB, INS - | addu RC, BASE, RC - | decode_RB8b RB - | addu RA, BASE, RA - | addiu RC, RC, FRAME_VARG - | addu TMP2, RA, RB - | addiu TMP3, BASE, -8 // TMP3 = vtop - | subu RC, RC, TMP0 // RC = vbase - | // Note: RC may now be even _above_ BASE if nargs was < numparams. - | beqz RB, >5 // Copy all varargs? - |. subu TMP1, TMP3, RC - | addiu TMP2, TMP2, -16 - |1: // Copy vararg slots to destination slots. - | lw CARG1, HI(RC) - | sltu AT, RC, TMP3 - | lw CARG2, LO(RC) - | addiu RC, RC, 8 - | movz CARG1, TISNIL, AT - | sw CARG1, HI(RA) - | sw CARG2, LO(RA) - | sltu AT, RA, TMP2 - | bnez AT, <1 - |. addiu RA, RA, 8 - |3: - | ins_next - | - |5: // Copy all varargs. - | lw TMP0, L->maxstack - | blez TMP1, <3 // No vararg slots? - |. li MULTRES, 8 // MULTRES = (0+1)*8 - | addu TMP2, RA, TMP1 - | sltu AT, TMP0, TMP2 - | bnez AT, >7 - |. addiu MULTRES, TMP1, 8 - |6: - | lw SFRETHI, HI(RC) - | lw SFRETLO, LO(RC) - | addiu RC, RC, 8 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | sltu AT, RC, TMP3 - | bnez AT, <6 // More vararg slots? - |. addiu RA, RA, 8 - | b <3 - |. nop - | - |7: // Grow stack for varargs. - | load_got lj_state_growstack - | sw RA, L->top - | subu RA, RA, BASE - | sw BASE, L->base - | subu BASE, RC, BASE // Need delta, because BASE may change. - | sw PC, SAVE_PC - | srl CARG2, TMP1, 3 - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | move RC, BASE - | lw BASE, L->base - | addu RA, BASE, RA - | addu RC, BASE, RC - | b <6 - |. addiu TMP3, BASE, -8 - break; - - /* -- Returns ----------------------------------------------------------- */ - - case BC_RETM: - | // RA = results*8, RD = extra_nresults*8 - | addu RD, RD, MULTRES // MULTRES >= 8, so RD >= 8. - | // Fall through. Assumes BC_RET follows. 
- break; - - case BC_RET: - | // RA = results*8, RD = (nresults+1)*8 - | lw PC, FRAME_PC(BASE) - | addu RA, BASE, RA - | move MULTRES, RD - |1: - | andi TMP0, PC, FRAME_TYPE - | bnez TMP0, ->BC_RETV_Z - |. xori TMP1, PC, FRAME_VARG - | - |->BC_RET_Z: - | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return - | lw INS, -4(PC) - | addiu TMP2, BASE, -8 - | addiu RC, RD, -8 - | decode_RA8a TMP0, INS - | decode_RB8a RB, INS - | decode_RA8b TMP0 - | decode_RB8b RB - | addu TMP3, TMP2, RB - | beqz RC, >3 - |. subu BASE, TMP2, TMP0 - |2: - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | addiu RA, RA, 8 - | addiu RC, RC, -8 - | sw SFRETHI, HI(TMP2) - | sw SFRETLO, LO(TMP2) - | bnez RC, <2 - |. addiu TMP2, TMP2, 8 - |3: - | addiu TMP3, TMP3, -8 - |5: - | sltu AT, TMP2, TMP3 - | bnez AT, >6 - |. lw LFUNC:TMP1, FRAME_FUNC(BASE) - | ins_next1 - | lw TMP1, LFUNC:TMP1->pc - | lw KBASE, PC2PROTO(k)(TMP1) - | ins_next2 - | - |6: // Fill up results with nil. - | sw TISNIL, HI(TMP2) - | b <5 - |. addiu TMP2, TMP2, 8 - | - |->BC_RETV_Z: // Non-standard return case. - | andi TMP2, TMP1, FRAME_TYPEP - | bnez TMP2, ->vm_return - |. nop - | // Return from vararg function: relocate BASE down. - | subu BASE, BASE, TMP1 - | b <1 - |. lw PC, FRAME_PC(BASE) - break; - - case BC_RET0: case BC_RET1: - | // RA = results*8, RD = (nresults+1)*8 - | lw PC, FRAME_PC(BASE) - | addu RA, BASE, RA - | move MULTRES, RD - | andi TMP0, PC, FRAME_TYPE - | bnez TMP0, ->BC_RETV_Z - |. xori TMP1, PC, FRAME_VARG - | - | lw INS, -4(PC) - | addiu TMP2, BASE, -8 - if (op == BC_RET1) { - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - } - | decode_RB8a RB, INS - | decode_RA8a RA, INS - | decode_RB8b RB - | decode_RA8b RA - if (op == BC_RET1) { - | sw SFRETHI, HI(TMP2) - | sw SFRETLO, LO(TMP2) - } - | subu BASE, TMP2, RA - |5: - | sltu AT, RD, RB - | bnez AT, >6 - |. 
lw LFUNC:TMP1, FRAME_FUNC(BASE) - | ins_next1 - | lw TMP1, LFUNC:TMP1->pc - | lw KBASE, PC2PROTO(k)(TMP1) - | ins_next2 - | - |6: // Fill up results with nil. - | addiu TMP2, TMP2, 8 - | addiu RD, RD, 8 - | b <5 - if (op == BC_RET1) { - |. sw TISNIL, HI(TMP2) - } else { - |. sw TISNIL, -8+HI(TMP2) - } - break; - - /* -- Loops and branches ------------------------------------------------ */ - - case BC_FORL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IFORL follows. - break; - - case BC_JFORI: - case BC_JFORL: -#if !LJ_HASJIT - break; -#endif - case BC_FORI: - case BC_IFORL: - | // RA = base*8, RD = target (after end of loop or start of loop) - vk = (op == BC_IFORL || op == BC_JFORL); - | addu RA, BASE, RA - | lw SFARG1HI, FORL_IDX*8+HI(RA) - | lw SFARG1LO, FORL_IDX*8+LO(RA) - if (op != BC_JFORL) { - | srl RD, RD, 1 - | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, RD, TMP2 - } - if (!vk) { - | lw SFARG2HI, FORL_STOP*8+HI(RA) - | lw SFARG2LO, FORL_STOP*8+LO(RA) - | bne SFARG1HI, TISNUM, >5 - |. lw SFRETHI, FORL_STEP*8+HI(RA) - | xor AT, SFARG2HI, TISNUM - | lw SFRETLO, FORL_STEP*8+LO(RA) - | xor TMP0, SFRETHI, TISNUM - | or AT, AT, TMP0 - | bnez AT, ->vmeta_for - |. slt AT, SFRETLO, r0 - | slt CRET1, SFARG2LO, SFARG1LO - | slt TMP1, SFARG1LO, SFARG2LO - | movn CRET1, TMP1, AT - } else { - | bne SFARG1HI, TISNUM, >5 - |. lw SFARG2LO, FORL_STEP*8+LO(RA) - | lw SFRETLO, FORL_STOP*8+LO(RA) - | move TMP3, SFARG1LO - | addu SFARG1LO, SFARG1LO, SFARG2LO - | xor TMP0, SFARG1LO, TMP3 - | xor TMP1, SFARG1LO, SFARG2LO - | and TMP0, TMP0, TMP1 - | slt TMP1, SFARG1LO, SFRETLO - | slt CRET1, SFRETLO, SFARG1LO - | slt AT, SFARG2LO, r0 - | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow. 
- | movn CRET1, TMP1, AT - | or CRET1, CRET1, TMP0 - } - |1: - if (op == BC_FORI) { - | movz TMP2, r0, CRET1 - | addu PC, PC, TMP2 - } else if (op == BC_JFORI) { - | addu PC, PC, TMP2 - | lhu RD, -4+OFS_RD(PC) - } else if (op == BC_IFORL) { - | movn TMP2, r0, CRET1 - | addu PC, PC, TMP2 - } - if (vk) { - | sw SFARG1HI, FORL_IDX*8+HI(RA) - | sw SFARG1LO, FORL_IDX*8+LO(RA) - } - | ins_next1 - | sw SFARG1HI, FORL_EXT*8+HI(RA) - | sw SFARG1LO, FORL_EXT*8+LO(RA) - |2: - if (op == BC_JFORI) { - | beqz CRET1, =>BC_JLOOP - |. decode_RD8b RD - } else if (op == BC_JFORL) { - | beqz CRET1, =>BC_JLOOP - } - | ins_next2 - | - |5: // FP loop. - |.if FPU - if (!vk) { - | ldc1 f0, FORL_IDX*8(RA) - | ldc1 f2, FORL_STOP*8(RA) - | sltiu TMP0, SFARG1HI, LJ_TISNUM - | sltiu TMP1, SFARG2HI, LJ_TISNUM - | sltiu AT, SFRETHI, LJ_TISNUM - | and TMP0, TMP0, TMP1 - | and AT, AT, TMP0 - | beqz AT, ->vmeta_for - |. slt TMP3, SFRETHI, r0 - | c.ole.d 0, f0, f2 - | c.ole.d 1, f2, f0 - | li CRET1, 1 - | movt CRET1, r0, 0 - | movt AT, r0, 1 - | b <1 - |. movn CRET1, AT, TMP3 - } else { - | ldc1 f0, FORL_IDX*8(RA) - | ldc1 f4, FORL_STEP*8(RA) - | ldc1 f2, FORL_STOP*8(RA) - | lw SFARG2HI, FORL_STEP*8+HI(RA) - | add.d f0, f0, f4 - | c.ole.d 0, f0, f2 - | c.ole.d 1, f2, f0 - | slt TMP3, SFARG2HI, r0 - | li CRET1, 1 - | li AT, 1 - | movt CRET1, r0, 0 - | movt AT, r0, 1 - | movn CRET1, AT, TMP3 - if (op == BC_IFORL) { - | movn TMP2, r0, CRET1 - | addu PC, PC, TMP2 - } - | sdc1 f0, FORL_IDX*8(RA) - | ins_next1 - | b <2 - |. sdc1 f0, FORL_EXT*8(RA) - } - |.else - if (!vk) { - | sltiu TMP0, SFARG1HI, LJ_TISNUM - | sltiu TMP1, SFARG2HI, LJ_TISNUM - | sltiu AT, SFRETHI, LJ_TISNUM - | and TMP0, TMP0, TMP1 - | and AT, AT, TMP0 - | beqz AT, ->vmeta_for - |. nop - | bal ->vm_sfcmpolex - |. move TMP3, SFRETHI - | b <1 - |. nop - } else { - | lw SFARG2HI, FORL_STEP*8+HI(RA) - | load_got __adddf3 - | call_extern - |. 
sw TMP2, ARG5 - | lw SFARG2HI, FORL_STOP*8+HI(RA) - | lw SFARG2LO, FORL_STOP*8+LO(RA) - | move SFARG1HI, SFRETHI - | move SFARG1LO, SFRETLO - | bal ->vm_sfcmpolex - |. lw TMP3, FORL_STEP*8+HI(RA) - if ( op == BC_JFORL ) { - | lhu RD, -4+OFS_RD(PC) - | lw TMP2, ARG5 - | b <1 - |. decode_RD8b RD - } else { - | b <1 - |. lw TMP2, ARG5 - } - } - |.endif - break; - - case BC_ITERL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IITERL follows. - break; - - case BC_JITERL: -#if !LJ_HASJIT - break; -#endif - case BC_IITERL: - | // RA = base*8, RD = target - | addu RA, BASE, RA - | lw TMP1, HI(RA) - | beq TMP1, TISNIL, >1 // Stop if iterator returned nil. - |. lw TMP2, LO(RA) - if (op == BC_JITERL) { - | sw TMP1, -8+HI(RA) - | b =>BC_JLOOP - |. sw TMP2, -8+LO(RA) - } else { - | branch_RD // Otherwise save control var + branch. - | sw TMP1, -8+HI(RA) - | sw TMP2, -8+LO(RA) - } - |1: - | ins_next - break; - - case BC_LOOP: - | // RA = base*8, RD = target (loop extent) - | // Note: RA/RD is only used by trace recorder to determine scope/extent - | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_ILOOP follows. - break; - - case BC_ILOOP: - | // RA = base*8, RD = target (loop extent) - | ins_next - break; - - case BC_JLOOP: - |.if JIT - | // RA = base*8 (ignored), RD = traceno*8 - | lw TMP1, DISPATCH_J(trace)(DISPATCH) - | srl RD, RD, 1 - | li AT, 0 - | addu TMP1, TMP1, RD - | // Traces on MIPS don't store the trace number, so use 0. - | sw AT, DISPATCH_GL(vmstate)(DISPATCH) - | lw TRACE:TMP2, 0(TMP1) - | sw BASE, DISPATCH_GL(jit_base)(DISPATCH) - | lw TMP2, TRACE:TMP2->mcode - | sw L, DISPATCH_GL(tmpbuf.L)(DISPATCH) - | jr TMP2 - |. 
addiu JGL, DISPATCH, GG_DISP2G+32768 - |.endif - break; - - case BC_JMP: - | // RA = base*8 (only used by trace recorder), RD = target - | branch_RD - | ins_next - break; - - /* -- Function headers -------------------------------------------------- */ - - case BC_FUNCF: - |.if JIT - | hotcall - |.endif - case BC_FUNCV: /* NYI: compiled vararg functions. */ - | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. - break; - - case BC_JFUNCF: -#if !LJ_HASJIT - break; -#endif - case BC_IFUNCF: - | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 - | lw TMP2, L->maxstack - | lbu TMP1, -4+PC2PROTO(numparams)(PC) - | lw KBASE, -4+PC2PROTO(k)(PC) - | sltu AT, TMP2, RA - | bnez AT, ->vm_growstack_l - |. sll TMP1, TMP1, 3 - if (op != BC_JFUNCF) { - | ins_next1 - } - |2: - | sltu AT, NARGS8:RC, TMP1 // Check for missing parameters. - | bnez AT, >3 - |. addu AT, BASE, NARGS8:RC - if (op == BC_JFUNCF) { - | decode_RD8a RD, INS - | b =>BC_JLOOP - |. decode_RD8b RD - } else { - | ins_next2 - } - | - |3: // Clear missing parameters. - | sw TISNIL, HI(AT) - | b <2 - |. addiu NARGS8:RC, NARGS8:RC, 8 - break; - - case BC_JFUNCV: -#if !LJ_HASJIT - break; -#endif - | NYI // NYI: compiled vararg functions - break; /* NYI: compiled vararg functions. */ - - case BC_IFUNCV: - | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 - | addu TMP1, BASE, RC - | lw TMP2, L->maxstack - | addu TMP0, RA, RC - | sw LFUNC:RB, LO(TMP1) // Store copy of LFUNC. - | addiu TMP3, RC, 8+FRAME_VARG - | sltu AT, TMP0, TMP2 - | lw KBASE, -4+PC2PROTO(k)(PC) - | beqz AT, ->vm_growstack_l - |. sw TMP3, HI(TMP1) // Store delta + FRAME_VARG. - | lbu TMP2, -4+PC2PROTO(numparams)(PC) - | move RA, BASE - | move RC, TMP1 - | ins_next1 - | beqz TMP2, >3 - |. addiu BASE, TMP1, 8 - |1: - | lw TMP0, HI(RA) - | lw TMP3, LO(RA) - | sltu AT, RA, RC // Less args than parameters? - | move CARG1, TMP0 - | movz TMP0, TISNIL, AT // Clear missing parameters. 
- | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC). - | sw TMP3, 8+LO(TMP1) - | addiu TMP2, TMP2, -1 - | sw TMP0, 8+HI(TMP1) - | addiu TMP1, TMP1, 8 - | sw CARG1, HI(RA) - | bnez TMP2, <1 - |. addiu RA, RA, 8 - |3: - | ins_next2 - break; - - case BC_FUNCC: - case BC_FUNCCW: - | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 - if (op == BC_FUNCC) { - | lw CFUNCADDR, CFUNC:RB->f - } else { - | lw CFUNCADDR, DISPATCH_GL(wrapf)(DISPATCH) - } - | addu TMP1, RA, NARGS8:RC - | lw TMP2, L->maxstack - | addu RC, BASE, NARGS8:RC - | sw BASE, L->base - | sltu AT, TMP2, TMP1 - | sw RC, L->top - | li_vmstate C - if (op == BC_FUNCCW) { - | lw CARG2, CFUNC:RB->f - } - | bnez AT, ->vm_growstack_c // Need to grow stack. - |. move CARG1, L - | jalr CFUNCADDR // (lua_State *L [, lua_CFunction f]) - |. st_vmstate - | // Returns nresults. - | lw BASE, L->base - | sll RD, CRET1, 3 - | lw TMP1, L->top - | li_vmstate INTERP - | lw PC, FRAME_PC(BASE) // Fetch PC of caller. - | subu RA, TMP1, RD // RA = L->top - nresults*8 - | sw L, DISPATCH_GL(cur_L)(DISPATCH) - | b ->vm_returnc - |. st_vmstate - break; - - /* ---------------------------------------------------------------------- */ - - default: - fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); - exit(2); - break; - } -} - -/* Build the complete VM backend: grow the DynASM PC-label table to BC__MAX entries, emit the shared subroutines, then switch to the code_op section and emit the handler of every opcode 0..BC__MAX-1. Returns BC__MAX. */ -static int build_backend(BuildCtx *ctx) -{ - int op; - - dasm_growpc(Dst, BC__MAX); - - build_subroutines(ctx); - - |.code_op - for (op = 0; op < BC__MAX; op++) - build_ins(ctx, (BCOp)op, op); - - return BC__MAX; -} - -/* Emit pseudo frame-info for all assembler functions. 
*/ -static void emit_asm_debug(BuildCtx *ctx) -{ - /* fcofs = byte offset of the vm_ffi_call global from the start of the code. The first FDE describes the fcofs bytes starting at .Lbegin; the FFI FDE describes the remaining ctx->codesz - fcofs bytes starting at lj_vm_ffi_call. */ - int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); - int i; - switch (ctx->mode) { - /* Frame info is only written for ELF assembler output; every other mode takes the empty default case. */ - case BUILD_elfasm: - fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); - fprintf(ctx->fp, - ".Lframe0:\n" - "\t.4byte .LECIE0-.LSCIE0\n" - ".LSCIE0:\n" - "\t.4byte 0xffffffff\n" - "\t.byte 0x1\n" - "\t.string \"\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 31\n" - "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE0:\n\n"); - fprintf(ctx->fp, - ".LSFDE0:\n" - "\t.4byte .LEFDE0-.LASFDE0\n" - ".LASFDE0:\n" - "\t.4byte .Lframe0\n" - "\t.4byte .Lbegin\n" - "\t.4byte %d\n" - "\t.byte 0xe\n\t.uleb128 %d\n" - "\t.byte 0x9f\n\t.sleb128 1\n" - "\t.byte 0x9e\n\t.sleb128 2\n", - fcofs, CFRAME_SIZE); - /* One DW_CFA_offset entry (opcode 0x80+reg) per saved GPR r23..r16, with factored offsets 3..10. */ - for (i = 23; i >= 16; i--) - fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); -#if !LJ_SOFTFP - /* Likewise for the even FPRs f30..f20 (register numbers 32+i), with factored offsets 12..22. */ - for (i = 30; i >= 20; i -= 2) - fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); -#endif - fprintf(ctx->fp, - "\t.align 2\n" - ".LEFDE0:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".LSFDE1:\n" - "\t.4byte .LEFDE1-.LASFDE1\n" - ".LASFDE1:\n" - "\t.4byte .Lframe0\n" - "\t.4byte lj_vm_ffi_call\n" - "\t.4byte %d\n" - "\t.byte 0x9f\n\t.uleb128 1\n" - "\t.byte 0x90\n\t.uleb128 2\n" - "\t.byte 0xd\n\t.uleb128 0x10\n" - "\t.align 2\n" - ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); -#endif -#if !LJ_NO_UNWIND - /* The same descriptions again in .eh_frame form; this CIE references lj_err_unwind_dwarf in its augmentation data. */ - fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n"); - fprintf(ctx->fp, - "\t.globl lj_err_unwind_dwarf\n" - ".Lframe1:\n" - "\t.4byte .LECIE1-.LSCIE1\n" - ".LSCIE1:\n" - "\t.4byte 0\n" - "\t.byte 0x1\n" - "\t.string \"zPR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 31\n" - "\t.uleb128 6\n" /* augmentation length */ - "\t.byte 0\n" - "\t.4byte lj_err_unwind_dwarf\n" - "\t.byte 0\n" - "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE1:\n\n"); - fprintf(ctx->fp, - ".LSFDE2:\n" - "\t.4byte .LEFDE2-.LASFDE2\n" - 
".LASFDE2:\n" - "\t.4byte .LASFDE2-.Lframe1\n" - "\t.4byte .Lbegin\n" - "\t.4byte %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.uleb128 %d\n" - "\t.byte 0x9f\n\t.sleb128 1\n" - "\t.byte 0x9e\n\t.sleb128 2\n", - fcofs, CFRAME_SIZE); - /* Same GPR/FPR save entries as in the .debug_frame FDE. */ - for (i = 23; i >= 16; i--) - fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); -#if !LJ_SOFTFP - for (i = 30; i >= 20; i -= 2) - fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); -#endif - fprintf(ctx->fp, - "\t.align 2\n" - ".LEFDE2:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".Lframe2:\n" - "\t.4byte .LECIE2-.LSCIE2\n" - ".LSCIE2:\n" - "\t.4byte 0\n" - "\t.byte 0x1\n" - "\t.string \"zR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 31\n" - "\t.uleb128 1\n" /* augmentation length */ - "\t.byte 0\n" - "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE2:\n\n"); - fprintf(ctx->fp, - ".LSFDE3:\n" - "\t.4byte .LEFDE3-.LASFDE3\n" - ".LASFDE3:\n" - "\t.4byte .LASFDE3-.Lframe2\n" - "\t.4byte lj_vm_ffi_call\n" - "\t.4byte %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0x9f\n\t.uleb128 1\n" - "\t.byte 0x90\n\t.uleb128 2\n" - "\t.byte 0xd\n\t.uleb128 0x10\n" - "\t.align 2\n" - ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); -#endif -#endif - break; - default: - break; - } -} - diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc deleted file mode 100644 index f0c22a74df..0000000000 --- a/src/vm_mips64.dasc +++ /dev/null @@ -1,5062 +0,0 @@ -|// Low-level VM code for MIPS64 CPUs. -|// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -|// -|// Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. -|// Sponsored by Cisco Systems, Inc. -| -|.arch mips64 -|.section code_op, code_sub -| -|.actionlist build_actionlist -|.globals GLOB_ -|.globalnames globnames -|.externnames extnames -| -|// Note: The ragged indentation of the instructions is intentional. 
-|// The starting columns indicate data dependencies. -| -|//----------------------------------------------------------------------- -| -|// Fixed register assignments for the interpreter. -|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra -| -|// .FPU emits the instruction `a, b` only when the FPU option is set. -|.macro .FPU, a, b -|.if FPU -| a, b -|.endif -|.endmacro -| -|// The following must be C callee-save (but BASE is often refetched). -|.define BASE, r16 // Base of current Lua stack frame. -|.define KBASE, r17 // Constants of current Lua function. -|.define PC, r18 // Next PC. -|.define DISPATCH, r19 // Opcode dispatch table. -|.define LREG, r20 // Register holding lua_State (also in SAVE_L). -|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. -| -|.define JGL, r30 // On-trace: global_State + 32768. -| -|// Constants for type-comparisons, stores and conversions. C callee-save. -|// Note: TISNIL is assigned the same register (r30) as JGL. -|.define TISNIL, r30 -|.define TISNUM, r22 -|.if FPU -|.define TOBIT, f30 // 2^52 + 2^51. -|.endif -| -|// The following temporaries are not saved across C calls, except for RA. -|// Note: RB/RC/RD/INS (r8-r11) are the same registers as CARG5-CARG8 below. -|.define RA, r23 // Callee-save. -|.define RB, r8 -|.define RC, r9 -|.define RD, r10 -|.define INS, r11 -| -|.define AT, r1 // Assembler temporary. -|.define TMP0, r12 -|.define TMP1, r13 -|.define TMP2, r14 -|.define TMP3, r15 -| -|// MIPS n64 calling convention. -|.define CFUNCADDR, r25 -|.define CARG1, r4 -|.define CARG2, r5 -|.define CARG3, r6 -|.define CARG4, r7 -|.define CARG5, r8 -|.define CARG6, r9 -|.define CARG7, r10 -|.define CARG8, r11 -| -|.define CRET1, r2 -|.define CRET2, r3 -| -|.if FPU -|.define FARG1, f12 -|.define FARG2, f13 -|.define FARG3, f14 -|.define FARG4, f15 -|.define FARG5, f16 -|.define FARG6, f17 -|.define FARG7, f18 -|.define FARG8, f19 -| -|.define FRET1, f0 -|.define FRET2, f2 -|.endif -| -|// Stack layout while in interpreter. Must match with lj_frame.h. -|.if FPU // MIPS64 hard-float. -| -|.define CFRAME_SPACE, 192 // Delta for sp. 
-| -|//----- 16 byte aligned, <-- sp entering interpreter -|.define SAVE_ERRF, 188(sp) // 32 bit values. -|.define SAVE_NRES, 184(sp) -|.define SAVE_CFRAME, 176(sp) // 64 bit values. -|.define SAVE_L, 168(sp) -|.define SAVE_PC, 160(sp) -|//----- 16 byte aligned -|.define SAVE_GPR_, 80 // .. 80+10*8: 64 bit GPR saves. -|.define SAVE_FPR_, 16 // .. 16+8*8: 64 bit FPR saves. -| -|.else // MIPS64 soft-float -| -|.define CFRAME_SPACE, 128 // Delta for sp. -| -|//----- 16 byte aligned, <-- sp entering interpreter -|.define SAVE_ERRF, 124(sp) // 32 bit values. -|.define SAVE_NRES, 120(sp) -|.define SAVE_CFRAME, 112(sp) // 64 bit values. -|.define SAVE_L, 104(sp) -|.define SAVE_PC, 96(sp) -|//----- 16 byte aligned -|.define SAVE_GPR_, 16 // .. 16+10*8: 64 bit GPR saves. -| -|.endif -| -|.define TMPX, 8(sp) // Unused by interpreter, temp for JIT code. -|.define TMPD, 0(sp) -|//----- 16 byte aligned -| -|.define TMPD_OFS, 0 -| -|.define SAVE_MULTRES, TMPD -| -|//----------------------------------------------------------------------- -| -|// saveregs: allocate the C frame (CFRAME_SPACE bytes) and store ra, r30 and -|// r16-r23 in the GPR save area; with FPU also f24-f31 in the FPR save area. -|// Save slot i lives at SAVE_GPR_+i*8(sp) resp. SAVE_FPR_+i*8(sp). -|.macro saveregs -| daddiu sp, sp, -CFRAME_SPACE -| sd ra, SAVE_GPR_+9*8(sp) -| sd r30, SAVE_GPR_+8*8(sp) -| .FPU sdc1 f31, SAVE_FPR_+7*8(sp) -| sd r23, SAVE_GPR_+7*8(sp) -| .FPU sdc1 f30, SAVE_FPR_+6*8(sp) -| sd r22, SAVE_GPR_+6*8(sp) -| .FPU sdc1 f29, SAVE_FPR_+5*8(sp) -| sd r21, SAVE_GPR_+5*8(sp) -| .FPU sdc1 f28, SAVE_FPR_+4*8(sp) -| sd r20, SAVE_GPR_+4*8(sp) -| .FPU sdc1 f27, SAVE_FPR_+3*8(sp) -| sd r19, SAVE_GPR_+3*8(sp) -| .FPU sdc1 f26, SAVE_FPR_+2*8(sp) -| sd r18, SAVE_GPR_+2*8(sp) -| .FPU sdc1 f25, SAVE_FPR_+1*8(sp) -| sd r17, SAVE_GPR_+1*8(sp) -| .FPU sdc1 f24, SAVE_FPR_+0*8(sp) -| sd r16, SAVE_GPR_+0*8(sp) -|.endmacro -| -|// restoreregs_ret: reload everything stored by saveregs and return via ra. -|// The sp adjustment that releases the frame follows the jr, i.e. it sits in -|// the branch delay slot. -|.macro restoreregs_ret -| ld ra, SAVE_GPR_+9*8(sp) -| ld r30, SAVE_GPR_+8*8(sp) -| ld r23, SAVE_GPR_+7*8(sp) -| .FPU ldc1 f31, SAVE_FPR_+7*8(sp) -| ld r22, SAVE_GPR_+6*8(sp) -| .FPU ldc1 f30, SAVE_FPR_+6*8(sp) -| ld r21, SAVE_GPR_+5*8(sp) -| .FPU ldc1 f29, SAVE_FPR_+5*8(sp) -| ld r20, SAVE_GPR_+4*8(sp) -| .FPU 
ldc1 f28, SAVE_FPR_+4*8(sp) -| ld r19, SAVE_GPR_+3*8(sp) -| .FPU ldc1 f27, SAVE_FPR_+3*8(sp) -| ld r18, SAVE_GPR_+2*8(sp) -| .FPU ldc1 f26, SAVE_FPR_+2*8(sp) -| ld r17, SAVE_GPR_+1*8(sp) -| .FPU ldc1 f25, SAVE_FPR_+1*8(sp) -| ld r16, SAVE_GPR_+0*8(sp) -| .FPU ldc1 f24, SAVE_FPR_+0*8(sp) -| jr ra -| daddiu sp, sp, CFRAME_SPACE -|.endmacro -| -|// Type definitions. Some of these are only used for documentation. -|.type L, lua_State, LREG -|.type GL, global_State -|.type TVALUE, TValue -|.type GCOBJ, GCobj -|.type STR, GCstr -|.type TAB, GCtab -|.type LFUNC, GCfuncL -|.type CFUNC, GCfuncC -|.type PROTO, GCproto -|.type UPVAL, GCupval -|.type NODE, Node -|.type NARGS8, int -|.type TRACE, GCtrace -|.type SBUF, SBuf -| -|//----------------------------------------------------------------------- -| -|// Trap for not-yet-implemented parts. -|.macro NYI; .long 0xf0f0f0f0; .endmacro -| -|// Macros to mark delay slots. -|// Each '.' variant expands to its arguments unchanged; it is a visual marker only. -|.macro ., a; a; .endmacro -|.macro ., a,b; a,b; .endmacro -|.macro ., a,b,c; a,b,c; .endmacro -|.macro ., a,b,c,d; a,b,c,d; .endmacro -| -|// Offsets, relative to BASE, of the frame's PC slot and function slot. -|.define FRAME_PC, -8 -|.define FRAME_FUNC, -16 -| -|//----------------------------------------------------------------------- -| -|// Endian-specific defines. -|// OFS_RD/OFS_RA/OFS_OP: byte offsets of the D, A and opcode fields inside a -|// 32 bit instruction word in memory. HI/LO: presumably the byte offsets of the -|// high and low 32 bit halves of a 64 bit slot (TODO confirm against users). -|.if ENDIAN_LE -|.define HI, 4 -|.define LO, 0 -|.define OFS_RD, 2 -|.define OFS_RA, 1 -|.define OFS_OP, 0 -|.else -|.define HI, 0 -|.define LO, 4 -|.define OFS_RD, 0 -|.define OFS_RA, 2 -|.define OFS_OP, 3 -|.endif -| -|// Instruction decode. 
-|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro -|.macro decode_OP8a, dst, ins; andi dst, ins, 0xff; .endmacro -|.macro decode_OP8b, dst; sll dst, dst, 3; .endmacro -|.macro decode_RC8a, dst, ins; srl dst, ins, 13; .endmacro -|.macro decode_RC8b, dst; andi dst, dst, 0x7f8; .endmacro -|.macro decode_RD4b, dst; sll dst, dst, 2; .endmacro -|.macro decode_RA8a, dst, ins; srl dst, ins, 5; .endmacro -|.macro decode_RA8b, dst; andi dst, dst, 0x7f8; .endmacro -|.macro decode_RB8a, dst, ins; srl dst, ins, 21; .endmacro -|.macro decode_RB8b, dst; andi dst, dst, 0x7f8; .endmacro -|.macro decode_RD8a, dst, ins; srl dst, ins, 16; .endmacro -|.macro decode_RD8b, dst; sll dst, dst, 3; .endmacro -|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro -| -|// Summary of the decode macros above, as implied by their shifts and masks: -|// OP = bits 0-7, RA = bits 8-15, RC = bits 16-23, RB = bits 24-31 and -|// RD = bits 16-31 of the 32 bit instruction word. The *8a/*8b pairs yield the -|// field value times 8 (RA/RB/RC: shift right by 3 less than the field position, -|// then mask with 0x7f8; RD/OP: extract, then shift left by 3). decode_OP1 -|// yields the unscaled opcode, decode_RD4b scales by 4 instead of 8 and -|// decode_RDtoRC8 masks an already scaled RD down to the RC field. -|// Instruction fetch. -|.macro ins_NEXT1 -| // Load the 32 bit instruction at PC into INS and advance PC past it. -| lw INS, 0(PC) -| daddiu PC, PC, 4 -|.endmacro -|// Instruction decode+dispatch. -|.macro ins_NEXT2 -| // Load the handler address from the 8 byte DISPATCH entry of the opcode and -| // jump to it with RA = A*8 and RD = D*8. The decode_RA8b following the jr -| // occupies its branch delay slot. -| decode_OP8a TMP1, INS -| decode_OP8b TMP1 -| daddu TMP0, DISPATCH, TMP1 -| decode_RD8a RD, INS -| ld AT, 0(TMP0) -| decode_RA8a RA, INS -| decode_RD8b RD -| jr AT -| decode_RA8b RA -|.endmacro -|.macro ins_NEXT -| ins_NEXT1 -| ins_NEXT2 -|.endmacro -| -|// Instruction footer. -|.if 1 -| // NOTE: the condition is hard-wired, so the .else variant below is never assembled. -| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. -| .define ins_next, ins_NEXT -| .define ins_next_, ins_NEXT -| .define ins_next1, ins_NEXT1 -| .define ins_next2, ins_NEXT2 -|.else -| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. -| // Affects only certain kinds of benchmarks (and only with -j off). -| .macro ins_next -| b ->ins_next -| .endmacro -| .macro ins_next1 -| .endmacro -| .macro ins_next2 -| b ->ins_next -| .endmacro -| .macro ins_next_ -| ->ins_next: -| ins_NEXT -| .endmacro -|.endif -| -|// Call decode and dispatch. 
-|.macro ins_callt -| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC -| // Start executing the callee: PC is loaded from RB->pc, the instruction there -| // is fetched and dispatched on its opcode. RA is decoded to A*8 and turned -| // into BASE+A*8 by the daddu in the delay slot of jr. RD is not decoded here. -| ld PC, LFUNC:RB->pc -| lw INS, 0(PC) -| daddiu PC, PC, 4 -| decode_OP8a TMP1, INS -| decode_RA8a RA, INS -| decode_OP8b TMP1 -| decode_RA8b RA -| daddu TMP0, DISPATCH, TMP1 -| ld TMP0, 0(TMP0) -| jr TMP0 -| daddu RA, RA, BASE -|.endmacro -| -|.macro ins_call -| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC -| // Save the caller PC in the new frame, then dispatch exactly like ins_callt. -| sd PC, FRAME_PC(BASE) -| ins_callt -|.endmacro -| -|//----------------------------------------------------------------------- -| -|.macro branch_RD -| // PC += (RD >> 1) + (hi16 << 16), with hi16 = -(BCBIAS_J*4 >> 16) & 65535. -| // RD holds D*8 after decode_RD8a/b, so RD >> 1 is D*4. Presumably the lui -| // constant equals -BCBIAS_J*4, i.e. the low 16 bits of BCBIAS_J*4 are zero -| // (TODO confirm). Clobbers TMP0 and AT. -| srl TMP0, RD, 1 -| lui AT, (-(BCBIAS_J*4 >> 16) & 65535) -| addu TMP0, TMP0, AT -| daddu PC, PC, TMP0 -|.endmacro -| -|// Assumes DISPATCH is relative to GL. -#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) -#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) -#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch)) -#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name) -| -/* PC2PROTO(field): offset of a GCproto field relative to the end of the GCproto struct (hence negative); used with a bytecode pointer as the base register. */ -#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) -| -|// load_got puts the GOT entry of func into CFUNCADDR. call_intern ignores its -|// argument and, like call_extern/jmp_extern, simply jumps through CFUNCADDR, -|// so a load_got must have set it up beforehand. -|.macro load_got, func -| ld CFUNCADDR, DISPATCH_GOT(func)(DISPATCH) -|.endmacro -|// Much faster. Sadly, there's no easy way to force the required code layout. -|// .macro call_intern, func; bal extern func; .endmacro -|.macro call_intern, func; jalr CFUNCADDR; .endmacro -|.macro call_extern; jalr CFUNCADDR; .endmacro -|.macro jmp_extern; jr CFUNCADDR; .endmacro -| -|.macro hotcheck, delta, target -| // Counter slot: 16 bit value at GG_DISP2HOT + DISPATCH + ((PC >> 1) & 126). -| // Subtract delta, store the new count (delay slot, so always executed) and -| // branch to target if it went negative. Clobbers TMP1 and TMP2. -| dsrl TMP1, PC, 1 -| andi TMP1, TMP1, 126 -| daddu TMP1, TMP1, DISPATCH -| lhu TMP2, GG_DISP2HOT(TMP1) -| addiu TMP2, TMP2, -delta -| bltz TMP2, target -|. sh TMP2, GG_DISP2HOT(TMP1) -|.endmacro -| -|.macro hotloop -| hotcheck HOTCOUNT_LOOP, ->vm_hotloop -|.endmacro -| -|.macro hotcall -| hotcheck HOTCOUNT_CALL, ->vm_hotcall -|.endmacro -| -|// Set current VM state. Uses TMP0. 
-|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro -|.macro st_vmstate; sw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro -| -|// Move table write barrier back. Overwrites mark and tmp. -|.macro barrierback, tab, mark, tmp, target -| // Clears the LJ_GC_BLACK bit in mark and stores it to tab->marked, makes tab -| // the new head of gc.grayagain and links the previous head via tab->gclist -| // (stored in the delay slot of the branch to target). -| ld tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) -| andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab) -| sd tab, DISPATCH_GL(gc.grayagain)(DISPATCH) -| sb mark, tab->marked -| b target -|. sd tmp, tab->gclist -|.endmacro -| -|// Clear type tag. Isolate lowest 14+32+1=47 bits of reg. -|.macro cleartp, reg; dextm reg, reg, 0, 14; .endmacro -|.macro cleartp, dst, reg; dextm dst, reg, 0, 14; .endmacro -| -|// Set type tag: Merge 17 type bits into bits [15+32=47, 31+32+1=64) of dst. -|.macro settp, dst, tp; dinsu dst, tp, 15, 31; .endmacro -| -|// Extract (negative) type tag. -|.macro gettp, dst, src; dsra dst, src, 47; .endmacro -| -|// Macros to check the TValue type and extract the GCobj. Branch on failure. -|.macro checktp, reg, tp, target -| // tp is the negated tag (see checkstr etc.), so AT becomes zero on a match. -| // The cleartp sits in the delay slot: reg loses its tag on both paths. -| gettp AT, reg -| daddiu AT, AT, tp -| bnez AT, target -|. cleartp reg -|.endmacro -|.macro checktp, dst, reg, tp, target -| // As above, but reg is left intact and the untagged value is written to dst. -| gettp AT, reg -| daddiu AT, AT, tp -| bnez AT, target -|. cleartp dst, reg -|.endmacro -|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro -|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro -|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro -|.macro checkint, reg, target // Caveat: has delay slot! -| // Branches to target if the tag differs from TISNUM. The macro ends with the -| // branch, so the next instruction at the use site fills the delay slot. -| gettp AT, reg -| bne AT, TISNUM, target -|.endmacro -|.macro checknum, reg, target // Caveat: has delay slot! 
-| gettp AT, reg -| sltiu AT, AT, LJ_TISNUM -| beqz AT, target -|.endmacro -| -|.macro mov_false, reg -| lu reg, 0x8000 -| dsll reg, reg, 32 -| not reg, reg -|.endmacro -|.macro mov_true, reg -| li reg, 0x0001 -| dsll reg, reg, 48 -| not reg, reg -|.endmacro -| -|//----------------------------------------------------------------------- - -/* Generate subroutines used by opcodes and other parts of the VM. */ -/* The .code_sub section should be last to help static branch prediction. */ -static void build_subroutines(BuildCtx *ctx) -{ - |.code_sub - | - |//----------------------------------------------------------------------- - |//-- Return handling ---------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_returnp: - | // See vm_return. Also: TMP2 = previous base. - | andi AT, PC, FRAME_P - | beqz AT, ->cont_dispatch - | - | // Return from pcall or xpcall fast func. - |. mov_true TMP1 - | ld PC, FRAME_PC(TMP2) // Fetch PC of previous frame. - | move BASE, TMP2 // Restore caller base. - | // Prepending may overwrite the pcall frame, so do it at the end. - | sd TMP1, -8(RA) // Prepend true to results. - | daddiu RA, RA, -8 - | - |->vm_returnc: - | addiu RD, RD, 8 // RD = (nresults+1)*8. - | andi TMP0, PC, FRAME_TYPE - | beqz RD, ->vm_unwind_c_eh - |. li CRET1, LUA_YIELD - | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua. - |. move MULTRES, RD - | - |->vm_return: - | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return - | // TMP0 = PC & FRAME_TYPE - | li TMP2, -8 - | xori AT, TMP0, FRAME_C - | and TMP2, PC, TMP2 - | bnez AT, ->vm_returnp - | dsubu TMP2, BASE, TMP2 // TMP2 = previous base. - | - | addiu TMP1, RD, -8 - | sd TMP2, L->base - | li_vmstate C - | lw TMP2, SAVE_NRES - | daddiu BASE, BASE, -16 - | st_vmstate - | beqz TMP1, >2 - |. 
sll TMP2, TMP2, 3 - |1: - | addiu TMP1, TMP1, -8 - | ld CRET1, 0(RA) - | daddiu RA, RA, 8 - | sd CRET1, 0(BASE) - | bnez TMP1, <1 - |. daddiu BASE, BASE, 8 - | - |2: - | bne TMP2, RD, >6 - |3: - |. sd BASE, L->top // Store new top. - | - |->vm_leave_cp: - | ld TMP0, SAVE_CFRAME // Restore previous C frame. - | move CRET1, r0 // Ok return status for vm_pcall. - | sd TMP0, L->cframe - | - |->vm_leave_unw: - | restoreregs_ret - | - |6: - | ld TMP1, L->maxstack - | slt AT, TMP2, RD - | bnez AT, >7 // Less results wanted? - | // More results wanted. Check stack size and fill up results with nil. - |. slt AT, BASE, TMP1 - | beqz AT, >8 - |. nop - | sd TISNIL, 0(BASE) - | addiu RD, RD, 8 - | b <2 - |. daddiu BASE, BASE, 8 - | - |7: // Less results wanted. - | subu TMP0, RD, TMP2 - | dsubu TMP0, BASE, TMP0 // Either keep top or shrink it. - | b <3 - |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case? - | - |8: // Corner case: need to grow stack for filling up results. - | // This can happen if: - | // - A C function grows the stack (a lot). - | // - The GC shrinks the stack in between. - | // - A return back from a lua_call() with (high) nresults adjustment. - | load_got lj_state_growstack - | move MULTRES, RD - | srl CARG2, TMP2, 3 - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | lw TMP2, SAVE_NRES - | ld BASE, L->top // Need the (realloced) L->top in BASE. - | move RD, MULTRES - | b <2 - |. sll TMP2, TMP2, 3 - | - |->vm_unwind_c: // Unwind C stack, return from vm_pcall. - | // (void *cframe, int errcode) - | move sp, CARG1 - | move CRET1, CARG2 - |->vm_unwind_c_eh: // Landing pad for external unwinder. - | ld L, SAVE_L - | li TMP0, ~LJ_VMST_C - | ld GL:TMP1, L->glref - | b ->vm_leave_unw - |. sw TMP0, GL:TMP1->vmstate - | - |->vm_unwind_ff: // Unwind C stack, return from ff pcall. - | // (void *cframe) - | li AT, -4 - | and sp, CARG1, AT - |->vm_unwind_ff_eh: // Landing pad for external unwinder. 
- | ld L, SAVE_L - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | li TISNIL, LJ_TNIL - | li TISNUM, LJ_TISNUM - | ld BASE, L->base - | ld DISPATCH, L->glref // Setup pointer to dispatch table. - | .FPU mtc1 TMP3, TOBIT - | mov_false TMP1 - | li_vmstate INTERP - | ld PC, FRAME_PC(BASE) // Fetch PC of previous frame. - | .FPU cvt.d.s TOBIT, TOBIT - | daddiu RA, BASE, -8 // Results start at BASE-8. - | daddiu DISPATCH, DISPATCH, GG_G2DISP - | sd TMP1, 0(RA) // Prepend false to error message. - | st_vmstate - | b ->vm_returnc - |. li RD, 16 // 2 results: false + error message. - | - |//----------------------------------------------------------------------- - |//-- Grow stack for calls ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_growstack_c: // Grow stack for C function. - | b >2 - |. li CARG2, LUA_MINSTACK - | - |->vm_growstack_l: // Grow stack for Lua function. - | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC - | daddu RC, BASE, RC - | dsubu RA, RA, BASE - | sd BASE, L->base - | daddiu PC, PC, 4 // Must point after first instruction. - | sd RC, L->top - | srl CARG2, RA, 3 - |2: - | // L->base = new base, L->top = top - | load_got lj_state_growstack - | sd PC, SAVE_PC - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | ld BASE, L->base - | ld RC, L->top - | ld LFUNC:RB, FRAME_FUNC(BASE) - | dsubu RC, RC, BASE - | cleartp LFUNC:RB - | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC - | ins_callt // Just retry the call. - | - |//----------------------------------------------------------------------- - |//-- Entry points into the assembler VM --------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_resume: // Setup C frame and resume thread. 
- | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) - | saveregs - | move L, CARG1 - | ld DISPATCH, L->glref // Setup pointer to dispatch table. - | move BASE, CARG2 - | lbu TMP1, L->status - | sd L, SAVE_L - | li PC, FRAME_CP - | daddiu TMP0, sp, CFRAME_RESUME - | daddiu DISPATCH, DISPATCH, GG_G2DISP - | sw r0, SAVE_NRES - | sw r0, SAVE_ERRF - | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. - | sd r0, SAVE_CFRAME - | beqz TMP1, >3 - |. sd TMP0, L->cframe - | - | // Resume after yield (like a return). - | sd L, DISPATCH_GL(cur_L)(DISPATCH) - | move RA, BASE - | ld BASE, L->base - | ld TMP1, L->top - | ld PC, FRAME_PC(BASE) - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | dsubu RD, TMP1, BASE - | .FPU mtc1 TMP3, TOBIT - | sb r0, L->status - | .FPU cvt.d.s TOBIT, TOBIT - | li_vmstate INTERP - | daddiu RD, RD, 8 - | st_vmstate - | move MULTRES, RD - | andi TMP0, PC, FRAME_TYPE - | li TISNIL, LJ_TNIL - | beqz TMP0, ->BC_RET_Z - |. li TISNUM, LJ_TISNUM - | b ->vm_return - |. nop - | - |->vm_pcall: // Setup protected C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) - | saveregs - | sw CARG4, SAVE_ERRF - | b >1 - |. li PC, FRAME_CP - | - |->vm_call: // Setup C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1) - | saveregs - | li PC, FRAME_C - | - |1: // Entry point for vm_pcall above (PC = ftype). - | ld TMP1, L:CARG1->cframe - | move L, CARG1 - | sw CARG3, SAVE_NRES - | ld DISPATCH, L->glref // Setup pointer to dispatch table. - | sd CARG1, SAVE_L - | move BASE, CARG2 - | daddiu DISPATCH, DISPATCH, GG_G2DISP - | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. - | sd TMP1, SAVE_CFRAME - | sd sp, L->cframe // Add our C frame to cframe chain. - | - |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). - | sd L, DISPATCH_GL(cur_L)(DISPATCH) - | ld TMP2, L->base // TMP2 = old base (used in vmeta_call). - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 
- | ld TMP1, L->top - | .FPU mtc1 TMP3, TOBIT - | daddu PC, PC, BASE - | dsubu NARGS8:RC, TMP1, BASE - | li TISNUM, LJ_TISNUM - | dsubu PC, PC, TMP2 // PC = frame delta + frame type - | .FPU cvt.d.s TOBIT, TOBIT - | li_vmstate INTERP - | li TISNIL, LJ_TNIL - | st_vmstate - | - |->vm_call_dispatch: - | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC - | ld LFUNC:RB, FRAME_FUNC(BASE) - | checkfunc LFUNC:RB, ->vmeta_call - | - |->vm_call_dispatch_f: - | ins_call - | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC - | - |->vm_cpcall: // Setup protected C frame, call C. - | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) - | saveregs - | move L, CARG1 - | ld TMP0, L:CARG1->stack - | sd CARG1, SAVE_L - | ld TMP1, L->top - | ld DISPATCH, L->glref // Setup pointer to dispatch table. - | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. - | dsubu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). - | ld TMP1, L->cframe - | daddiu DISPATCH, DISPATCH, GG_G2DISP - | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. - | sw r0, SAVE_ERRF // No error function. - | sd TMP1, SAVE_CFRAME - | sd sp, L->cframe // Add our C frame to cframe chain. - | sd L, DISPATCH_GL(cur_L)(DISPATCH) - | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) - |. move CFUNCADDR, CARG4 - | move BASE, CRET1 - | bnez CRET1, <3 // Else continue with the call. - |. li PC, FRAME_CP - | b ->vm_leave_cp // No base? Just remove C frame. - |. nop - | - |//----------------------------------------------------------------------- - |//-- Metamethod handling ------------------------------------------------ - |//----------------------------------------------------------------------- - | - |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the - |// stack, so BASE doesn't need to be reloaded across these calls. 
- | - |//-- Continuation dispatch ---------------------------------------------- - | - |->cont_dispatch: - | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 - | ld TMP0, -32(BASE) // Continuation. - | move RB, BASE - | move BASE, TMP2 // Restore caller BASE. - | ld LFUNC:TMP1, FRAME_FUNC(TMP2) - |.if FFI - | sltiu AT, TMP0, 2 - |.endif - | ld PC, -24(RB) // Restore PC from [cont|PC]. - | cleartp LFUNC:TMP1 - | daddu TMP2, RA, RD - | ld TMP1, LFUNC:TMP1->pc - |.if FFI - | bnez AT, >1 - |.endif - |. sd TISNIL, -8(TMP2) // Ensure one valid arg. - | // BASE = base, RA = resultptr, RB = meta base - | jr TMP0 // Jump to continuation. - |. ld KBASE, PC2PROTO(k)(TMP1) - | - |.if FFI - |1: - | bnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback. - | // cont = 0: tailcall from C function. - |. daddiu TMP1, RB, -32 - | b ->vm_call_tail - |. dsubu RC, TMP1, BASE - |.endif - | - |->cont_cat: // RA = resultptr, RB = meta base - | lw INS, -4(PC) - | daddiu CARG2, RB, -32 - | ld CRET1, 0(RA) - | decode_RB8a MULTRES, INS - | decode_RA8a RA, INS - | decode_RB8b MULTRES - | decode_RA8b RA - | daddu TMP1, BASE, MULTRES - | sd BASE, L->base - | dsubu CARG3, CARG2, TMP1 - | bne TMP1, CARG2, ->BC_CAT_Z - |. sd CRET1, 0(CARG2) - | daddu RA, BASE, RA - | b ->cont_nop - |. sd CRET1, 0(RA) - | - |//-- Table indexing metamethods ----------------------------------------- - | - |->vmeta_tgets1: - | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TSTR - | settp STR:RC, TMP0 - | b >1 - |. sd STR:RC, 0(CARG3) - | - |->vmeta_tgets: - | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TTAB - | li TMP1, LJ_TSTR - | settp TAB:RB, TMP0 - | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2) - | sd TAB:RB, 0(CARG2) - | settp STR:RC, TMP1 - | b >1 - |. 
sd STR:RC, 0(CARG3) - | - |->vmeta_tgetb: // TMP0 = index - | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | settp TMP0, TISNUM - | sd TMP0, 0(CARG3) - | - |->vmeta_tgetv: - |1: - | load_got lj_meta_tget - | sd BASE, L->base - | sd PC, SAVE_PC - | call_intern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) - |. move CARG1, L - | // Returns TValue * (finished) or NULL (metamethod). - | beqz CRET1, >3 - |. daddiu TMP1, BASE, -FRAME_CONT - | ld CARG1, 0(CRET1) - | ins_next1 - | sd CARG1, 0(RA) - | ins_next2 - | - |3: // Call __index metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k - | ld BASE, L->top - | sd PC, -24(BASE) // [cont|PC] - | dsubu PC, BASE, TMP1 - | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | cleartp LFUNC:RB - | b ->vm_call_dispatch_f - |. li NARGS8:RC, 16 // 2 args for func(t, k). - | - |->vmeta_tgetr: - | load_got lj_tab_getinth - | call_intern lj_tab_getinth // (GCtab *t, int32_t key) - |. nop - | // Returns cTValue * or NULL. - | beqz CRET1, ->BC_TGETR_Z - |. move CARG2, TISNIL - | b ->BC_TGETR_Z - |. ld CARG2, 0(CRET1) - | - |//----------------------------------------------------------------------- - | - |->vmeta_tsets1: - | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TSTR - | settp STR:RC, TMP0 - | b >1 - |. sd STR:RC, 0(CARG3) - | - |->vmeta_tsets: - | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TTAB - | li TMP1, LJ_TSTR - | settp TAB:RB, TMP0 - | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2) - | sd TAB:RB, 0(CARG2) - | settp STR:RC, TMP1 - | b >1 - |. sd STR:RC, 0(CARG3) - | - |->vmeta_tsetb: // TMP0 = index - | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | settp TMP0, TISNUM - | sd TMP0, 0(CARG3) - | - |->vmeta_tsetv: - |1: - | load_got lj_meta_tset - | sd BASE, L->base - | sd PC, SAVE_PC - | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) - |. move CARG1, L - | // Returns TValue * (finished) or NULL (metamethod). - | beqz CRET1, >3 - |. 
ld CARG1, 0(RA) - | // NOBARRIER: lj_meta_tset ensures the table is not black. - | ins_next1 - | sd CARG1, 0(CRET1) - | ins_next2 - | - |3: // Call __newindex metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) - | daddiu TMP1, BASE, -FRAME_CONT - | ld BASE, L->top - | sd PC, -24(BASE) // [cont|PC] - | dsubu PC, BASE, TMP1 - | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | cleartp LFUNC:RB - | sd CARG1, 16(BASE) // Copy value to third argument. - | b ->vm_call_dispatch_f - |. li NARGS8:RC, 24 // 3 args for func(t, k, v) - | - |->vmeta_tsetr: - | load_got lj_tab_setinth - | sd BASE, L->base - | sd PC, SAVE_PC - | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) - |. move CARG1, L - | // Returns TValue *. - | b ->BC_TSETR_Z - |. nop - | - |//-- Comparison metamethods --------------------------------------------- - | - |->vmeta_comp: - | // RA/RD point to o1/o2. - | move CARG2, RA - | move CARG3, RD - | load_got lj_meta_comp - | daddiu PC, PC, -4 - | sd BASE, L->base - | sd PC, SAVE_PC - | decode_OP1 CARG4, INS - | call_intern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) - |. move CARG1, L - | // Returns 0/1 or TValue * (metamethod). - |3: - | sltiu AT, CRET1, 2 - | beqz AT, ->vmeta_binop - | negu TMP2, CRET1 - |4: - | lhu RD, OFS_RD(PC) - | daddiu PC, PC, 4 - | lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) - | sll RD, RD, 2 - | addu RD, RD, TMP1 - | and RD, RD, TMP2 - | daddu PC, PC, RD - |->cont_nop: - | ins_next - | - |->cont_ra: // RA = resultptr - | lbu TMP1, -4+OFS_RA(PC) - | ld CRET1, 0(RA) - | sll TMP1, TMP1, 3 - | daddu TMP1, BASE, TMP1 - | b ->cont_nop - |. sd CRET1, 0(TMP1) - | - |->cont_condt: // RA = resultptr - | ld TMP0, 0(RA) - | gettp TMP0, TMP0 - | sltiu AT, TMP0, LJ_TISTRUECOND - | b <4 - |. negu TMP2, AT // Branch if result is true. - | - |->cont_condf: // RA = resultptr - | ld TMP0, 0(RA) - | gettp TMP0, TMP0 - | sltiu AT, TMP0, LJ_TISTRUECOND - | b <4 - |. 
addiu TMP2, AT, -1 // Branch if result is false. - | - |->vmeta_equal: - | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1. - | load_got lj_meta_equal - | cleartp LFUNC:CARG3, CARG2 - | cleartp LFUNC:CARG2, CARG1 - | move CARG4, TMP0 - | daddiu PC, PC, -4 - | sd BASE, L->base - | sd PC, SAVE_PC - | call_intern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) - |. move CARG1, L - | // Returns 0/1 or TValue * (metamethod). - | b <3 - |. nop - | - |->vmeta_equal_cd: - |.if FFI - | load_got lj_meta_equal_cd - | move CARG2, INS - | daddiu PC, PC, -4 - | sd BASE, L->base - | sd PC, SAVE_PC - | call_intern lj_meta_equal_cd // (lua_State *L, BCIns op) - |. move CARG1, L - | // Returns 0/1 or TValue * (metamethod). - | b <3 - |. nop - |.endif - | - |->vmeta_istype: - | load_got lj_meta_istype - | daddiu PC, PC, -4 - | sd BASE, L->base - | srl CARG2, RA, 3 - | srl CARG3, RD, 3 - | sd PC, SAVE_PC - | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) - |. move CARG1, L - | b ->cont_nop - |. nop - | - |//-- Arithmetic metamethods --------------------------------------------- - | - |->vmeta_unm: - | move RC, RB - | - |->vmeta_arith: - | load_got lj_meta_arith - | sd BASE, L->base - | move CARG2, RA - | sd PC, SAVE_PC - | move CARG3, RB - | move CARG4, RC - | decode_OP1 CARG5, INS // CARG5 == RB. - | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) - |. move CARG1, L - | // Returns NULL (finished) or TValue * (metamethod). - | beqz CRET1, ->cont_nop - |. nop - | - | // Call metamethod for binary op. - |->vmeta_binop: - | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 - | dsubu TMP1, CRET1, BASE - | sd PC, -24(CRET1) // [cont|PC] - | move TMP2, BASE - | daddiu PC, TMP1, FRAME_CONT - | move BASE, CRET1 - | b ->vm_call_dispatch - |. li NARGS8:RC, 16 // 2 args for func(o1, o2). - | - |->vmeta_len: - | // CARG2 already set by BC_LEN. 
-#if LJ_52 - | move MULTRES, CARG1 -#endif - | load_got lj_meta_len - | sd BASE, L->base - | sd PC, SAVE_PC - | call_intern lj_meta_len // (lua_State *L, TValue *o) - |. move CARG1, L - | // Returns NULL (retry) or TValue * (metamethod base). -#if LJ_52 - | bnez CRET1, ->vmeta_binop // Binop call for compatibility. - |. nop - | b ->BC_LEN_Z - |. move CARG1, MULTRES -#else - | b ->vmeta_binop // Binop call for compatibility. - |. nop -#endif - | - |//-- Call metamethod ---------------------------------------------------- - | - |->vmeta_call: // Resolve and call __call metamethod. - | // TMP2 = old base, BASE = new base, RC = nargs*8 - | load_got lj_meta_call - | sd TMP2, L->base // This is the callers base! - | daddiu CARG2, BASE, -16 - | sd PC, SAVE_PC - | daddu CARG3, BASE, RC - | move MULTRES, NARGS8:RC - | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - |. move CARG1, L - | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now. - | cleartp LFUNC:RB - | ins_call - | - |->vmeta_callt: // Resolve __call for BC_CALLT. - | // BASE = old base, RA = new base, RC = nargs*8 - | load_got lj_meta_call - | sd BASE, L->base - | daddiu CARG2, RA, -16 - | sd PC, SAVE_PC - | daddu CARG3, RA, RC - | move MULTRES, NARGS8:RC - | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - |. move CARG1, L - | ld RB, FRAME_FUNC(RA) // Guaranteed to be a function here. - | ld TMP1, FRAME_PC(BASE) - | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now. - | b ->BC_CALLT_Z - |. cleartp LFUNC:CARG3, RB - | - |//-- Argument coercion for 'for' statement ------------------------------ - | - |->vmeta_for: - | load_got lj_meta_for - | sd BASE, L->base - | move CARG2, RA - | sd PC, SAVE_PC - | move MULTRES, INS - | call_intern lj_meta_for // (lua_State *L, TValue *base) - |. 
move CARG1, L - |.if JIT - | decode_OP1 TMP0, MULTRES - | li AT, BC_JFORI - |.endif - | decode_RA8a RA, MULTRES - | decode_RD8a RD, MULTRES - | decode_RA8b RA - |.if JIT - | beq TMP0, AT, =>BC_JFORI - |. decode_RD8b RD - | b =>BC_FORI - |. nop - |.else - | b =>BC_FORI - |. decode_RD8b RD - |.endif - | - |//----------------------------------------------------------------------- - |//-- Fast functions ----------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro .ffunc, name - |->ff_ .. name: - |.endmacro - | - |.macro .ffunc_1, name - |->ff_ .. name: - | beqz NARGS8:RC, ->fff_fallback - |. ld CARG1, 0(BASE) - |.endmacro - | - |.macro .ffunc_2, name - |->ff_ .. name: - | sltiu AT, NARGS8:RC, 16 - | ld CARG1, 0(BASE) - | bnez AT, ->fff_fallback - |. ld CARG2, 8(BASE) - |.endmacro - | - |.macro .ffunc_n, name // Caveat: has delay slot! - |->ff_ .. name: - | ld CARG1, 0(BASE) - | beqz NARGS8:RC, ->fff_fallback - | // Either ldc1 or the 1st instruction of checknum is in the delay slot. - | .FPU ldc1 FARG1, 0(BASE) - | checknum CARG1, ->fff_fallback - |.endmacro - | - |.macro .ffunc_nn, name // Caveat: has delay slot! - |->ff_ .. name: - | ld CARG1, 0(BASE) - | sltiu AT, NARGS8:RC, 16 - | ld CARG2, 8(BASE) - | bnez AT, ->fff_fallback - |. gettp TMP0, CARG1 - | gettp TMP1, CARG2 - | sltiu TMP0, TMP0, LJ_TISNUM - | sltiu TMP1, TMP1, LJ_TISNUM - | .FPU ldc1 FARG1, 0(BASE) - | and TMP0, TMP0, TMP1 - | .FPU ldc1 FARG2, 8(BASE) - | beqz TMP0, ->fff_fallback - |.endmacro - | - |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot! 
- |.macro ffgccheck - | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) - | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) - | dsubu AT, TMP0, TMP1 - | bgezal AT, ->fff_gcstep - |.endmacro - | - |//-- Base library: checks ----------------------------------------------- - |.ffunc_1 assert - | gettp AT, CARG1 - | sltiu AT, AT, LJ_TISTRUECOND - | beqz AT, ->fff_fallback - |. daddiu RA, BASE, -16 - | ld PC, FRAME_PC(BASE) - | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. - | daddu TMP2, RA, RD - | daddiu TMP1, BASE, 8 - | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. - |. sd CARG1, 0(RA) - |1: - | ld CRET1, 0(TMP1) - | sd CRET1, -16(TMP1) - | bne TMP1, TMP2, <1 - |. daddiu TMP1, TMP1, 8 - | b ->fff_res - |. nop - | - |.ffunc_1 type - | gettp TMP0, CARG1 - | sltu TMP1, TISNUM, TMP0 - | not TMP2, TMP0 - | li TMP3, ~LJ_TISNUM - | movz TMP2, TMP3, TMP1 - | dsll TMP2, TMP2, 3 - | daddu TMP2, CFUNC:RB, TMP2 - | b ->fff_restv - |. ld CARG1, CFUNC:TMP2->upvalue - | - |//-- Base library: getters and setters --------------------------------- - | - |.ffunc_1 getmetatable - | gettp TMP2, CARG1 - | daddiu TMP0, TMP2, -LJ_TTAB - | daddiu TMP1, TMP2, -LJ_TUDATA - | movn TMP0, TMP1, TMP0 - | bnez TMP0, >6 - |. cleartp TAB:CARG1 - |1: // Field metatable must be at same offset for GCtab and GCudata! - | ld TAB:RB, TAB:CARG1->metatable - |2: - | ld STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) - | beqz TAB:RB, ->fff_restv - |. li CARG1, LJ_TNIL - | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash - | ld NODE:TMP2, TAB:RB->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | dsll TMP0, TMP1, 5 - | dsll TMP1, TMP1, 3 - | dsubu TMP1, TMP0, TMP1 - | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - | li CARG4, LJ_TSTR - | settp STR:RC, CARG4 // Tagged key to look for. - |3: // Rearranged logic, because we expect _not_ to find the key. 
- | ld TMP0, NODE:TMP2->key - | ld CARG1, NODE:TMP2->val - | ld NODE:TMP2, NODE:TMP2->next - | beq RC, TMP0, >5 - |. li AT, LJ_TTAB - | bnez NODE:TMP2, <3 - |. nop - |4: - | move CARG1, RB - | b ->fff_restv // Not found, keep default result. - |. settp CARG1, AT - |5: - | bne CARG1, TISNIL, ->fff_restv - |. nop - | b <4 // Ditto for nil value. - |. nop - | - |6: - | sltiu AT, TMP2, LJ_TISNUM - | movn TMP2, TISNUM, AT - | dsll TMP2, TMP2, 3 - | dsubu TMP0, DISPATCH, TMP2 - | b <2 - |. ld TAB:RB, DISPATCH_GL(gcroot[GCROOT_BASEMT])-8(TMP0) - | - |.ffunc_2 setmetatable - | // Fast path: no mt for table yet and not clearing the mt. - | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback - | gettp TMP3, CARG2 - | ld TAB:TMP0, TAB:TMP1->metatable - | lbu TMP2, TAB:TMP1->marked - | daddiu AT, TMP3, -LJ_TTAB - | cleartp TAB:CARG2 - | or AT, AT, TAB:TMP0 - | bnez AT, ->fff_fallback - |. andi AT, TMP2, LJ_GC_BLACK // isblack(table) - | beqz AT, ->fff_restv - |. sd TAB:CARG2, TAB:TMP1->metatable - | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv - | - |.ffunc rawget - | ld CARG2, 0(BASE) - | sltiu AT, NARGS8:RC, 16 - | load_got lj_tab_get - | gettp TMP0, CARG2 - | cleartp CARG2 - | daddiu TMP0, TMP0, -LJ_TTAB - | or AT, AT, TMP0 - | bnez AT, ->fff_fallback - |. daddiu CARG3, BASE, 8 - | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) - |. move CARG1, L - | b ->fff_restv - |. ld CARG1, 0(CRET1) - | - |//-- Base library: conversions ------------------------------------------ - | - |.ffunc tonumber - | // Only handles the number case inline (without a base argument). - | ld CARG1, 0(BASE) - | xori AT, NARGS8:RC, 8 // Exactly one number argument. - | gettp TMP1, CARG1 - | sltu TMP0, TISNUM, TMP1 - | or AT, AT, TMP0 - | bnez AT, ->fff_fallback - |. nop - | b ->fff_restv - |. nop - | - |.ffunc_1 tostring - | // Only handles the string or number case inline. 
- | gettp TMP0, CARG1 - | daddiu AT, TMP0, -LJ_TSTR - | // A __tostring method in the string base metatable is ignored. - | beqz AT, ->fff_restv // String key? - | // Handle numbers inline, unless a number base metatable is present. - |. ld TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) - | sltu TMP0, TISNUM, TMP0 - | or TMP0, TMP0, TMP1 - | bnez TMP0, ->fff_fallback - |. sd BASE, L->base // Add frame since C call can throw. - | ffgccheck - |. sd PC, SAVE_PC // Redundant (but a defined value). - | load_got lj_strfmt_number - | move CARG1, L - | call_intern lj_strfmt_number // (lua_State *L, cTValue *o) - |. move CARG2, BASE - | // Returns GCstr *. - | li AT, LJ_TSTR - | settp CRET1, AT - | b ->fff_restv - |. move CARG1, CRET1 - | - |//-- Base library: iterators ------------------------------------------- - | - |.ffunc_1 next - | checktp CARG2, CARG1, -LJ_TTAB, ->fff_fallback - | daddu TMP2, BASE, NARGS8:RC - | sd TISNIL, 0(TMP2) // Set missing 2nd arg to nil. - | ld PC, FRAME_PC(BASE) - | load_got lj_tab_next - | sd BASE, L->base // Add frame since C call can throw. - | sd BASE, L->top // Dummy frame length is ok. - | daddiu CARG3, BASE, 8 - | sd PC, SAVE_PC - | call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - |. move CARG1, L - | // Returns 0 at end of traversal. - | beqz CRET1, ->fff_restv // End of traversal: return nil. - |. move CARG1, TISNIL - | ld TMP0, 8(BASE) - | daddiu RA, BASE, -16 - | ld TMP2, 16(BASE) - | sd TMP0, 0(RA) - | sd TMP2, 8(RA) - | b ->fff_res - |. li RD, (2+1)*8 - | - |.ffunc_1 pairs - | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback - | ld PC, FRAME_PC(BASE) -#if LJ_52 - | ld TAB:TMP2, TAB:TMP1->metatable - | ld TMP0, CFUNC:RB->upvalue[0] - | bnez TAB:TMP2, ->fff_fallback -#else - | ld TMP0, CFUNC:RB->upvalue[0] -#endif - |. daddiu RA, BASE, -16 - | sd TISNIL, 0(BASE) - | sd CARG1, -8(BASE) - | sd TMP0, 0(RA) - | b ->fff_res - |. 
li RD, (3+1)*8 - | - |.ffunc_2 ipairs_aux - | checktab CARG1, ->fff_fallback - | checkint CARG2, ->fff_fallback - |. lw TMP0, TAB:CARG1->asize - | ld TMP1, TAB:CARG1->array - | ld PC, FRAME_PC(BASE) - | sextw TMP2, CARG2 - | addiu TMP2, TMP2, 1 - | sltu AT, TMP2, TMP0 - | daddiu RA, BASE, -16 - | zextw TMP0, TMP2 - | settp TMP0, TISNUM - | beqz AT, >2 // Not in array part? - |. sd TMP0, 0(RA) - | dsll TMP3, TMP2, 3 - | daddu TMP3, TMP1, TMP3 - | ld TMP1, 0(TMP3) - |1: - | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. - |. li RD, (0+1)*8 - | sd TMP1, -8(BASE) - | b ->fff_res - |. li RD, (2+1)*8 - |2: // Check for empty hash part first. Otherwise call C function. - | lw TMP0, TAB:CARG1->hmask - | load_got lj_tab_getinth - | beqz TMP0, ->fff_res - |. li RD, (0+1)*8 - | call_intern lj_tab_getinth // (GCtab *t, int32_t key) - |. move CARG2, TMP2 - | // Returns cTValue * or NULL. - | beqz CRET1, ->fff_res - |. li RD, (0+1)*8 - | b <1 - |. ld TMP1, 0(CRET1) - | - |.ffunc_1 ipairs - | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback - | ld PC, FRAME_PC(BASE) -#if LJ_52 - | ld TAB:TMP2, TAB:TMP1->metatable - | ld CFUNC:TMP0, CFUNC:RB->upvalue[0] - | bnez TAB:TMP2, ->fff_fallback -#else - | ld TMP0, CFUNC:RB->upvalue[0] -#endif - | daddiu RA, BASE, -16 - | dsll AT, TISNUM, 47 - | sd CARG1, -8(BASE) - | sd AT, 0(BASE) - | sd CFUNC:TMP0, 0(RA) - | b ->fff_res - |. li RD, (3+1)*8 - | - |//-- Base library: catch errors ---------------------------------------- - | - |.ffunc pcall - | daddiu NARGS8:RC, NARGS8:RC, -8 - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | bltz NARGS8:RC, ->fff_fallback - |. move TMP2, BASE - | daddiu BASE, BASE, 16 - | // Remember active hook before pcall. - | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT - | andi TMP3, TMP3, 1 - | daddiu PC, TMP3, 16+FRAME_PCALL - | beqz NARGS8:RC, ->vm_call_dispatch - |1: - |. daddu TMP0, BASE, NARGS8:RC - |2: - | ld TMP1, -16(TMP0) - | sd TMP1, -8(TMP0) - | daddiu TMP0, TMP0, -8 - | bne TMP0, BASE, <2 - |. 
nop - | b ->vm_call_dispatch - |. nop - | - |.ffunc xpcall - | daddiu NARGS8:RC, NARGS8:RC, -16 - | ld CARG1, 0(BASE) - | ld CARG2, 8(BASE) - | bltz NARGS8:RC, ->fff_fallback - |. lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) - | gettp AT, CARG2 - | daddiu AT, AT, -LJ_TFUNC - | bnez AT, ->fff_fallback // Traceback must be a function. - |. move TMP2, BASE - | daddiu BASE, BASE, 24 - | // Remember active hook before pcall. - | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT - | sd CARG2, 0(TMP2) // Swap function and traceback. - | andi TMP3, TMP3, 1 - | sd CARG1, 8(TMP2) - | beqz NARGS8:RC, ->vm_call_dispatch - |. daddiu PC, TMP3, 24+FRAME_PCALL - | b <1 - |. nop - | - |//-- Coroutine library -------------------------------------------------- - | - |.macro coroutine_resume_wrap, resume - |.if resume - |.ffunc_1 coroutine_resume - | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback - |.else - |.ffunc coroutine_wrap_aux - | ld L:CARG1, CFUNC:RB->upvalue[0].gcr - | cleartp L:CARG1 - |.endif - | lbu TMP0, L:CARG1->status - | ld TMP1, L:CARG1->cframe - | ld CARG2, L:CARG1->top - | ld TMP2, L:CARG1->base - | addiu AT, TMP0, -LUA_YIELD - | daddu CARG3, CARG2, TMP0 - | daddiu TMP3, CARG2, 8 - | bgtz AT, ->fff_fallback // st > LUA_YIELD? - |. movn CARG2, TMP3, AT - | xor TMP2, TMP2, CARG3 - | bnez TMP1, ->fff_fallback // cframe != 0? - |. or AT, TMP2, TMP0 - | ld TMP0, L:CARG1->maxstack - | beqz AT, ->fff_fallback // base == top && st == 0? - |. ld PC, FRAME_PC(BASE) - | daddu TMP2, CARG2, NARGS8:RC - | sltu AT, TMP0, TMP2 - | bnez AT, ->fff_fallback // Stack overflow? - |. sd PC, SAVE_PC - | sd BASE, L->base - |1: - |.if resume - | daddiu BASE, BASE, 8 // Keep resumed thread in stack for GC. - | daddiu NARGS8:RC, NARGS8:RC, -8 - | daddiu TMP2, TMP2, -8 - |.endif - | sd TMP2, L:CARG1->top - | daddu TMP1, BASE, NARGS8:RC - | move CARG3, CARG2 - | sd BASE, L->top - |2: // Move args to coroutine. - | ld CRET1, 0(BASE) - | sltu AT, BASE, TMP1 - | beqz AT, >3 - |. 
daddiu BASE, BASE, 8 - | sd CRET1, 0(CARG3) - | b <2 - |. daddiu CARG3, CARG3, 8 - |3: - | bal ->vm_resume // (lua_State *L, TValue *base, 0, 0) - |. move L:RA, L:CARG1 - | // Returns thread status. - |4: - | ld TMP2, L:RA->base - | sltiu AT, CRET1, LUA_YIELD+1 - | ld TMP3, L:RA->top - | li_vmstate INTERP - | ld BASE, L->base - | sd L, DISPATCH_GL(cur_L)(DISPATCH) - | st_vmstate - | beqz AT, >8 - |. dsubu RD, TMP3, TMP2 - | ld TMP0, L->maxstack - | beqz RD, >6 // No results? - |. daddu TMP1, BASE, RD - | sltu AT, TMP0, TMP1 - | bnez AT, >9 // Need to grow stack? - |. daddu TMP3, TMP2, RD - | sd TMP2, L:RA->top // Clear coroutine stack. - | move TMP1, BASE - |5: // Move results from coroutine. - | ld CRET1, 0(TMP2) - | daddiu TMP2, TMP2, 8 - | sltu AT, TMP2, TMP3 - | sd CRET1, 0(TMP1) - | bnez AT, <5 - |. daddiu TMP1, TMP1, 8 - |6: - | andi TMP0, PC, FRAME_TYPE - |.if resume - | mov_true TMP1 - | daddiu RA, BASE, -8 - | sd TMP1, -8(BASE) // Prepend true to results. - | daddiu RD, RD, 16 - |.else - | move RA, BASE - | daddiu RD, RD, 8 - |.endif - |7: - | sd PC, SAVE_PC - | beqz TMP0, ->BC_RET_Z - |. move MULTRES, RD - | b ->vm_return - |. nop - | - |8: // Coroutine returned with error (at co->top-1). - |.if resume - | daddiu TMP3, TMP3, -8 - | mov_false TMP1 - | ld CRET1, 0(TMP3) - | sd TMP3, L:RA->top // Remove error from coroutine stack. - | li RD, (2+1)*8 - | sd TMP1, -8(BASE) // Prepend false to results. - | daddiu RA, BASE, -8 - | sd CRET1, 0(BASE) // Copy error message. - | b <7 - |. andi TMP0, PC, FRAME_TYPE - |.else - | load_got lj_ffh_coroutine_wrap_err - | move CARG2, L:RA - | call_intern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) - |. move CARG1, L - |.endif - | - |9: // Handle stack expansion on return from yield. - | load_got lj_state_growstack - | srl CARG2, RD, 3 - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | b <4 - |. 
li CRET1, 0 - |.endmacro - | - | coroutine_resume_wrap 1 // coroutine.resume - | coroutine_resume_wrap 0 // coroutine.wrap - | - |.ffunc coroutine_yield - | ld TMP0, L->cframe - | daddu TMP1, BASE, NARGS8:RC - | sd BASE, L->base - | andi TMP0, TMP0, CFRAME_RESUME - | sd TMP1, L->top - | beqz TMP0, ->fff_fallback - |. li CRET1, LUA_YIELD - | sd r0, L->cframe - | b ->vm_leave_unw - |. sb CRET1, L->status - | - |//-- Math library ------------------------------------------------------- - | - |.ffunc_1 math_abs - | gettp CARG2, CARG1 - | daddiu AT, CARG2, -LJ_TISNUM - | bnez AT, >1 - |. sextw TMP1, CARG1 - | sra TMP0, TMP1, 31 // Extract sign. - | xor TMP1, TMP1, TMP0 - | dsubu CARG1, TMP1, TMP0 - | dsll TMP3, CARG1, 32 - | bgez TMP3, ->fff_restv - |. settp CARG1, TISNUM - | li CARG1, 0x41e0 // 2^31 as a double. - | b ->fff_restv - |. dsll CARG1, CARG1, 48 - |1: - | sltiu AT, CARG2, LJ_TISNUM - | beqz AT, ->fff_fallback - |. dextm CARG1, CARG1, 0, 30 - |// fallthrough - | - |->fff_restv: - | // CARG1 = TValue result. - | ld PC, FRAME_PC(BASE) - | daddiu RA, BASE, -16 - | sd CARG1, -16(BASE) - |->fff_res1: - | // RA = results, PC = return. - | li RD, (1+1)*8 - |->fff_res: - | // RA = results, RD = (nresults+1)*8, PC = return. - | andi TMP0, PC, FRAME_TYPE - | bnez TMP0, ->vm_return - |. move MULTRES, RD - | lw INS, -4(PC) - | decode_RB8a RB, INS - | decode_RB8b RB - |5: - | sltu AT, RD, RB - | bnez AT, >6 // More results expected? - |. decode_RA8a TMP0, INS - | decode_RA8b TMP0 - | ins_next1 - | // Adjust BASE. KBASE is assumed to be set for the calling frame. - | dsubu BASE, RA, TMP0 - | ins_next2 - | - |6: // Fill up results with nil. - | daddu TMP1, RA, RD - | daddiu RD, RD, 8 - | b <5 - |. sd TISNIL, -8(TMP1) - | - |.macro math_extern, func - | .ffunc_n math_ .. func - | load_got func - | call_extern - |. nop - | b ->fff_resn - |. nop - |.endmacro - | - |.macro math_extern2, func - | .ffunc_nn math_ .. func - |. load_got func - | call_extern - |. 
nop - | b ->fff_resn - |. nop - |.endmacro - | - |// TODO: Return integer type if result is integer (own sf implementation). - |.macro math_round, func - |->ff_math_ .. func: - | ld CARG1, 0(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. gettp TMP0, CARG1 - | beq TMP0, TISNUM, ->fff_restv - |. sltu AT, TMP0, TISNUM - | beqz AT, ->fff_fallback - |.if FPU - |. ldc1 FARG1, 0(BASE) - | bal ->vm_ .. func - |. nop - |.else - |. load_got func - | call_extern - |. nop - |.endif - | b ->fff_resn - |. nop - |.endmacro - | - | math_round floor - | math_round ceil - | - |.ffunc math_log - | li AT, 8 - | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. - |. ld CARG1, 0(BASE) - | checknum CARG1, ->fff_fallback - |. load_got log - |.if FPU - | call_extern - |. ldc1 FARG1, 0(BASE) - |.else - | call_extern - |. nop - |.endif - | b ->fff_resn - |. nop - | - | math_extern log10 - | math_extern exp - | math_extern sin - | math_extern cos - | math_extern tan - | math_extern asin - | math_extern acos - | math_extern atan - | math_extern sinh - | math_extern cosh - | math_extern tanh - | math_extern2 pow - | math_extern2 atan2 - | math_extern2 fmod - | - |.if FPU - |.ffunc_n math_sqrt - |. sqrt.d FRET1, FARG1 - |// fallthrough to ->fff_resn - |.else - | math_extern sqrt - |.endif - | - |->fff_resn: - | ld PC, FRAME_PC(BASE) - | daddiu RA, BASE, -16 - | b ->fff_res1 - |.if FPU - |. sdc1 FRET1, 0(RA) - |.else - |. sd CRET1, 0(RA) - |.endif - | - | - |.ffunc_2 math_ldexp - | checknum CARG1, ->fff_fallback - | checkint CARG2, ->fff_fallback - |. load_got ldexp - | .FPU ldc1 FARG1, 0(BASE) - | call_extern - |. lw CARG2, 8+LO(BASE) - | b ->fff_resn - |. nop - | - |.ffunc_n math_frexp - | load_got frexp - | ld PC, FRAME_PC(BASE) - | call_extern - |. 
daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) - | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) - | daddiu RA, BASE, -16 - |.if FPU - | mtc1 TMP1, FARG2 - | sdc1 FRET1, 0(RA) - | cvt.d.w FARG2, FARG2 - | sdc1 FARG2, 8(RA) - |.else - | sd CRET1, 0(RA) - | zextw TMP1, TMP1 - | settp TMP1, TISNUM - | sd TMP1, 8(RA) - |.endif - | b ->fff_res - |. li RD, (2+1)*8 - | - |.ffunc_n math_modf - | load_got modf - | ld PC, FRAME_PC(BASE) - | call_extern - |. daddiu CARG2, BASE, -16 - | daddiu RA, BASE, -16 - |.if FPU - | sdc1 FRET1, -8(BASE) - |.else - | sd CRET1, -8(BASE) - |.endif - | b ->fff_res - |. li RD, (2+1)*8 - | - |.macro math_minmax, name, intins, fpins - | .ffunc_1 name - | daddu TMP3, BASE, NARGS8:RC - | checkint CARG1, >5 - |. daddiu TMP2, BASE, 8 - |1: // Handle integers. - | beq TMP2, TMP3, ->fff_restv - |. ld CARG2, 0(TMP2) - | checkint CARG2, >3 - |. sextw CARG1, CARG1 - | lw CARG2, LO(TMP2) - |. slt AT, CARG1, CARG2 - | intins CARG1, CARG2, AT - | daddiu TMP2, TMP2, 8 - | zextw CARG1, CARG1 - | b <1 - |. settp CARG1, TISNUM - | - |3: // Convert intermediate result to number and continue with number loop. - | checknum CARG2, ->fff_fallback - |.if FPU - |. mtc1 CARG1, FRET1 - | cvt.d.w FRET1, FRET1 - | b >7 - |. ldc1 FARG1, 0(TMP2) - |.else - |. nop - | bal ->vm_sfi2d_1 - |. nop - | b >7 - |. nop - |.endif - | - |5: - | .FPU ldc1 FRET1, 0(BASE) - | checknum CARG1, ->fff_fallback - |6: // Handle numbers. - |. ld CARG2, 0(TMP2) - | beq TMP2, TMP3, ->fff_resn - |.if FPU - | ldc1 FARG1, 0(TMP2) - |.else - | move CRET1, CARG1 - |.endif - | checknum CARG2, >8 - |. nop - |7: - |.if FPU - | c.olt.d FRET1, FARG1 - | fpins FRET1, FARG1 - |.else - | bal ->vm_sfcmpolt - |. nop - | intins CARG1, CARG2, CRET1 - |.endif - | b <6 - |. daddiu TMP2, TMP2, 8 - | - |8: // Convert integer to number and continue with number loop. - | checkint CARG2, ->fff_fallback - |.if FPU - |. lwc1 FARG1, LO(TMP2) - | b <7 - |. cvt.d.w FARG1, FARG1 - |.else - |. lw CARG2, LO(TMP2) - | bal ->vm_sfi2d_2 - |. 
nop - | b <7 - |. nop - |.endif - | - |.endmacro - | - | math_minmax math_min, movz, movf.d - | math_minmax math_max, movn, movt.d - | - |//-- String library ----------------------------------------------------- - | - |.ffunc string_byte // Only handle the 1-arg case here. - | ld CARG1, 0(BASE) - | gettp TMP0, CARG1 - | xori AT, NARGS8:RC, 8 - | daddiu TMP0, TMP0, -LJ_TSTR - | or AT, AT, TMP0 - | bnez AT, ->fff_fallback // Need exactly 1 string argument. - |. cleartp STR:CARG1 - | lw TMP0, STR:CARG1->len - | daddiu RA, BASE, -16 - | ld PC, FRAME_PC(BASE) - | sltu RD, r0, TMP0 - | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). - | addiu RD, RD, 1 - | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 - | settp TMP1, TISNUM - | b ->fff_res - |. sd TMP1, 0(RA) - | - |.ffunc string_char // Only handle the 1-arg case here. - | ffgccheck - |. nop - | ld CARG1, 0(BASE) - | gettp TMP0, CARG1 - | xori AT, NARGS8:RC, 8 // Exactly 1 argument. - | daddiu TMP0, TMP0, -LJ_TISNUM // Integer. - | li TMP1, 255 - | sextw CARG1, CARG1 - | or AT, AT, TMP0 - | sltu TMP1, TMP1, CARG1 // !(255 < n). - | or AT, AT, TMP1 - | bnez AT, ->fff_fallback - |. li CARG3, 1 - | daddiu CARG2, sp, TMPD_OFS - | sb CARG1, TMPD - |->fff_newstr: - | load_got lj_str_new - | sd BASE, L->base - | sd PC, SAVE_PC - | call_intern lj_str_new // (lua_State *L, char *str, size_t l) - |. move CARG1, L - | // Returns GCstr *. - | ld BASE, L->base - |->fff_resstr: - | li AT, LJ_TSTR - | settp CRET1, AT - | b ->fff_restv - |. move CARG1, CRET1 - | - |.ffunc string_sub - | ffgccheck - |. nop - | addiu AT, NARGS8:RC, -16 - | ld TMP0, 0(BASE) - | bltz AT, ->fff_fallback - |. gettp TMP3, TMP0 - | cleartp STR:CARG1, TMP0 - | ld CARG2, 8(BASE) - | beqz AT, >1 - |. li CARG4, -1 - | ld CARG3, 16(BASE) - | checkint CARG3, ->fff_fallback - |. sextw CARG4, CARG3 - |1: - | checkint CARG2, ->fff_fallback - |. li AT, LJ_TSTR - | bne TMP3, AT, ->fff_fallback - |. 
sextw CARG3, CARG2 - | lw CARG2, STR:CARG1->len - | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end - | slt AT, CARG4, r0 - | addiu TMP0, CARG2, 1 - | addu TMP1, CARG4, TMP0 - | slt TMP3, CARG3, r0 - | movn CARG4, TMP1, AT // if (end < 0) end += len+1 - | addu TMP1, CARG3, TMP0 - | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1 - | li TMP2, 1 - | slt AT, CARG4, r0 - | slt TMP3, r0, CARG3 - | movn CARG4, r0, AT // if (end < 0) end = 0 - | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1 - | slt AT, CARG2, CARG4 - | movn CARG4, CARG2, AT // if (end > len) end = len - | daddu CARG2, STR:CARG1, CARG3 - | subu CARG3, CARG4, CARG3 // len = end - start - | daddiu CARG2, CARG2, sizeof(GCstr)-1 - | bgez CARG3, ->fff_newstr - |. addiu CARG3, CARG3, 1 // len++ - |->fff_emptystr: // Return empty string. - | li AT, LJ_TSTR - | daddiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty) - | b ->fff_restv - |. settp CARG1, AT - | - |.macro ffstring_op, name - | .ffunc string_ .. name - | ffgccheck - |. nop - | beqz NARGS8:RC, ->fff_fallback - |. ld CARG2, 0(BASE) - | checkstr STR:CARG2, ->fff_fallback - | daddiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf) - | load_got lj_buf_putstr_ .. name - | ld TMP0, SBUF:CARG1->b - | sd L, SBUF:CARG1->L - | sd BASE, L->base - | sd TMP0, SBUF:CARG1->p - | call_intern extern lj_buf_putstr_ .. name - |. sd PC, SAVE_PC - | load_got lj_buf_tostr - | call_intern lj_buf_tostr - |. move SBUF:CARG1, SBUF:CRET1 - | b ->fff_resstr - |. ld BASE, L->base - |.endmacro - | - |ffstring_op reverse - |ffstring_op lower - |ffstring_op upper - | - |//-- Bit library -------------------------------------------------------- - | - |->vm_tobit_fb: - | beqz TMP1, ->fff_fallback - |.if FPU - |. ldc1 FARG1, 0(BASE) - | add.d FARG1, FARG1, TOBIT - | mfc1 CRET1, FARG1 - | jr ra - |. zextw CRET1, CRET1 - |.else - |// FP number to bit conversion for soft-float. 
- |->vm_tobit: - | dsll TMP0, CARG1, 1 - | li CARG3, 1076 - | dsrl AT, TMP0, 53 - | dsubu CARG3, CARG3, AT - | sltiu AT, CARG3, 54 - | beqz AT, >1 - |. dextm TMP0, TMP0, 0, 20 - | dinsu TMP0, AT, 21, 21 - | slt AT, CARG1, r0 - | dsrlv CRET1, TMP0, CARG3 - | dsubu TMP0, r0, CRET1 - | movn CRET1, TMP0, AT - | jr ra - |. zextw CRET1, CRET1 - |1: - | jr ra - |. move CRET1, r0 - |.endif - | - |.macro .ffunc_bit, name - | .ffunc_1 bit_..name - | gettp TMP0, CARG1 - | beq TMP0, TISNUM, >6 - |. zextw CRET1, CARG1 - | bal ->vm_tobit_fb - |. sltiu TMP1, TMP0, LJ_TISNUM - |6: - |.endmacro - | - |.macro .ffunc_bit_op, name, bins - | .ffunc_bit name - | daddiu TMP2, BASE, 8 - | daddu TMP3, BASE, NARGS8:RC - |1: - | beq TMP2, TMP3, ->fff_resi - |. ld CARG1, 0(TMP2) - | gettp TMP0, CARG1 - |.if FPU - | bne TMP0, TISNUM, >2 - |. daddiu TMP2, TMP2, 8 - | zextw CARG1, CARG1 - | b <1 - |. bins CRET1, CRET1, CARG1 - |2: - | ldc1 FARG1, -8(TMP2) - | sltiu AT, TMP0, LJ_TISNUM - | beqz AT, ->fff_fallback - |. add.d FARG1, FARG1, TOBIT - | mfc1 CARG1, FARG1 - | zextw CARG1, CARG1 - | b <1 - |. bins CRET1, CRET1, CARG1 - |.else - | beq TMP0, TISNUM, >2 - |. move CRET2, CRET1 - | bal ->vm_tobit_fb - |. sltiu TMP1, TMP0, LJ_TISNUM - | move CARG1, CRET2 - |2: - | zextw CARG1, CARG1 - | bins CRET1, CRET1, CARG1 - | b <1 - |. daddiu TMP2, TMP2, 8 - |.endif - |.endmacro - | - |.ffunc_bit_op band, and - |.ffunc_bit_op bor, or - |.ffunc_bit_op bxor, xor - | - |.ffunc_bit bswap - | dsrl TMP0, CRET1, 8 - | dsrl TMP1, CRET1, 24 - | andi TMP2, TMP0, 0xff00 - | dins TMP1, CRET1, 24, 31 - | dins TMP2, TMP0, 16, 23 - | b ->fff_resi - |. or CRET1, TMP1, TMP2 - | - |.ffunc_bit bnot - | not CRET1, CRET1 - | b ->fff_resi - |. zextw CRET1, CRET1 - | - |.macro .ffunc_bit_sh, name, shins, shmod - | .ffunc_2 bit_..name - | gettp TMP0, CARG1 - | beq TMP0, TISNUM, >1 - |. nop - | bal ->vm_tobit_fb - |. 
sltiu TMP1, TMP0, LJ_TISNUM - | move CARG1, CRET1 - |1: - | gettp TMP0, CARG2 - | bne TMP0, TISNUM, ->fff_fallback - |. zextw CARG2, CARG2 - | sextw CARG1, CARG1 - |.if shmod == 1 - | negu CARG2, CARG2 - |.endif - | shins CRET1, CARG1, CARG2 - | b ->fff_resi - |. zextw CRET1, CRET1 - |.endmacro - | - |.ffunc_bit_sh lshift, sllv, 0 - |.ffunc_bit_sh rshift, srlv, 0 - |.ffunc_bit_sh arshift, srav, 0 - |.ffunc_bit_sh rol, rotrv, 1 - |.ffunc_bit_sh ror, rotrv, 0 - | - |.ffunc_bit tobit - |->fff_resi: - | ld PC, FRAME_PC(BASE) - | daddiu RA, BASE, -16 - | settp CRET1, TISNUM - | b ->fff_res1 - |. sd CRET1, -16(BASE) - | - |//----------------------------------------------------------------------- - |->fff_fallback: // Call fast function fallback handler. - | // BASE = new base, RB = CFUNC, RC = nargs*8 - | ld TMP3, CFUNC:RB->f - | daddu TMP1, BASE, NARGS8:RC - | ld PC, FRAME_PC(BASE) // Fallback may overwrite PC. - | daddiu TMP0, TMP1, 8*LUA_MINSTACK - | ld TMP2, L->maxstack - | sd PC, SAVE_PC // Redundant (but a defined value). - | sltu AT, TMP2, TMP0 - | sd BASE, L->base - | sd TMP1, L->top - | bnez AT, >5 // Need to grow stack. - |. move CFUNCADDR, TMP3 - | jalr TMP3 // (lua_State *L) - |. move CARG1, L - | // Either throws an error, or recovers and returns -1, 0 or nresults+1. - | ld BASE, L->base - | sll RD, CRET1, 3 - | bgtz CRET1, ->fff_res // Returned nresults+1? - |. daddiu RA, BASE, -16 - |1: // Returned 0 or -1: retry fast path. - | ld LFUNC:RB, FRAME_FUNC(BASE) - | ld TMP0, L->top - | cleartp LFUNC:RB - | bnez CRET1, ->vm_call_tail // Returned -1? - |. dsubu NARGS8:RC, TMP0, BASE - | ins_callt // Returned 0: retry fast path. - | - |// Reconstruct previous base for vmeta_call during tailcall. - |->vm_call_tail: - | andi TMP0, PC, FRAME_TYPE - | li AT, -4 - | bnez TMP0, >3 - |. and TMP1, PC, AT - | lbu TMP1, OFS_RA(PC) - | sll TMP1, TMP1, 3 - | addiu TMP1, TMP1, 16 - |3: - | b ->vm_call_dispatch // Resolve again for tailcall. - |. 
dsubu TMP2, BASE, TMP1 - | - |5: // Grow stack for fallback handler. - | load_got lj_state_growstack - | li CARG2, LUA_MINSTACK - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | ld BASE, L->base - | b <1 - |. li CRET1, 0 // Force retry. - | - |->fff_gcstep: // Call GC step function. - | // BASE = new base, RC = nargs*8 - | move MULTRES, ra - | load_got lj_gc_step - | sd BASE, L->base - | daddu TMP0, BASE, NARGS8:RC - | sd PC, SAVE_PC // Redundant (but a defined value). - | sd TMP0, L->top - | call_intern lj_gc_step // (lua_State *L) - |. move CARG1, L - | ld BASE, L->base - | move ra, MULTRES - | ld TMP0, L->top - | ld CFUNC:RB, FRAME_FUNC(BASE) - | cleartp CFUNC:RB - | jr ra - |. dsubu NARGS8:RC, TMP0, BASE - | - |//----------------------------------------------------------------------- - |//-- Special dispatch targets ------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_record: // Dispatch target for recording phase. - |.if JIT - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | andi AT, TMP3, HOOK_VMEVENT // No recording while in vmevent. - | bnez AT, >5 - | // Decrement the hookcount for consistency, but always do the call. - |. lw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andi AT, TMP3, HOOK_ACTIVE - | bnez AT, >1 - |. addiu TMP2, TMP2, -1 - | andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT - | beqz AT, >1 - |. nop - | b >1 - |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - |.endif - | - |->vm_rethook: // Dispatch target for return hooks. - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | andi AT, TMP3, HOOK_ACTIVE // Hook already active? - | beqz AT, >1 - |5: // Re-dispatch to static ins. - |. ld AT, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4. - | jr AT - |. nop - | - |->vm_inshook: // Dispatch target for instr/line hooks. 
- | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | lw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andi AT, TMP3, HOOK_ACTIVE // Hook already active? - | bnez AT, <5 - |. andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT - | beqz AT, <5 - |. addiu TMP2, TMP2, -1 - | beqz TMP2, >1 - |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andi AT, TMP3, LUA_MASKLINE - | beqz AT, <5 - |1: - |. load_got lj_dispatch_ins - | sw MULTRES, SAVE_MULTRES - | move CARG2, PC - | sd BASE, L->base - | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. - | call_intern lj_dispatch_ins // (lua_State *L, const BCIns *pc) - |. move CARG1, L - |3: - | ld BASE, L->base - |4: // Re-dispatch to static ins. - | lw INS, -4(PC) - | decode_OP8a TMP1, INS - | decode_OP8b TMP1 - | daddu TMP0, DISPATCH, TMP1 - | decode_RD8a RD, INS - | ld AT, GG_DISP2STATIC(TMP0) - | decode_RA8a RA, INS - | decode_RD8b RD - | jr AT - | decode_RA8b RA - | - |->cont_hook: // Continue from hook yield. - | daddiu PC, PC, 4 - | b <4 - |. lw MULTRES, -24+LO(RB) // Restore MULTRES for *M ins. - | - |->vm_hotloop: // Hot loop counter underflow. - |.if JIT - | ld LFUNC:TMP1, FRAME_FUNC(BASE) - | daddiu CARG1, DISPATCH, GG_DISP2J - | cleartp LFUNC:TMP1 - | sd PC, SAVE_PC - | ld TMP1, LFUNC:TMP1->pc - | move CARG2, PC - | sd L, DISPATCH_J(L)(DISPATCH) - | lbu TMP1, PC2PROTO(framesize)(TMP1) - | load_got lj_trace_hot - | sd BASE, L->base - | dsll TMP1, TMP1, 3 - | daddu TMP1, BASE, TMP1 - | call_intern lj_trace_hot // (jit_State *J, const BCIns *pc) - |. sd TMP1, L->top - | b <3 - |. nop - |.endif - | - | - |->vm_callhook: // Dispatch target for call hooks. - |.if JIT - | b >1 - |.endif - |. move CARG2, PC - | - |->vm_hotcall: // Hot call counter underflow. - |.if JIT - | ori CARG2, PC, 1 - |1: - |.endif - | load_got lj_dispatch_call - | daddu TMP0, BASE, RC - | sd PC, SAVE_PC - | sd BASE, L->base - | dsubu RA, RA, BASE - | sd TMP0, L->top - | call_intern lj_dispatch_call // (lua_State *L, const BCIns *pc) - |. 
move CARG1, L - | // Returns ASMFunction. - | ld BASE, L->base - | ld TMP0, L->top - | sd r0, SAVE_PC // Invalidate for subsequent line hook. - | dsubu NARGS8:RC, TMP0, BASE - | daddu RA, BASE, RA - | ld LFUNC:RB, FRAME_FUNC(BASE) - | cleartp LFUNC:RB - | jr CRET1 - |. lw INS, -4(PC) - | - |->cont_stitch: // Trace stitching. - |.if JIT - | // RA = resultptr, RB = meta base - | lw INS, -4(PC) - | ld TRACE:TMP2, -40(RB) // Save previous trace. - | decode_RA8a RC, INS - | daddiu AT, MULTRES, -8 - | cleartp TRACE:TMP2 - | decode_RA8b RC - | beqz AT, >2 - |. daddu RC, BASE, RC // Call base. - |1: // Move results down. - | ld CARG1, 0(RA) - | daddiu AT, AT, -8 - | daddiu RA, RA, 8 - | sd CARG1, 0(RC) - | bnez AT, <1 - |. daddiu RC, RC, 8 - |2: - | decode_RA8a RA, INS - | decode_RB8a RB, INS - | decode_RA8b RA - | decode_RB8b RB - | daddu RA, RA, RB - | daddu RA, BASE, RA - |3: - | sltu AT, RC, RA - | bnez AT, >9 // More results wanted? - |. nop - | - | lhu TMP3, TRACE:TMP2->traceno - | lhu RD, TRACE:TMP2->link - | beq RD, TMP3, ->cont_nop // Blacklisted. - |. load_got lj_dispatch_stitch - | bnez RD, =>BC_JLOOP // Jump to stitched trace. - |. sll RD, RD, 3 - | - | // Stitch a new trace to the previous trace. - | sw TMP3, DISPATCH_J(exitno)(DISPATCH) - | sd L, DISPATCH_J(L)(DISPATCH) - | sd BASE, L->base - | daddiu CARG1, DISPATCH, GG_DISP2J - | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) - |. move CARG2, PC - | b ->cont_nop - |. ld BASE, L->base - | - |9: - | sd TISNIL, 0(RC) - | b <3 - |. daddiu RC, RC, 8 - |.endif - | - |->vm_profhook: // Dispatch target for profiler hook. -#if LJ_HASPROFILE - | load_got lj_dispatch_profile - | sd MULTRES, SAVE_MULTRES - | move CARG2, PC - | sd BASE, L->base - | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc) - |. move CARG1, L - | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. - | daddiu PC, PC, -4 - | b ->cont_nop - |. 
ld BASE, L->base -#endif - | - |//----------------------------------------------------------------------- - |//-- Trace exit handler ------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro savex_, a, b - |.if FPU - | sdc1 f..a, a*8(sp) - | sdc1 f..b, b*8(sp) - | sd r..a, 32*8+a*8(sp) - | sd r..b, 32*8+b*8(sp) - |.else - | sd r..a, a*8(sp) - | sd r..b, b*8(sp) - |.endif - |.endmacro - | - |->vm_exit_handler: - |.if JIT - |.if FPU - | daddiu sp, sp, -(32*8+32*8) - |.else - | daddiu sp, sp, -(32*8) - |.endif - | savex_ 0, 1 - | savex_ 2, 3 - | savex_ 4, 5 - | savex_ 6, 7 - | savex_ 8, 9 - | savex_ 10, 11 - | savex_ 12, 13 - | savex_ 14, 15 - | savex_ 16, 17 - | savex_ 18, 19 - | savex_ 20, 21 - | savex_ 22, 23 - | savex_ 24, 25 - | savex_ 26, 27 - | savex_ 28, 30 - |.if FPU - | sdc1 f29, 29*8(sp) - | sdc1 f31, 31*8(sp) - | sd r0, 32*8+31*8(sp) // Clear RID_TMP. - | daddiu TMP2, sp, 32*8+32*8 // Recompute original value of sp. - | sd TMP2, 32*8+29*8(sp) // Store sp in RID_SP - |.else - | sd r0, 31*8(sp) // Clear RID_TMP. - | daddiu TMP2, sp, 32*8 // Recompute original value of sp. - | sd TMP2, 29*8(sp) // Store sp in RID_SP - |.endif - | li_vmstate EXIT - | daddiu DISPATCH, JGL, -GG_DISP2G-32768 - | lw TMP1, 0(TMP2) // Load exit number. - | st_vmstate - | ld L, DISPATCH_GL(cur_L)(DISPATCH) - | ld BASE, DISPATCH_GL(jit_base)(DISPATCH) - | load_got lj_trace_exit - | sd L, DISPATCH_J(L)(DISPATCH) - | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. - | sd BASE, L->base - | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. - | daddiu CARG1, DISPATCH, GG_DISP2J - | sd r0, DISPATCH_GL(jit_base)(DISPATCH) - | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) - |. move CARG2, sp - | // Returns MULTRES (unscaled) or negated error code. - | ld TMP1, L->cframe - | li AT, -4 - | ld BASE, L->base - | and sp, TMP1, AT - | ld PC, SAVE_PC // Get SAVE_PC. - | b >1 - |. 
sd L, SAVE_L // Set SAVE_L (on-trace resume/yield). - |.endif - |->vm_exit_interp: - |.if JIT - | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. - | ld L, SAVE_L - | daddiu DISPATCH, JGL, -GG_DISP2G-32768 - | sd BASE, L->base - |1: - | bltz CRET1, >9 // Check for error from exit. - |. ld LFUNC:RB, FRAME_FUNC(BASE) - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | dsll MULTRES, CRET1, 3 - | cleartp LFUNC:RB - | sd MULTRES, SAVE_MULTRES - | li TISNIL, LJ_TNIL - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | .FPU mtc1 TMP3, TOBIT - | ld TMP1, LFUNC:RB->pc - | sd r0, DISPATCH_GL(jit_base)(DISPATCH) - | ld KBASE, PC2PROTO(k)(TMP1) - | .FPU cvt.d.s TOBIT, TOBIT - | // Modified copy of ins_next which handles function header dispatch, too. - | lw INS, 0(PC) - | daddiu PC, PC, 4 - | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 - | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) - | decode_OP8a TMP1, INS - | decode_OP8b TMP1 - | sltiu TMP2, TMP1, BC_FUNCF*8 - | daddu TMP0, DISPATCH, TMP1 - | decode_RD8a RD, INS - | ld AT, 0(TMP0) - | decode_RA8a RA, INS - | beqz TMP2, >2 - |. decode_RA8b RA - | jr AT - |. decode_RD8b RD - |2: - | sltiu TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function? - | bnez TMP2, >3 - |. ld TMP1, FRAME_PC(BASE) - | // Check frame below fast function. - | andi TMP0, TMP1, FRAME_TYPE - | bnez TMP0, >3 // Trace stitching continuation? - |. nop - | // Otherwise set KBASE for Lua function below fast function. - | lw TMP2, -4(TMP1) - | decode_RA8a TMP0, TMP2 - | decode_RA8b TMP0 - | dsubu TMP1, BASE, TMP0 - | ld LFUNC:TMP2, -32(TMP1) - | cleartp LFUNC:TMP2 - | ld TMP1, LFUNC:TMP2->pc - | ld KBASE, PC2PROTO(k)(TMP1) - |3: - | daddiu RC, MULTRES, -8 - | jr AT - |. daddu RA, RA, BASE - | - |9: // Rethrow error from the right C frame. - | load_got lj_err_throw - | negu CARG2, CRET1 - | call_intern lj_err_throw // (lua_State *L, int errcode) - |. 
move CARG1, L - |.endif - | - |//----------------------------------------------------------------------- - |//-- Math helper functions ---------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Hard-float round to integer. - |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. - |.macro vm_round_hf, func - | lui TMP0, 0x4330 // Hiword of 2^52 (double). - | dsll TMP0, TMP0, 32 - | dmtc1 TMP0, f4 - | abs.d FRET2, FARG1 // |x| - | dmfc1 AT, FARG1 - | c.olt.d 0, FRET2, f4 - | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 - | bc1f 0, >1 // Truncate only if |x| < 2^52. - |. sub.d FRET1, FRET1, f4 - | slt AT, AT, r0 - |.if "func" == "ceil" - | lui TMP0, 0xbff0 // Hiword of -1 (double). Preserves -0. - |.else - | lui TMP0, 0x3ff0 // Hiword of +1 (double). - |.endif - |.if "func" == "trunc" - | dsll TMP0, TMP0, 32 - | dmtc1 TMP0, f4 - | c.olt.d 0, FRET2, FRET1 // |x| < result? - | sub.d FRET2, FRET1, f4 - | movt.d FRET1, FRET2, 0 // If yes, subtract +1. - | neg.d FRET2, FRET1 - | jr ra - |. movn.d FRET1, FRET2, AT // Merge sign bit back in. - |.else - | neg.d FRET2, FRET1 - | dsll TMP0, TMP0, 32 - | dmtc1 TMP0, f4 - | movn.d FRET1, FRET2, AT // Merge sign bit back in. - |.if "func" == "ceil" - | c.olt.d 0, FRET1, FARG1 // x > result? - |.else - | c.olt.d 0, FARG1, FRET1 // x < result? - |.endif - | sub.d FRET2, FRET1, f4 // If yes, subtract +-1. - | jr ra - |. movt.d FRET1, FRET2, 0 - |.endif - |1: - | jr ra - |. mov.d FRET1, FARG1 - |.endmacro - | - |.macro vm_round, func - |.if FPU - | vm_round_hf, func - |.endif - |.endmacro - | - |->vm_floor: - | vm_round floor - |->vm_ceil: - | vm_round ceil - |->vm_trunc: - |.if JIT - | vm_round trunc - |.endif - | - |// Soft-float integer to number conversion. - |.macro sfi2d, ARG - |.if not FPU - | beqz ARG, >9 // Handle zero first. - |. sra TMP0, ARG, 31 - | xor TMP1, ARG, TMP0 - | dsubu TMP1, TMP1, TMP0 // Absolute value in TMP1. 
- | dclz ARG, TMP1 - | addiu ARG, ARG, -11 - | li AT, 0x3ff+63-11-1 - | dsllv TMP1, TMP1, ARG // Align mantissa left with leading 1. - | subu ARG, AT, ARG // Exponent - 1. - | ins ARG, TMP0, 11, 11 // Sign | Exponent. - | dsll ARG, ARG, 52 // Align left. - | jr ra - |. daddu ARG, ARG, TMP1 // Add mantissa, increment exponent. - |9: - | jr ra - |. nop - |.endif - |.endmacro - | - |// Input CARG1. Output: CARG1. Temporaries: AT, TMP0, TMP1. - |->vm_sfi2d_1: - | sfi2d CARG1 - | - |// Input CARG2. Output: CARG2. Temporaries: AT, TMP0, TMP1. - |->vm_sfi2d_2: - | sfi2d CARG2 - | - |// Soft-float comparison. Equivalent to c.eq.d. - |// Input: CARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1. - |->vm_sfcmpeq: - |.if not FPU - | dsll AT, CARG1, 1 - | dsll TMP0, CARG2, 1 - | or TMP1, AT, TMP0 - | beqz TMP1, >8 // Both args +-0: return 1. - |. lui TMP1, 0xffe0 - | dsll TMP1, TMP1, 32 - | sltu AT, TMP1, AT - | sltu TMP0, TMP1, TMP0 - | or TMP1, AT, TMP0 - | bnez TMP1, >9 // Either arg is NaN: return 0; - |. xor AT, CARG1, CARG2 - | jr ra - |. sltiu CRET1, AT, 1 // Same values: return 1. - |8: - | jr ra - |. li CRET1, 1 - |9: - | jr ra - |. li CRET1, 0 - |.endif - | - |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d. - |// Input: CARG1, CARG2. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2. - |->vm_sfcmpult: - |.if not FPU - | b >1 - |. li CRET2, 1 - |.endif - | - |->vm_sfcmpolt: - |.if not FPU - | li CRET2, 0 - |1: - | dsll AT, CARG1, 1 - | dsll TMP0, CARG2, 1 - | or TMP1, AT, TMP0 - | beqz TMP1, >8 // Both args +-0: return 0. - |. lui TMP1, 0xffe0 - | dsll TMP1, TMP1, 32 - | sltu AT, TMP1, AT - | sltu TMP0, TMP1, TMP0 - | or TMP1, AT, TMP0 - | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; - |. and AT, CARG1, CARG2 - | bltz AT, >5 // Both args negative? - |. nop - | jr ra - |. slt CRET1, CARG1, CARG2 - |5: // Swap conditions if both operands are negative. - | jr ra - |. slt CRET1, CARG2, CARG1 - |8: - | jr ra - |. nop - |9: - | jr ra - |. 
move CRET1, CRET2 - |.endif - | - |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. - |// Input: CARG1, CARG2, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. - |->vm_sfcmpolex: - |.if not FPU - | dsll AT, CARG1, 1 - | dsll TMP0, CARG2, 1 - | or TMP1, AT, TMP0 - | beqz TMP1, >8 // Both args +-0: return 1. - |. lui TMP1, 0xffe0 - | dsll TMP1, TMP1, 32 - | sltu AT, TMP1, AT - | sltu TMP0, TMP1, TMP0 - | or TMP1, AT, TMP0 - | bnez TMP1, >9 // Either arg is NaN: return 0; - |. and AT, CARG1, CARG2 - | xor AT, AT, TMP3 - | bltz AT, >5 // Both args negative? - |. nop - | jr ra - |. slt CRET1, CARG2, CARG1 - |5: // Swap conditions if both operands are negative. - | jr ra - |. slt CRET1, CARG1, CARG2 - |8: - | jr ra - |. li CRET1, 1 - |9: - | jr ra - |. li CRET1, 0 - |.endif - | - |//----------------------------------------------------------------------- - |//-- Miscellaneous functions -------------------------------------------- - |//----------------------------------------------------------------------- - | - |//----------------------------------------------------------------------- - |//-- FFI helper functions ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Handler for callback functions. Callback slot number in r1, g in r2. 
- |->vm_ffi_callback: - |.if FFI - |.type CTSTATE, CTState, PC - | saveregs - | ld CTSTATE, GL:r2->ctype_state - | daddiu DISPATCH, r2, GG_G2DISP - | load_got lj_ccallback_enter - | sw r1, CTSTATE->cb.slot - | sd CARG1, CTSTATE->cb.gpr[0] - | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0] - | sd CARG2, CTSTATE->cb.gpr[1] - | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1] - | sd CARG3, CTSTATE->cb.gpr[2] - | .FPU sdc1 FARG3, CTSTATE->cb.fpr[2] - | sd CARG4, CTSTATE->cb.gpr[3] - | .FPU sdc1 FARG4, CTSTATE->cb.fpr[3] - | sd CARG5, CTSTATE->cb.gpr[4] - | .FPU sdc1 FARG5, CTSTATE->cb.fpr[4] - | sd CARG6, CTSTATE->cb.gpr[5] - | .FPU sdc1 FARG6, CTSTATE->cb.fpr[5] - | sd CARG7, CTSTATE->cb.gpr[6] - | .FPU sdc1 FARG7, CTSTATE->cb.fpr[6] - | sd CARG8, CTSTATE->cb.gpr[7] - | .FPU sdc1 FARG8, CTSTATE->cb.fpr[7] - | daddiu TMP0, sp, CFRAME_SPACE - | sd TMP0, CTSTATE->cb.stack - | sd r0, SAVE_PC // Any value outside of bytecode is ok. - | move CARG2, sp - | call_intern lj_ccallback_enter // (CTState *cts, void *cf) - |. move CARG1, CTSTATE - | // Returns lua_State *. - | ld BASE, L:CRET1->base - | ld RC, L:CRET1->top - | move L, CRET1 - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | ld LFUNC:RB, FRAME_FUNC(BASE) - | .FPU mtc1 TMP3, TOBIT - | li TISNIL, LJ_TNIL - | li TISNUM, LJ_TISNUM - | li_vmstate INTERP - | subu RC, RC, BASE - | cleartp LFUNC:RB - | st_vmstate - | .FPU cvt.d.s TOBIT, TOBIT - | ins_callt - |.endif - | - |->cont_ffi_callback: // Return from FFI callback. - |.if FFI - | load_got lj_ccallback_leave - | ld CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH) - | sd BASE, L->base - | sd RB, L->top - | sd L, CTSTATE->L - | move CARG2, RA - | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) - |. move CARG1, CTSTATE - | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0] - | ld CRET1, CTSTATE->cb.gpr[0] - | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1] - | b ->vm_leave_unw - |. ld CRET2, CTSTATE->cb.gpr[1] - |.endif - | - |->vm_ffi_call: // Call C function via FFI. 
- | // Caveat: needs special frame unwinding, see below. - |.if FFI - | .type CCSTATE, CCallState, CARG1 - | lw TMP1, CCSTATE->spadj - | lbu CARG2, CCSTATE->nsp - | move TMP2, sp - | dsubu sp, sp, TMP1 - | sd ra, -8(TMP2) - | sll CARG2, CARG2, 3 - | sd r16, -16(TMP2) - | sd CCSTATE, -24(TMP2) - | move r16, TMP2 - | daddiu TMP1, CCSTATE, offsetof(CCallState, stack) - | move TMP2, sp - | beqz CARG2, >2 - |. daddu TMP3, TMP1, CARG2 - |1: - | ld TMP0, 0(TMP1) - | daddiu TMP1, TMP1, 8 - | sltu AT, TMP1, TMP3 - | sd TMP0, 0(TMP2) - | bnez AT, <1 - |. daddiu TMP2, TMP2, 8 - |2: - | ld CFUNCADDR, CCSTATE->func - | .FPU ldc1 FARG1, CCSTATE->gpr[0] - | ld CARG2, CCSTATE->gpr[1] - | .FPU ldc1 FARG2, CCSTATE->gpr[1] - | ld CARG3, CCSTATE->gpr[2] - | .FPU ldc1 FARG3, CCSTATE->gpr[2] - | ld CARG4, CCSTATE->gpr[3] - | .FPU ldc1 FARG4, CCSTATE->gpr[3] - | ld CARG5, CCSTATE->gpr[4] - | .FPU ldc1 FARG5, CCSTATE->gpr[4] - | ld CARG6, CCSTATE->gpr[5] - | .FPU ldc1 FARG6, CCSTATE->gpr[5] - | ld CARG7, CCSTATE->gpr[6] - | .FPU ldc1 FARG7, CCSTATE->gpr[6] - | ld CARG8, CCSTATE->gpr[7] - | .FPU ldc1 FARG8, CCSTATE->gpr[7] - | jalr CFUNCADDR - |. ld CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. - | ld CCSTATE:TMP1, -24(r16) - | ld TMP2, -16(r16) - | ld ra, -8(r16) - | sd CRET1, CCSTATE:TMP1->gpr[0] - | sd CRET2, CCSTATE:TMP1->gpr[1] - |.if FPU - | sdc1 FRET1, CCSTATE:TMP1->fpr[0] - | sdc1 FRET2, CCSTATE:TMP1->fpr[1] - |.else - | sd CARG1, CCSTATE:TMP1->gpr[2] // 2nd FP struct field for soft-float. - |.endif - | move sp, r16 - | jr ra - |. move r16, TMP2 - |.endif - |// Note: vm_ffi_call must be the last function in this object file! - | - |//----------------------------------------------------------------------- -} - -/* Generate the code for a single instruction. 
*/ -static void build_ins(BuildCtx *ctx, BCOp op, int defop) -{ - int vk = 0; - |=>defop: - - switch (op) { - - /* -- Comparison ops ---------------------------------------------------- */ - - /* Remember: all ops branch for a true comparison, fall through otherwise. */ - - case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: - | // RA = src1*8, RD = src2*8, JMP with RD = target - |.macro bc_comp, FRA, FRD, ARGRA, ARGRD, movop, fmovop, fcomp, sfcomp - | daddu RA, BASE, RA - | daddu RD, BASE, RD - | ld ARGRA, 0(RA) - | ld ARGRD, 0(RD) - | lhu TMP2, OFS_RD(PC) - | gettp CARG3, ARGRA - | gettp CARG4, ARGRD - | bne CARG3, TISNUM, >2 - |. daddiu PC, PC, 4 - | bne CARG4, TISNUM, >5 - |. decode_RD4b TMP2 - | sextw ARGRA, ARGRA - | sextw ARGRD, ARGRD - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | slt AT, CARG1, CARG2 - | addu TMP2, TMP2, TMP3 - | movop TMP2, r0, AT - |1: - | daddu PC, PC, TMP2 - | ins_next - | - |2: // RA is not an integer. - | sltiu AT, CARG3, LJ_TISNUM - | beqz AT, ->vmeta_comp - |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sltiu AT, CARG4, LJ_TISNUM - | beqz AT, >4 - |. decode_RD4b TMP2 - |.if FPU - | ldc1 FRA, 0(RA) - | ldc1 FRD, 0(RD) - |.endif - |3: // RA and RD are both numbers. - |.if FPU - | fcomp f20, f22 - | addu TMP2, TMP2, TMP3 - | b <1 - |. fmovop TMP2, r0 - |.else - | bal sfcomp - |. addu TMP2, TMP2, TMP3 - | b <1 - |. movop TMP2, r0, CRET1 - |.endif - | - |4: // RA is a number, RD is not a number. - | bne CARG4, TISNUM, ->vmeta_comp - | // RA is a number, RD is an integer. Convert RD to a number. - |.if FPU - |. lwc1 FRD, LO(RD) - | ldc1 FRA, 0(RA) - | b <3 - |. cvt.d.w FRD, FRD - |.else - |.if "ARGRD" == "CARG1" - |. sextw CARG1, CARG1 - | bal ->vm_sfi2d_1 - |. nop - |.else - |. sextw CARG2, CARG2 - | bal ->vm_sfi2d_2 - |. nop - |.endif - | b <3 - |. nop - |.endif - | - |5: // RA is an integer, RD is not an integer - | sltiu AT, CARG4, LJ_TISNUM - | beqz AT, ->vmeta_comp - |. 
lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | // RA is an integer, RD is a number. Convert RA to a number. - |.if FPU - | lwc1 FRA, LO(RA) - | ldc1 FRD, 0(RD) - | b <3 - | cvt.d.w FRA, FRA - |.else - |.if "ARGRA" == "CARG1" - | bal ->vm_sfi2d_1 - |. sextw CARG1, CARG1 - |.else - | bal ->vm_sfi2d_2 - |. sextw CARG2, CARG2 - |.endif - | b <3 - |. nop - |.endif - |.endmacro - | - if (op == BC_ISLT) { - | bc_comp f20, f22, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt - } else if (op == BC_ISGE) { - | bc_comp f20, f22, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt - } else if (op == BC_ISLE) { - | bc_comp f22, f20, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult - } else { - | bc_comp f22, f20, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult - } - break; - - case BC_ISEQV: case BC_ISNEV: - vk = op == BC_ISEQV; - | // RA = src1*8, RD = src2*8, JMP with RD = target - | daddu RA, BASE, RA - | daddiu PC, PC, 4 - | daddu RD, BASE, RD - | ld CARG1, 0(RA) - | lhu TMP2, -4+OFS_RD(PC) - | ld CARG2, 0(RD) - | gettp CARG3, CARG1 - | gettp CARG4, CARG2 - | sltu AT, TISNUM, CARG3 - | sltu TMP1, TISNUM, CARG4 - | or AT, AT, TMP1 - if (vk) { - | beqz AT, ->BC_ISEQN_Z - } else { - | beqz AT, ->BC_ISNEN_Z - } - | // Either or both types are not numbers. - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - |.if FFI - |. li AT, LJ_TCDATA - | beq CARG3, AT, ->vmeta_equal_cd - |.endif - | decode_RD4b TMP2 - |.if FFI - | beq CARG4, AT, ->vmeta_equal_cd - |. nop - |.endif - | bne CARG1, CARG2, >2 - |. addu TMP2, TMP2, TMP3 - | // Tag and value are equal. - if (vk) { - |->BC_ISEQV_Z: - | daddu PC, PC, TMP2 - } - |1: - | ins_next - | - |2: // Check if the tags are the same and it's a table or userdata. - | xor AT, CARG3, CARG4 // Same type? - | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? - | movn TMP0, r0, AT - if (vk) { - | beqz TMP0, <1 - } else { - | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction. - } - | // Different tables or userdatas. 
Need to check __eq metamethod. - | // Field metatable must be at same offset for GCtab and GCudata! - |. cleartp TAB:TMP1, CARG1 - | ld TAB:TMP3, TAB:TMP1->metatable - if (vk) { - | beqz TAB:TMP3, <1 // No metatable? - |. nop - | lbu TMP3, TAB:TMP3->nomm - | andi TMP3, TMP3, 1<<MM_eq - | bnez TMP3, >1 // Or 'no __eq' flag set? - } else { - | beqz TAB:TMP3,->BC_ISEQV_Z // No metatable? - |. nop - | lbu TMP3, TAB:TMP3->nomm - | andi TMP3, TMP3, 1<<MM_eq - | bnez TMP3, ->BC_ISEQV_Z // Or 'no __eq' flag set? - } - |. nop - | b ->vmeta_equal // Handle __eq metamethod. - |. li TMP0, 1-vk // ne = 0 or 1. - break; - - case BC_ISEQS: case BC_ISNES: - vk = op == BC_ISEQS; - | // RA = src*8, RD = str_const*8 (~), JMP with RD = target - | daddu RA, BASE, RA - | daddiu PC, PC, 4 - | ld CARG1, 0(RA) - | dsubu RD, KBASE, RD - | lhu TMP2, -4+OFS_RD(PC) - | ld CARG2, -8(RD) // KBASE-8-str_const*8 - |.if FFI - | gettp TMP0, CARG1 - | li AT, LJ_TCDATA - |.endif - | li TMP1, LJ_TSTR - | decode_RD4b TMP2 - |.if FFI - | beq TMP0, AT, ->vmeta_equal_cd - |.endif - |. settp CARG2, TMP1 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | xor TMP1, CARG1, CARG2 - | addu TMP2, TMP2, TMP3 - if (vk) { - | movn TMP2, r0, TMP1 - } else { - | movz TMP2, r0, TMP1 - } - | daddu PC, PC, TMP2 - | ins_next - break; - - case BC_ISEQN: case BC_ISNEN: - vk = op == BC_ISEQN; - | // RA = src*8, RD = num_const*8, JMP with RD = target - | daddu RA, BASE, RA - | daddu RD, KBASE, RD - | ld CARG1, 0(RA) - | ld CARG2, 0(RD) - | lhu TMP2, OFS_RD(PC) - | gettp CARG3, CARG1 - | gettp CARG4, CARG2 - | daddiu PC, PC, 4 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - if (vk) { - |->BC_ISEQN_Z: - } else { - |->BC_ISNEN_Z: - } - | bne CARG3, TISNUM, >3 - |. decode_RD4b TMP2 - | bne CARG4, TISNUM, >6 - |. addu TMP2, TMP2, TMP3 - | xor AT, CARG1, CARG2 - if (vk) { - | movn TMP2, r0, AT - |1: - | daddu PC, PC, TMP2 - |2: - } else { - | movz TMP2, r0, AT - |1: - |2: - | daddu PC, PC, TMP2 - } - | ins_next - | - |3: // RA is not an integer.
- | sltu AT, CARG3, TISNUM - |.if FFI - | beqz AT, >8 - |.else - | beqz AT, <2 - |.endif - |. addu TMP2, TMP2, TMP3 - | sltu AT, CARG4, TISNUM - |.if FPU - | ldc1 f20, 0(RA) - | ldc1 f22, 0(RD) - |.endif - | beqz AT, >5 - |. nop - |4: // RA and RD are both numbers. - |.if FPU - | c.eq.d f20, f22 - | b <1 - if (vk) { - |. movf TMP2, r0 - } else { - |. movt TMP2, r0 - } - |.else - | bal ->vm_sfcmpeq - |. nop - | b <1 - if (vk) { - |. movz TMP2, r0, CRET1 - } else { - |. movn TMP2, r0, CRET1 - } - |.endif - | - |5: // RA is a number, RD is not a number. - |.if FFI - | bne CARG4, TISNUM, >9 - |.else - | bne CARG4, TISNUM, <2 - |.endif - | // RA is a number, RD is an integer. Convert RD to a number. - |.if FPU - |. lwc1 f22, LO(RD) - | b <4 - |. cvt.d.w f22, f22 - |.else - |. sextw CARG2, CARG2 - | bal ->vm_sfi2d_2 - |. nop - | b <4 - |. nop - |.endif - | - |6: // RA is an integer, RD is not an integer - | sltu AT, CARG4, TISNUM - |.if FFI - | beqz AT, >9 - |.else - | beqz AT, <2 - |.endif - | // RA is an integer, RD is a number. Convert RA to a number. - |.if FPU - |. lwc1 f20, LO(RA) - | ldc1 f22, 0(RD) - | b <4 - | cvt.d.w f20, f20 - |.else - |. sextw CARG1, CARG1 - | bal ->vm_sfi2d_1 - |. nop - | b <4 - |. nop - |.endif - | - |.if FFI - |8: - | li AT, LJ_TCDATA - | bne CARG3, AT, <2 - |. nop - | b ->vmeta_equal_cd - |. nop - |9: - | li AT, LJ_TCDATA - | bne CARG4, AT, <2 - |. nop - | b ->vmeta_equal_cd - |. nop - |.endif - break; - - case BC_ISEQP: case BC_ISNEP: - vk = op == BC_ISEQP; - | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target - | daddu RA, BASE, RA - | srl TMP1, RD, 3 - | ld TMP0, 0(RA) - | lhu TMP2, OFS_RD(PC) - | not TMP1, TMP1 - | gettp TMP0, TMP0 - | daddiu PC, PC, 4 - |.if FFI - | li AT, LJ_TCDATA - | beq TMP0, AT, ->vmeta_equal_cd - |.endif - |. 
xor TMP0, TMP0, TMP1 - | decode_RD4b TMP2 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, TMP2, TMP3 - if (vk) { - | movn TMP2, r0, TMP0 - } else { - | movz TMP2, r0, TMP0 - } - | daddu PC, PC, TMP2 - | ins_next - break; - - /* -- Unary test and copy ops ------------------------------------------- */ - - case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: - | // RA = dst*8 or unused, RD = src*8, JMP with RD = target - | daddu RD, BASE, RD - | lhu TMP2, OFS_RD(PC) - | ld TMP0, 0(RD) - | daddiu PC, PC, 4 - | gettp TMP0, TMP0 - | sltiu TMP0, TMP0, LJ_TISTRUECOND - if (op == BC_IST || op == BC_ISF) { - | decode_RD4b TMP2 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, TMP2, TMP3 - if (op == BC_IST) { - | movz TMP2, r0, TMP0 - } else { - | movn TMP2, r0, TMP0 - } - | daddu PC, PC, TMP2 - } else { - | ld CRET1, 0(RD) - if (op == BC_ISTC) { - | beqz TMP0, >1 - } else { - | bnez TMP0, >1 - } - |. daddu RA, BASE, RA - | decode_RD4b TMP2 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, TMP2, TMP3 - | sd CRET1, 0(RA) - | daddu PC, PC, TMP2 - |1: - } - | ins_next - break; - - case BC_ISTYPE: - | // RA = src*8, RD = -type*8 - | daddu TMP2, BASE, RA - | srl TMP1, RD, 3 - | ld TMP0, 0(TMP2) - | ins_next1 - | gettp TMP0, TMP0 - | daddu AT, TMP0, TMP1 - | bnez AT, ->vmeta_istype - |. ins_next2 - break; - case BC_ISNUM: - | // RA = src*8, RD = -(TISNUM-1)*8 - | daddu TMP2, BASE, RA - | ld TMP0, 0(TMP2) - | ins_next1 - | checknum TMP0, ->vmeta_istype - |. 
ins_next2 - break; - - /* -- Unary ops --------------------------------------------------------- */ - - case BC_MOV: - | // RA = dst*8, RD = src*8 - | daddu RD, BASE, RD - | daddu RA, BASE, RA - | ld CRET1, 0(RD) - | ins_next1 - | sd CRET1, 0(RA) - | ins_next2 - break; - case BC_NOT: - | // RA = dst*8, RD = src*8 - | daddu RD, BASE, RD - | daddu RA, BASE, RA - | ld TMP0, 0(RD) - | li AT, LJ_TTRUE - | gettp TMP0, TMP0 - | sltu TMP0, AT, TMP0 - | addiu TMP0, TMP0, 1 - | dsll TMP0, TMP0, 47 - | not TMP0, TMP0 - | ins_next1 - | sd TMP0, 0(RA) - | ins_next2 - break; - case BC_UNM: - | // RA = dst*8, RD = src*8 - | daddu RB, BASE, RD - | ld CARG1, 0(RB) - | daddu RA, BASE, RA - | gettp CARG3, CARG1 - | bne CARG3, TISNUM, >2 - |. lui TMP1, 0x8000 - | sextw CARG1, CARG1 - | beq CARG1, TMP1, ->vmeta_unm // Meta handler deals with -2^31. - |. negu CARG1, CARG1 - | zextw CARG1, CARG1 - | settp CARG1, TISNUM - |1: - | ins_next1 - | sd CARG1, 0(RA) - | ins_next2 - |2: - | sltiu AT, CARG3, LJ_TISNUM - | beqz AT, ->vmeta_unm - |. dsll TMP1, TMP1, 32 - | b <1 - |. xor CARG1, CARG1, TMP1 - break; - case BC_LEN: - | // RA = dst*8, RD = src*8 - | daddu CARG2, BASE, RD - | daddu RA, BASE, RA - | ld TMP0, 0(CARG2) - | gettp TMP1, TMP0 - | daddiu AT, TMP1, -LJ_TSTR - | bnez AT, >2 - |. cleartp STR:CARG1, TMP0 - | lw CRET1, STR:CARG1->len - |1: - | settp CRET1, TISNUM - | ins_next1 - | sd CRET1, 0(RA) - | ins_next2 - |2: - | daddiu AT, TMP1, -LJ_TTAB - | bnez AT, ->vmeta_len - |. nop -#if LJ_52 - | ld TAB:TMP2, TAB:CARG1->metatable - | bnez TAB:TMP2, >9 - |. nop - |3: -#endif - |->BC_LEN_Z: - | load_got lj_tab_len - | call_intern lj_tab_len // (GCtab *t) - |. nop - | // Returns uint32_t (but less than 2^31). - | b <1 - |. nop -#if LJ_52 - |9: - | lbu TMP0, TAB:TMP2->nomm - | andi TMP0, TMP0, 1<<MM_len - | bnez TMP0, <3 - |. nop - | b ->vmeta_len - |. nop -#endif - break; - - /* -- Binary ops -------------------------------------------------------- */ - - |.macro fpmod, a, b, c - | bal ->vm_floor // floor(b/c) - |.
div.d FARG1, b, c - | mul.d a, FRET1, c - | sub.d a, b, a // b - floor(b/c)*c - |.endmacro - - |.macro sfpmod - | daddiu sp, sp, -16 - | - | load_got __divdf3 - | sd CARG1, 0(sp) - | call_extern - |. sd CARG2, 8(sp) - | - | load_got floor - | call_extern - |. move CARG1, CRET1 - | - | load_got __muldf3 - | move CARG1, CRET1 - | call_extern - |. ld CARG2, 8(sp) - | - | load_got __subdf3 - | ld CARG1, 0(sp) - | call_extern - |. move CARG2, CRET1 - | - | daddiu sp, sp, 16 - |.endmacro - - |.macro ins_arithpre, label - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 - ||switch (vk) { - ||case 0: - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | // RA = dst*8, RB = src1*8, RC = num_const*8 - | daddu RB, BASE, RB - |.if "label" ~= "none" - | b label - |.endif - |. daddu RC, KBASE, RC - || break; - ||case 1: - | decode_RB8a RC, INS - | decode_RB8b RC - | decode_RDtoRC8 RB, RD - | // RA = dst*8, RB = num_const*8, RC = src1*8 - | daddu RC, BASE, RC - |.if "label" ~= "none" - | b label - |.endif - |. daddu RB, KBASE, RB - || break; - ||default: - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | // RA = dst*8, RB = src1*8, RC = src2*8 - | daddu RB, BASE, RB - |.if "label" ~= "none" - | b label - |.endif - |. daddu RC, BASE, RC - || break; - ||} - |.endmacro - | - |.macro ins_arith, intins, fpins, fpcall, label - | ins_arithpre none - | - |.if "label" ~= "none" - |label: - |.endif - | - |// Used in 5. - | ld CARG1, 0(RB) - | ld CARG2, 0(RC) - | gettp TMP0, CARG1 - | gettp TMP1, CARG2 - | - |.if "intins" ~= "div" - | - | // Check for two integers. - | sextw CARG3, CARG1 - | bne TMP0, TISNUM, >5 - |. sextw CARG4, CARG2 - | bne TMP1, TISNUM, >5 - | - |.if "intins" == "addu" - |. intins CRET1, CARG3, CARG4 - | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow. - | xor TMP2, CRET1, CARG4 - | and TMP1, TMP1, TMP2 - | bltz TMP1, ->vmeta_arith - |. 
daddu RA, BASE, RA - |.elif "intins" == "subu" - |. intins CRET1, CARG3, CARG4 - | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow. - | xor TMP2, CARG3, CARG4 - | and TMP1, TMP1, TMP2 - | bltz TMP1, ->vmeta_arith - |. daddu RA, BASE, RA - |.elif "intins" == "mult" - |. intins CARG3, CARG4 - | mflo CRET1 - | mfhi TMP2 - | sra TMP1, CRET1, 31 - | bne TMP1, TMP2, ->vmeta_arith - |. daddu RA, BASE, RA - |.else - |. load_got lj_vm_modi - | beqz CARG4, ->vmeta_arith - |. daddu RA, BASE, RA - | move CARG1, CARG3 - | call_extern - |. move CARG2, CARG4 - |.endif - | - | zextw CRET1, CRET1 - | settp CRET1, TISNUM - | ins_next1 - | sd CRET1, 0(RA) - |3: - | ins_next2 - | - |.endif - | - |5: // Check for two numbers. - | .FPU ldc1 f20, 0(RB) - | sltu AT, TMP0, TISNUM - | sltu TMP0, TMP1, TISNUM - | .FPU ldc1 f22, 0(RC) - | and AT, AT, TMP0 - | beqz AT, ->vmeta_arith - |. daddu RA, BASE, RA - | - |.if FPU - | fpins FRET1, f20, f22 - |.elif "fpcall" == "sfpmod" - | sfpmod - |.else - | load_got fpcall - | call_extern - |. nop - |.endif - | - | ins_next1 - |.if "intins" ~= "div" - | b <3 - |.endif - |.if FPU - |. sdc1 FRET1, 0(RA) - |.else - |. 
sd CRET1, 0(RA) - |.endif - |.if "intins" == "div" - | ins_next2 - |.endif - | - |.endmacro - - case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - | ins_arith addu, add.d, __adddf3, none - break; - case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - | ins_arith subu, sub.d, __subdf3, none - break; - case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arith mult, mul.d, __muldf3, none - break; - case BC_DIVVN: - | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z - break; - case BC_DIVNV: case BC_DIVVV: - | ins_arithpre ->BC_DIVVN_Z - break; - case BC_MODVN: - | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z - break; - case BC_MODNV: case BC_MODVV: - | ins_arithpre ->BC_MODVN_Z - break; - case BC_POW: - | ins_arithpre none - | ld CARG1, 0(RB) - | ld CARG2, 0(RC) - | gettp TMP0, CARG1 - | gettp TMP1, CARG2 - | sltiu TMP0, TMP0, LJ_TISNUM - | sltiu TMP1, TMP1, LJ_TISNUM - | and AT, TMP0, TMP1 - | load_got pow - | beqz AT, ->vmeta_arith - |. daddu RA, BASE, RA - |.if FPU - | ldc1 FARG1, 0(RB) - | ldc1 FARG2, 0(RC) - |.endif - | call_extern - |. nop - | ins_next1 - |.if FPU - | sdc1 FRET1, 0(RA) - |.else - | sd CRET1, 0(RA) - |.endif - | ins_next2 - break; - - case BC_CAT: - | // RA = dst*8, RB = src_start*8, RC = src_end*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | dsubu CARG3, RC, RB - | sd BASE, L->base - | daddu CARG2, BASE, RC - | move MULTRES, RB - |->BC_CAT_Z: - | load_got lj_meta_cat - | srl CARG3, CARG3, 3 - | sd PC, SAVE_PC - | call_intern lj_meta_cat // (lua_State *L, TValue *top, int left) - |. move CARG1, L - | // Returns NULL (finished) or TValue * (metamethod). - | bnez CRET1, ->vmeta_binop - |. 
ld BASE, L->base - | daddu RB, BASE, MULTRES - | ld CRET1, 0(RB) - | daddu RA, BASE, RA - | ins_next1 - | sd CRET1, 0(RA) - | ins_next2 - break; - - /* -- Constant ops ------------------------------------------------------ */ - - case BC_KSTR: - | // RA = dst*8, RD = str_const*8 (~) - | dsubu TMP1, KBASE, RD - | ins_next1 - | li TMP2, LJ_TSTR - | ld TMP0, -8(TMP1) // KBASE-8-str_const*8 - | daddu RA, BASE, RA - | settp TMP0, TMP2 - | sd TMP0, 0(RA) - | ins_next2 - break; - case BC_KCDATA: - |.if FFI - | // RA = dst*8, RD = cdata_const*8 (~) - | dsubu TMP1, KBASE, RD - | ins_next1 - | ld TMP0, -8(TMP1) // KBASE-8-cdata_const*8 - | li TMP2, LJ_TCDATA - | daddu RA, BASE, RA - | settp TMP0, TMP2 - | sd TMP0, 0(RA) - | ins_next2 - |.endif - break; - case BC_KSHORT: - | // RA = dst*8, RD = int16_literal*8 - | sra RD, INS, 16 - | daddu RA, BASE, RA - | zextw RD, RD - | ins_next1 - | settp RD, TISNUM - | sd RD, 0(RA) - | ins_next2 - break; - case BC_KNUM: - | // RA = dst*8, RD = num_const*8 - | daddu RD, KBASE, RD - | daddu RA, BASE, RA - | ld CRET1, 0(RD) - | ins_next1 - | sd CRET1, 0(RA) - | ins_next2 - break; - case BC_KPRI: - | // RA = dst*8, RD = primitive_type*8 (~) - | daddu RA, BASE, RA - | dsll TMP0, RD, 44 - | not TMP0, TMP0 - | ins_next1 - | sd TMP0, 0(RA) - | ins_next2 - break; - case BC_KNIL: - | // RA = base*8, RD = end*8 - | daddu RA, BASE, RA - | sd TISNIL, 0(RA) - | daddiu RA, RA, 8 - | daddu RD, BASE, RD - |1: - | sd TISNIL, 0(RA) - | slt AT, RA, RD - | bnez AT, <1 - |. 
daddiu RA, RA, 8 - | ins_next_ - break; - - /* -- Upvalue and function ops ------------------------------------------ */ - - case BC_UGET: - | // RA = dst*8, RD = uvnum*8 - | ld LFUNC:RB, FRAME_FUNC(BASE) - | daddu RA, BASE, RA - | cleartp LFUNC:RB - | daddu RD, RD, LFUNC:RB - | ld UPVAL:RB, LFUNC:RD->uvptr - | ins_next1 - | ld TMP1, UPVAL:RB->v - | ld CRET1, 0(TMP1) - | sd CRET1, 0(RA) - | ins_next2 - break; - case BC_USETV: - | // RA = uvnum*8, RD = src*8 - | ld LFUNC:RB, FRAME_FUNC(BASE) - | daddu RD, BASE, RD - | cleartp LFUNC:RB - | daddu RA, RA, LFUNC:RB - | ld UPVAL:RB, LFUNC:RA->uvptr - | ld CRET1, 0(RD) - | lbu TMP3, UPVAL:RB->marked - | ld CARG2, UPVAL:RB->v - | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) - | lbu TMP0, UPVAL:RB->closed - | gettp TMP2, RD - | sd CRET1, 0(CARG2) - | li AT, LJ_GC_BLACK|1 - | or TMP3, TMP3, TMP0 - | beq TMP3, AT, >2 // Upvalue is closed and black? - |. daddiu TMP2, TMP2, -(LJ_TNUMX+1) - |1: - | ins_next - | - |2: // Check if new value is collectable. - | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) - | beqz AT, <1 // tvisgcv(v) - |. cleartp GCOBJ:TMP1, RB - | lbu TMP3, GCOBJ:TMP1->gch.marked - | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) - | beqz TMP3, <1 - |. load_got lj_gc_barrieruv - | // Crossed a write barrier. Move the barrier forward. - | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) - |. daddiu CARG1, DISPATCH, GG_DISP2G - | b <1 - |. nop - break; - case BC_USETS: - | // RA = uvnum*8, RD = str_const*8 (~) - | ld LFUNC:RB, FRAME_FUNC(BASE) - | dsubu TMP1, KBASE, RD - | cleartp LFUNC:RB - | daddu RA, RA, LFUNC:RB - | ld UPVAL:RB, LFUNC:RA->uvptr - | ld STR:TMP1, -8(TMP1) // KBASE-8-str_const*8 - | lbu TMP2, UPVAL:RB->marked - | ld CARG2, UPVAL:RB->v - | lbu TMP3, STR:TMP1->marked - | andi AT, TMP2, LJ_GC_BLACK // isblack(uv) - | lbu TMP2, UPVAL:RB->closed - | li TMP0, LJ_TSTR - | settp TMP1, TMP0 - | bnez AT, >2 - |. 
sd TMP1, 0(CARG2) - |1: - | ins_next - | - |2: // Check if string is white and ensure upvalue is closed. - | beqz TMP2, <1 - |. andi AT, TMP3, LJ_GC_WHITES // iswhite(str) - | beqz AT, <1 - |. load_got lj_gc_barrieruv - | // Crossed a write barrier. Move the barrier forward. - | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) - |. daddiu CARG1, DISPATCH, GG_DISP2G - | b <1 - |. nop - break; - case BC_USETN: - | // RA = uvnum*8, RD = num_const*8 - | ld LFUNC:RB, FRAME_FUNC(BASE) - | daddu RD, KBASE, RD - | cleartp LFUNC:RB - | daddu RA, RA, LFUNC:RB - | ld UPVAL:RB, LFUNC:RA->uvptr - | ld CRET1, 0(RD) - | ld TMP1, UPVAL:RB->v - | ins_next1 - | sd CRET1, 0(TMP1) - | ins_next2 - break; - case BC_USETP: - | // RA = uvnum*8, RD = primitive_type*8 (~) - | ld LFUNC:RB, FRAME_FUNC(BASE) - | dsll TMP0, RD, 44 - | cleartp LFUNC:RB - | daddu RA, RA, LFUNC:RB - | not TMP0, TMP0 - | ld UPVAL:RB, LFUNC:RA->uvptr - | ins_next1 - | ld TMP1, UPVAL:RB->v - | sd TMP0, 0(TMP1) - | ins_next2 - break; - - case BC_UCLO: - | // RA = level*8, RD = target - | ld TMP2, L->openupval - | branch_RD // Do this first since RD is not saved. - | load_got lj_func_closeuv - | sd BASE, L->base - | beqz TMP2, >1 - |. move CARG1, L - | call_intern lj_func_closeuv // (lua_State *L, TValue *level) - |. daddu CARG2, BASE, RA - | ld BASE, L->base - |1: - | ins_next - break; - - case BC_FNEW: - | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) - | load_got lj_func_newL_gc - | dsubu TMP1, KBASE, RD - | ld CARG3, FRAME_FUNC(BASE) - | ld CARG2, -8(TMP1) // KBASE-8-tab_const*8 - | sd BASE, L->base - | sd PC, SAVE_PC - | cleartp CARG3 - | // (lua_State *L, GCproto *pt, GCfuncL *parent) - | call_intern lj_func_newL_gc - |. move CARG1, L - | // Returns GCfuncL *. 
- | li TMP0, LJ_TFUNC - | ld BASE, L->base - | ins_next1 - | settp CRET1, TMP0 - | daddu RA, BASE, RA - | sd CRET1, 0(RA) - | ins_next2 - break; - - /* -- Table ops --------------------------------------------------------- */ - - case BC_TNEW: - case BC_TDUP: - | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) - | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) - | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) - | sd BASE, L->base - | sd PC, SAVE_PC - | sltu AT, TMP0, TMP1 - | beqz AT, >5 - |1: - if (op == BC_TNEW) { - | load_got lj_tab_new - | srl CARG2, RD, 3 - | andi CARG2, CARG2, 0x7ff - | li TMP0, 0x801 - | addiu AT, CARG2, -0x7ff - | srl CARG3, RD, 14 - | movz CARG2, TMP0, AT - | // (lua_State *L, int32_t asize, uint32_t hbits) - | call_intern lj_tab_new - |. move CARG1, L - | // Returns Table *. - } else { - | load_got lj_tab_dup - | dsubu TMP1, KBASE, RD - | move CARG1, L - | call_intern lj_tab_dup // (lua_State *L, Table *kt) - |. ld CARG2, -8(TMP1) // KBASE-8-str_const*8 - | // Returns Table *. - } - | li TMP0, LJ_TTAB - | ld BASE, L->base - | ins_next1 - | daddu RA, BASE, RA - | settp CRET1, TMP0 - | sd CRET1, 0(RA) - | ins_next2 - |5: - | load_got lj_gc_step_fixtop - | move MULTRES, RD - | call_intern lj_gc_step_fixtop // (lua_State *L) - |. move CARG1, L - | b <1 - |. move RD, MULTRES - break; - - case BC_GGET: - | // RA = dst*8, RD = str_const*8 (~) - case BC_GSET: - | // RA = src*8, RD = str_const*8 (~) - | ld LFUNC:TMP2, FRAME_FUNC(BASE) - | dsubu TMP1, KBASE, RD - | ld STR:RC, -8(TMP1) // KBASE-8-str_const*8 - | cleartp LFUNC:TMP2 - | ld TAB:RB, LFUNC:TMP2->env - if (op == BC_GGET) { - | b ->BC_TGETS_Z - } else { - | b ->BC_TSETS_Z - } - |. 
daddu RA, BASE, RA - break; - - case BC_TGETV: - | // RA = dst*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | daddu CARG2, BASE, RB - | daddu CARG3, BASE, RC - | ld TAB:RB, 0(CARG2) - | ld TMP2, 0(CARG3) - | daddu RA, BASE, RA - | checktab TAB:RB, ->vmeta_tgetv - | gettp TMP3, TMP2 - | bne TMP3, TISNUM, >5 // Integer key? - |. lw TMP0, TAB:RB->asize - | sextw TMP2, TMP2 - | ld TMP1, TAB:RB->array - | sltu AT, TMP2, TMP0 - | sll TMP2, TMP2, 3 - | beqz AT, ->vmeta_tgetv // Integer key and in array part? - |. daddu TMP2, TMP1, TMP2 - | ld AT, 0(TMP2) - | beq AT, TISNIL, >2 - |. ld CRET1, 0(TMP2) - |1: - | ins_next1 - | sd CRET1, 0(RA) - | ins_next2 - | - |2: // Check for __index if table value is nil. - | ld TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. nop - | lbu TMP0, TAB:TMP2->nomm - | andi TMP0, TMP0, 1<vmeta_tgetv - |. nop - | - |5: - | li AT, LJ_TSTR - | bne TMP3, AT, ->vmeta_tgetv - |. cleartp RC, TMP2 - | b ->BC_TGETS_Z // String key? - |. nop - break; - case BC_TGETS: - | // RA = dst*8, RB = table*8, RC = str_const*8 (~) - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RC8a RC, INS - | daddu CARG2, BASE, RB - | decode_RC8b RC - | ld TAB:RB, 0(CARG2) - | dsubu CARG3, KBASE, RC - | daddu RA, BASE, RA - | ld STR:RC, -8(CARG3) // KBASE-8-str_const*8 - | checktab TAB:RB, ->vmeta_tgets1 - |->BC_TGETS_Z: - | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 - | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash - | ld NODE:TMP2, TAB:RB->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | sll TMP0, TMP1, 5 - | sll TMP1, TMP1, 3 - | subu TMP1, TMP0, TMP1 - | li TMP3, LJ_TSTR - | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - | settp STR:RC, TMP3 // Tagged key to look for. - |1: - | ld CARG1, NODE:TMP2->key - | ld CRET1, NODE:TMP2->val - | ld NODE:TMP1, NODE:TMP2->next - | bne CARG1, RC, >4 - |. 
ld TAB:TMP3, TAB:RB->metatable - | beq CRET1, TISNIL, >5 // Key found, but nil value? - |. nop - |3: - | ins_next1 - | sd CRET1, 0(RA) - | ins_next2 - | - |4: // Follow hash chain. - | bnez NODE:TMP1, <1 - |. move NODE:TMP2, NODE:TMP1 - | // End of hash chain: key not found, nil result. - | - |5: // Check for __index if table value is nil. - | beqz TAB:TMP3, <3 // No metatable: done. - |. move CRET1, TISNIL - | lbu TMP0, TAB:TMP3->nomm - | andi TMP0, TMP0, 1<vmeta_tgets - |. nop - break; - case BC_TGETB: - | // RA = dst*8, RB = table*8, RC = index*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | daddu CARG2, BASE, RB - | decode_RDtoRC8 RC, RD - | ld TAB:RB, 0(CARG2) - | daddu RA, BASE, RA - | srl TMP0, RC, 3 - | checktab TAB:RB, ->vmeta_tgetb - | lw TMP1, TAB:RB->asize - | ld TMP2, TAB:RB->array - | sltu AT, TMP0, TMP1 - | beqz AT, ->vmeta_tgetb - |. daddu RC, TMP2, RC - | ld AT, 0(RC) - | beq AT, TISNIL, >5 - |. ld CRET1, 0(RC) - |1: - | ins_next1 - | sd CRET1, 0(RA) - | ins_next2 - | - |5: // Check for __index if table value is nil. - | ld TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. nop - | lbu TMP1, TAB:TMP2->nomm - | andi TMP1, TMP1, 1<vmeta_tgetb // Caveat: preserve TMP0 and CARG2! - |. nop - break; - case BC_TGETR: - | // RA = dst*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | daddu RB, BASE, RB - | daddu RC, BASE, RC - | ld TAB:CARG1, 0(RB) - | lw CARG2, LO(RC) - | daddu RA, BASE, RA - | cleartp TAB:CARG1 - | lw TMP0, TAB:CARG1->asize - | ld TMP1, TAB:CARG1->array - | sltu AT, CARG2, TMP0 - | sll TMP2, CARG2, 3 - | beqz AT, ->vmeta_tgetr // In array part? - |. 
daddu CRET1, TMP1, TMP2 - | ld CARG2, 0(CRET1) - |->BC_TGETR_Z: - | ins_next1 - | sd CARG2, 0(RA) - | ins_next2 - break; - - case BC_TSETV: - | // RA = src*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | daddu CARG2, BASE, RB - | daddu CARG3, BASE, RC - | ld RB, 0(CARG2) - | ld TMP2, 0(CARG3) - | daddu RA, BASE, RA - | checktab RB, ->vmeta_tsetv - | checkint TMP2, >5 - |. sextw RC, TMP2 - | lw TMP0, TAB:RB->asize - | ld TMP1, TAB:RB->array - | sltu AT, RC, TMP0 - | sll TMP2, RC, 3 - | beqz AT, ->vmeta_tsetv // Integer key and in array part? - |. daddu TMP1, TMP1, TMP2 - | ld TMP0, 0(TMP1) - | lbu TMP3, TAB:RB->marked - | beq TMP0, TISNIL, >3 - |. ld CRET1, 0(RA) - |1: - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | bnez AT, >7 - |. sd CRET1, 0(TMP1) - |2: - | ins_next - | - |3: // Check for __newindex if previous value is nil. - | ld TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. nop - | lbu TMP2, TAB:TMP2->nomm - | andi TMP2, TMP2, 1<vmeta_tsetv - |. nop - | - |5: - | gettp AT, TMP2 - | daddiu AT, AT, -LJ_TSTR - | bnez AT, ->vmeta_tsetv - |. nop - | b ->BC_TSETS_Z // String key? - |. cleartp STR:RC, TMP2 - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0, <2 - break; - case BC_TSETS: - | // RA = src*8, RB = table*8, RC = str_const*8 (~) - | decode_RB8a RB, INS - | decode_RB8b RB - | daddu CARG2, BASE, RB - | decode_RC8a RC, INS - | ld TAB:RB, 0(CARG2) - | decode_RC8b RC - | dsubu CARG3, KBASE, RC - | ld RC, -8(CARG3) // KBASE-8-str_const*8 - | daddu RA, BASE, RA - | cleartp STR:RC - | checktab TAB:RB, ->vmeta_tsets1 - |->BC_TSETS_Z: - | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 - | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash - | ld NODE:TMP2, TAB:RB->node - | sb r0, TAB:RB->nomm // Clear metamethod cache. 
- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | sll TMP0, TMP1, 5 - | sll TMP1, TMP1, 3 - | subu TMP1, TMP0, TMP1 - | li TMP3, LJ_TSTR - | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - | settp STR:RC, TMP3 // Tagged key to look for. - |.if FPU - | ldc1 f20, 0(RA) - |.else - | ld CRET1, 0(RA) - |.endif - |1: - | ld TMP0, NODE:TMP2->key - | ld CARG2, NODE:TMP2->val - | ld NODE:TMP1, NODE:TMP2->next - | bne TMP0, RC, >5 - |. lbu TMP3, TAB:RB->marked - | beq CARG2, TISNIL, >4 // Key found, but nil value? - |. ld TAB:TMP0, TAB:RB->metatable - |2: - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | bnez AT, >7 - |.if FPU - |. sdc1 f20, NODE:TMP2->val - |.else - |. sd CRET1, NODE:TMP2->val - |.endif - |3: - | ins_next - | - |4: // Check for __newindex if previous value is nil. - | beqz TAB:TMP0, <2 // No metatable: done. - |. nop - | lbu TMP0, TAB:TMP0->nomm - | andi TMP0, TMP0, 1<vmeta_tsets - |. nop - | - |5: // Follow hash chain. - | bnez NODE:TMP1, <1 - |. move NODE:TMP2, NODE:TMP1 - | // End of hash chain: key not found, add a new one - | - | // But check for __newindex first. - | ld TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, >6 // No metatable: continue. - |. daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | lbu TMP0, TAB:TMP2->nomm - | andi TMP0, TMP0, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. - |6: - | load_got lj_tab_newkey - | sd RC, 0(CARG3) - | sd BASE, L->base - | move CARG2, TAB:RB - | sd PC, SAVE_PC - | call_intern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k - |. move CARG1, L - | // Returns TValue *. - | ld BASE, L->base - |.if FPU - | b <3 // No 2nd write barrier needed. - |. sdc1 f20, 0(CRET1) - |.else - | ld CARG1, 0(RA) - | b <3 // No 2nd write barrier needed. - |. sd CARG1, 0(CRET1) - |.endif - | - |7: // Possible table write barrier for the value. Skip valiswhite check. 
- | barrierback TAB:RB, TMP3, TMP0, <3 - break; - case BC_TSETB: - | // RA = src*8, RB = table*8, RC = index*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | daddu CARG2, BASE, RB - | decode_RDtoRC8 RC, RD - | ld TAB:RB, 0(CARG2) - | daddu RA, BASE, RA - | srl TMP0, RC, 3 - | checktab RB, ->vmeta_tsetb - | lw TMP1, TAB:RB->asize - | ld TMP2, TAB:RB->array - | sltu AT, TMP0, TMP1 - | beqz AT, ->vmeta_tsetb - |. daddu RC, TMP2, RC - | ld TMP1, 0(RC) - | lbu TMP3, TAB:RB->marked - | beq TMP1, TISNIL, >5 - |1: - |. ld CRET1, 0(RA) - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | bnez AT, >7 - |. sd CRET1, 0(RC) - |2: - | ins_next - | - |5: // Check for __newindex if previous value is nil. - | ld TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. nop - | lbu TMP1, TAB:TMP2->nomm - | andi TMP1, TMP1, 1<vmeta_tsetb // Caveat: preserve TMP0 and CARG2! - |. nop - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0, <2 - break; - case BC_TSETR: - | // RA = dst*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | daddu CARG1, BASE, RB - | daddu CARG3, BASE, RC - | ld TAB:CARG2, 0(CARG1) - | lw CARG3, LO(CARG3) - | cleartp TAB:CARG2 - | lbu TMP3, TAB:CARG2->marked - | lw TMP0, TAB:CARG2->asize - | ld TMP1, TAB:CARG2->array - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | bnez AT, >7 - |. daddu RA, BASE, RA - |2: - | sltu AT, CARG3, TMP0 - | sll TMP2, CARG3, 3 - | beqz AT, ->vmeta_tsetr // In array part? - |. daddu CRET1, TMP1, TMP2 - |->BC_TSETR_Z: - | ld CARG1, 0(RA) - | ins_next1 - | sd CARG1, 0(CRET1) - | ins_next2 - | - |7: // Possible table write barrier for the value. Skip valiswhite check. 
- | barrierback TAB:CARG2, TMP3, TMP0, <2 - break; - - case BC_TSETM: - | // RA = base*8 (table at base-1), RD = num_const*8 (start index) - | daddu RA, BASE, RA - |1: - | daddu TMP3, KBASE, RD - | ld TAB:CARG2, -8(RA) // Guaranteed to be a table. - | addiu TMP0, MULTRES, -8 - | lw TMP3, LO(TMP3) // Integer constant is in lo-word. - | beqz TMP0, >4 // Nothing to copy? - |. srl CARG3, TMP0, 3 - | cleartp CARG2 - | addu CARG3, CARG3, TMP3 - | lw TMP2, TAB:CARG2->asize - | sll TMP1, TMP3, 3 - | lbu TMP3, TAB:CARG2->marked - | ld CARG1, TAB:CARG2->array - | sltu AT, TMP2, CARG3 - | bnez AT, >5 - |. daddu TMP2, RA, TMP0 - | daddu TMP1, TMP1, CARG1 - | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) - |3: // Copy result slots to table. - | ld CRET1, 0(RA) - | daddiu RA, RA, 8 - | sltu AT, RA, TMP2 - | sd CRET1, 0(TMP1) - | bnez AT, <3 - |. daddiu TMP1, TMP1, 8 - | bnez TMP0, >7 - |. nop - |4: - | ins_next - | - |5: // Need to resize array part. - | load_got lj_tab_reasize - | sd BASE, L->base - | sd PC, SAVE_PC - | move BASE, RD - | call_intern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) - |. move CARG1, L - | // Must not reallocate the stack. - | move RD, BASE - | b <1 - |. ld BASE, L->base // Reload BASE for lack of a saved register. - | - |7: // Possible table write barrier for any value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP3, TMP0, <4 - break; - - /* -- Calls and vararg handling ----------------------------------------- */ - - case BC_CALLM: - | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 - | decode_RDtoRC8 NARGS8:RC, RD - | b ->BC_CALL_Z - |. 
addu NARGS8:RC, NARGS8:RC, MULTRES - break; - case BC_CALL: - | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 - | decode_RDtoRC8 NARGS8:RC, RD - |->BC_CALL_Z: - | move TMP2, BASE - | daddu BASE, BASE, RA - | ld LFUNC:RB, 0(BASE) - | daddiu BASE, BASE, 16 - | addiu NARGS8:RC, NARGS8:RC, -8 - | checkfunc RB, ->vmeta_call - | ins_call - break; - - case BC_CALLMT: - | // RA = base*8, (RB = 0,) RC = extra_nargs*8 - | addu NARGS8:RD, NARGS8:RD, MULTRES // BC_CALLT gets RC from RD. - | // Fall through. Assumes BC_CALLT follows. - break; - case BC_CALLT: - | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 - | daddu RA, BASE, RA - | ld RB, 0(RA) - | move NARGS8:RC, RD - | ld TMP1, FRAME_PC(BASE) - | daddiu RA, RA, 16 - | addiu NARGS8:RC, NARGS8:RC, -8 - | checktp CARG3, RB, -LJ_TFUNC, ->vmeta_callt - |->BC_CALLT_Z: - | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'. - | lbu TMP3, LFUNC:CARG3->ffid - | bnez TMP0, >7 - |. xori TMP2, TMP1, FRAME_VARG - |1: - | sd RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. - | sltiu AT, TMP3, 2 // (> FF_C) Calling a fast function? - | move TMP2, BASE - | move RB, CARG3 - | beqz NARGS8:RC, >3 - |. move TMP3, NARGS8:RC - |2: - | ld CRET1, 0(RA) - | daddiu RA, RA, 8 - | addiu TMP3, TMP3, -8 - | sd CRET1, 0(TMP2) - | bnez TMP3, <2 - |. daddiu TMP2, TMP2, 8 - |3: - | or TMP0, TMP0, AT - | beqz TMP0, >5 - |. nop - |4: - | ins_callt - | - |5: // Tailcall to a fast function with a Lua frame below. - | lw INS, -4(TMP1) - | decode_RA8a RA, INS - | decode_RA8b RA - | dsubu TMP1, BASE, RA - | ld TMP1, -32(TMP1) - | cleartp LFUNC:TMP1 - | ld TMP1, LFUNC:TMP1->pc - | b <4 - |. ld KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. - | - |7: // Tailcall from a vararg function. - | andi AT, TMP2, FRAME_TYPEP - | bnez AT, <1 // Vararg frame below? - |. dsubu TMP2, BASE, TMP2 // Relocate BASE down. - | move BASE, TMP2 - | ld TMP1, FRAME_PC(TMP2) - | b <1 - |. 
andi TMP0, TMP1, FRAME_TYPE - break; - - case BC_ITERC: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) - | move TMP2, BASE // Save old BASE fir vmeta_call. - | daddu BASE, BASE, RA - | ld RB, -24(BASE) - | ld CARG1, -16(BASE) - | ld CARG2, -8(BASE) - | li NARGS8:RC, 16 // Iterators get 2 arguments. - | sd RB, 0(BASE) // Copy callable. - | sd CARG1, 16(BASE) // Copy state. - | sd CARG2, 24(BASE) // Copy control var. - | daddiu BASE, BASE, 16 - | checkfunc RB, ->vmeta_call - | ins_call - break; - - case BC_ITERN: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. - |.endif - | daddu RA, BASE, RA - | ld TAB:RB, -16(RA) - | lw RC, -8+LO(RA) // Get index from control var. - | cleartp TAB:RB - | daddiu PC, PC, 4 - | lw TMP0, TAB:RB->asize - | ld TMP1, TAB:RB->array - | dsll CARG3, TISNUM, 47 - |1: // Traverse array part. - | sltu AT, RC, TMP0 - | beqz AT, >5 // Index points after array part? - |. sll TMP3, RC, 3 - | daddu TMP3, TMP1, TMP3 - | ld CARG1, 0(TMP3) - | lhu RD, -4+OFS_RD(PC) - | or TMP2, RC, CARG3 - | beq CARG1, TISNIL, <1 // Skip holes in array part. - |. addiu RC, RC, 1 - | sd TMP2, 0(RA) - | sd CARG1, 8(RA) - | or TMP0, RC, CARG3 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | decode_RD4b RD - | daddu RD, RD, TMP3 - | sw TMP0, -8+LO(RA) // Update control var. - | daddu PC, PC, RD - |3: - | ins_next - | - |5: // Traverse hash part. - | lw TMP1, TAB:RB->hmask - | subu RC, RC, TMP0 - | ld TMP2, TAB:RB->node - |6: - | sltu AT, TMP1, RC // End of iteration? Branch to ITERL+1. - | bnez AT, <3 - |. sll TMP3, RC, 5 - | sll RB, RC, 3 - | subu TMP3, TMP3, RB - | daddu NODE:TMP3, TMP3, TMP2 - | ld CARG1, 0(NODE:TMP3) - | lhu RD, -4+OFS_RD(PC) - | beq CARG1, TISNIL, <6 // Skip holes in hash part. - |. 
addiu RC, RC, 1 - | ld CARG2, NODE:TMP3->key - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sd CARG1, 8(RA) - | addu RC, RC, TMP0 - | decode_RD4b RD - | addu RD, RD, TMP3 - | sd CARG2, 0(RA) - | daddu PC, PC, RD - | b <3 - |. sw RC, -8+LO(RA) // Update control var. - break; - - case BC_ISNEXT: - | // RA = base*8, RD = target (points to ITERN) - | daddu RA, BASE, RA - | srl TMP0, RD, 1 - | ld CFUNC:CARG1, -24(RA) - | daddu TMP0, PC, TMP0 - | ld CARG2, -16(RA) - | ld CARG3, -8(RA) - | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) - | checkfunc CFUNC:CARG1, >5 - | gettp CARG2, CARG2 - | daddiu CARG2, CARG2, -LJ_TTAB - | lbu TMP1, CFUNC:CARG1->ffid - | daddiu CARG3, CARG3, -LJ_TNIL - | or AT, CARG2, CARG3 - | daddiu TMP1, TMP1, -FF_next_N - | or AT, AT, TMP1 - | bnez AT, >5 - |. lui TMP1, 0xfffe - | daddu PC, TMP0, TMP2 - | ori TMP1, TMP1, 0x7fff - | dsll TMP1, TMP1, 32 - | sd TMP1, -8(RA) - |1: - | ins_next - |5: // Despecialize bytecode if any of the checks fail. - | li TMP3, BC_JMP - | li TMP1, BC_ITERC - | sb TMP3, -4+OFS_OP(PC) - | daddu PC, TMP0, TMP2 - | b <1 - |. sb TMP1, OFS_OP(PC) - break; - - case BC_VARG: - | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 - | ld TMP0, FRAME_PC(BASE) - | decode_RDtoRC8 RC, RD - | decode_RB8a RB, INS - | daddu RC, BASE, RC - | decode_RB8b RB - | daddu RA, BASE, RA - | daddiu RC, RC, FRAME_VARG - | daddu TMP2, RA, RB - | daddiu TMP3, BASE, -16 // TMP3 = vtop - | dsubu RC, RC, TMP0 // RC = vbase - | // Note: RC may now be even _above_ BASE if nargs was < numparams. - | beqz RB, >5 // Copy all varargs? - |. dsubu TMP1, TMP3, RC - | daddiu TMP2, TMP2, -16 - |1: // Copy vararg slots to destination slots. - | ld CARG1, 0(RC) - | sltu AT, RC, TMP3 - | daddiu RC, RC, 8 - | movz CARG1, TISNIL, AT - | sd CARG1, 0(RA) - | sltu AT, RA, TMP2 - | bnez AT, <1 - |. daddiu RA, RA, 8 - |3: - | ins_next - | - |5: // Copy all varargs. - | ld TMP0, L->maxstack - | blez TMP1, <3 // No vararg slots? - |. 
li MULTRES, 8 // MULTRES = (0+1)*8 - | daddu TMP2, RA, TMP1 - | sltu AT, TMP0, TMP2 - | bnez AT, >7 - |. daddiu MULTRES, TMP1, 8 - |6: - | ld CRET1, 0(RC) - | daddiu RC, RC, 8 - | sd CRET1, 0(RA) - | sltu AT, RC, TMP3 - | bnez AT, <6 // More vararg slots? - |. daddiu RA, RA, 8 - | b <3 - |. nop - | - |7: // Grow stack for varargs. - | load_got lj_state_growstack - | sd RA, L->top - | dsubu RA, RA, BASE - | sd BASE, L->base - | dsubu BASE, RC, BASE // Need delta, because BASE may change. - | sd PC, SAVE_PC - | srl CARG2, TMP1, 3 - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | move RC, BASE - | ld BASE, L->base - | daddu RA, BASE, RA - | daddu RC, BASE, RC - | b <6 - |. daddiu TMP3, BASE, -16 - break; - - /* -- Returns ----------------------------------------------------------- */ - - case BC_RETM: - | // RA = results*8, RD = extra_nresults*8 - | addu RD, RD, MULTRES // MULTRES >= 8, so RD >= 8. - | // Fall through. Assumes BC_RET follows. - break; - - case BC_RET: - | // RA = results*8, RD = (nresults+1)*8 - | ld PC, FRAME_PC(BASE) - | daddu RA, BASE, RA - | move MULTRES, RD - |1: - | andi TMP0, PC, FRAME_TYPE - | bnez TMP0, ->BC_RETV_Z - |. xori TMP1, PC, FRAME_VARG - | - |->BC_RET_Z: - | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return - | lw INS, -4(PC) - | daddiu TMP2, BASE, -16 - | daddiu RC, RD, -8 - | decode_RA8a TMP0, INS - | decode_RB8a RB, INS - | decode_RA8b TMP0 - | decode_RB8b RB - | daddu TMP3, TMP2, RB - | beqz RC, >3 - |. dsubu BASE, TMP2, TMP0 - |2: - | ld CRET1, 0(RA) - | daddiu RA, RA, 8 - | daddiu RC, RC, -8 - | sd CRET1, 0(TMP2) - | bnez RC, <2 - |. daddiu TMP2, TMP2, 8 - |3: - | daddiu TMP3, TMP3, -8 - |5: - | sltu AT, TMP2, TMP3 - | bnez AT, >6 - |. ld LFUNC:TMP1, FRAME_FUNC(BASE) - | ins_next1 - | cleartp LFUNC:TMP1 - | ld TMP1, LFUNC:TMP1->pc - | ld KBASE, PC2PROTO(k)(TMP1) - | ins_next2 - | - |6: // Fill up results with nil. - | sd TISNIL, 0(TMP2) - | b <5 - |. 
daddiu TMP2, TMP2, 8 - | - |->BC_RETV_Z: // Non-standard return case. - | andi TMP2, TMP1, FRAME_TYPEP - | bnez TMP2, ->vm_return - |. nop - | // Return from vararg function: relocate BASE down. - | dsubu BASE, BASE, TMP1 - | b <1 - |. ld PC, FRAME_PC(BASE) - break; - - case BC_RET0: case BC_RET1: - | // RA = results*8, RD = (nresults+1)*8 - | ld PC, FRAME_PC(BASE) - | daddu RA, BASE, RA - | move MULTRES, RD - | andi TMP0, PC, FRAME_TYPE - | bnez TMP0, ->BC_RETV_Z - |. xori TMP1, PC, FRAME_VARG - | lw INS, -4(PC) - | daddiu TMP2, BASE, -16 - if (op == BC_RET1) { - | ld CRET1, 0(RA) - } - | decode_RB8a RB, INS - | decode_RA8a RA, INS - | decode_RB8b RB - | decode_RA8b RA - | dsubu BASE, TMP2, RA - if (op == BC_RET1) { - | sd CRET1, 0(TMP2) - } - |5: - | sltu AT, RD, RB - | bnez AT, >6 - |. ld TMP1, FRAME_FUNC(BASE) - | ins_next1 - | cleartp LFUNC:TMP1 - | ld TMP1, LFUNC:TMP1->pc - | ld KBASE, PC2PROTO(k)(TMP1) - | ins_next2 - | - |6: // Fill up results with nil. - | daddiu TMP2, TMP2, 8 - | daddiu RD, RD, 8 - | b <5 - if (op == BC_RET1) { - |. sd TISNIL, 0(TMP2) - } else { - |. sd TISNIL, -8(TMP2) - } - break; - - /* -- Loops and branches ------------------------------------------------ */ - - case BC_FORL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IFORL follows. - break; - - case BC_JFORI: - case BC_JFORL: -#if !LJ_HASJIT - break; -#endif - case BC_FORI: - case BC_IFORL: - | // RA = base*8, RD = target (after end of loop or start of loop) - vk = (op == BC_IFORL || op == BC_JFORL); - | daddu RA, BASE, RA - | ld CARG1, FORL_IDX*8(RA) // IDX CARG1 - CARG3 type - | gettp CARG3, CARG1 - if (op != BC_JFORL) { - | srl RD, RD, 1 - | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) - | daddu TMP2, RD, TMP2 - } - if (!vk) { - | ld CARG2, FORL_STOP*8(RA) // STOP CARG2 - CARG4 type - | ld CRET1, FORL_STEP*8(RA) // STEP CRET1 - CRET2 type - | gettp CARG4, CARG2 - | bne CARG3, TISNUM, >5 - |. gettp CRET2, CRET1 - | bne CARG4, TISNUM, ->vmeta_for - |. 
sextw CARG3, CARG1 - | bne CRET2, TISNUM, ->vmeta_for - |. sextw CARG2, CARG2 - | dext AT, CRET1, 31, 0 - | slt CRET1, CARG2, CARG3 - | slt TMP1, CARG3, CARG2 - | movn CRET1, TMP1, AT - } else { - | bne CARG3, TISNUM, >5 - |. ld CARG2, FORL_STEP*8(RA) // STEP CARG2 - CARG4 type - | ld CRET1, FORL_STOP*8(RA) // STOP CRET1 - CRET2 type - | sextw TMP3, CARG1 - | sextw CARG2, CARG2 - | sextw CRET1, CRET1 - | addu CARG1, TMP3, CARG2 - | xor TMP0, CARG1, TMP3 - | xor TMP1, CARG1, CARG2 - | and TMP0, TMP0, TMP1 - | slt TMP1, CARG1, CRET1 - | slt CRET1, CRET1, CARG1 - | slt AT, CARG2, r0 - | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow. - | movn CRET1, TMP1, AT - | or CRET1, CRET1, TMP0 - | zextw CARG1, CARG1 - | settp CARG1, TISNUM - } - |1: - if (op == BC_FORI) { - | movz TMP2, r0, CRET1 - | daddu PC, PC, TMP2 - } else if (op == BC_JFORI) { - | daddu PC, PC, TMP2 - | lhu RD, -4+OFS_RD(PC) - } else if (op == BC_IFORL) { - | movn TMP2, r0, CRET1 - | daddu PC, PC, TMP2 - } - if (vk) { - | sd CARG1, FORL_IDX*8(RA) - } - | ins_next1 - | sd CARG1, FORL_EXT*8(RA) - |2: - if (op == BC_JFORI) { - | beqz CRET1, =>BC_JLOOP - |. decode_RD8b RD - } else if (op == BC_JFORL) { - | beqz CRET1, =>BC_JLOOP - } - | ins_next2 - | - |5: // FP loop. - |.if FPU - if (!vk) { - | ldc1 f0, FORL_IDX*8(RA) - | ldc1 f2, FORL_STOP*8(RA) - | sltiu TMP0, CARG3, LJ_TISNUM - | sltiu TMP1, CARG4, LJ_TISNUM - | sltiu AT, CRET2, LJ_TISNUM - | ld TMP3, FORL_STEP*8(RA) - | and TMP0, TMP0, TMP1 - | and AT, AT, TMP0 - | beqz AT, ->vmeta_for - |. slt TMP3, TMP3, r0 - | c.ole.d 0, f0, f2 - | c.ole.d 1, f2, f0 - | li CRET1, 1 - | movt CRET1, r0, 0 - | movt AT, r0, 1 - | b <1 - |. 
movn CRET1, AT, TMP3 - } else { - | ldc1 f0, FORL_IDX*8(RA) - | ldc1 f4, FORL_STEP*8(RA) - | ldc1 f2, FORL_STOP*8(RA) - | ld TMP3, FORL_STEP*8(RA) - | add.d f0, f0, f4 - | c.ole.d 0, f0, f2 - | c.ole.d 1, f2, f0 - | slt TMP3, TMP3, r0 - | li CRET1, 1 - | li AT, 1 - | movt CRET1, r0, 0 - | movt AT, r0, 1 - | movn CRET1, AT, TMP3 - if (op == BC_IFORL) { - | movn TMP2, r0, CRET1 - | daddu PC, PC, TMP2 - } - | sdc1 f0, FORL_IDX*8(RA) - | ins_next1 - | b <2 - |. sdc1 f0, FORL_EXT*8(RA) - } - |.else - if (!vk) { - | sltiu TMP0, CARG3, LJ_TISNUM - | sltiu TMP1, CARG4, LJ_TISNUM - | sltiu AT, CRET2, LJ_TISNUM - | and TMP0, TMP0, TMP1 - | and AT, AT, TMP0 - | beqz AT, ->vmeta_for - |. nop - | bal ->vm_sfcmpolex - |. lw TMP3, FORL_STEP*8+HI(RA) - | b <1 - |. nop - } else { - | load_got __adddf3 - | call_extern - |. sw TMP2, TMPD - | ld CARG2, FORL_STOP*8(RA) - | move CARG1, CRET1 - if ( op == BC_JFORL ) { - | lhu RD, -4+OFS_RD(PC) - | decode_RD8b RD - } - | bal ->vm_sfcmpolex - |. lw TMP3, FORL_STEP*8+HI(RA) - | b <1 - |. lw TMP2, TMPD - } - |.endif - break; - - case BC_ITERL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IITERL follows. - break; - - case BC_JITERL: -#if !LJ_HASJIT - break; -#endif - case BC_IITERL: - | // RA = base*8, RD = target - | daddu RA, BASE, RA - | ld TMP1, 0(RA) - | beq TMP1, TISNIL, >1 // Stop if iterator returned nil. - |. nop - if (op == BC_JITERL) { - | b =>BC_JLOOP - |. sd TMP1, -8(RA) - } else { - | branch_RD // Otherwise save control var + branch. - | sd TMP1, -8(RA) - } - |1: - | ins_next - break; - - case BC_LOOP: - | // RA = base*8, RD = target (loop extent) - | // Note: RA/RD is only used by trace recorder to determine scope/extent - | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_ILOOP follows. 
- break; - - case BC_ILOOP: - | // RA = base*8, RD = target (loop extent) - | ins_next - break; - - case BC_JLOOP: - |.if JIT - | // RA = base*8 (ignored), RD = traceno*8 - | ld TMP1, DISPATCH_J(trace)(DISPATCH) - | li AT, 0 - | daddu TMP1, TMP1, RD - | // Traces on MIPS don't store the trace number, so use 0. - | sd AT, DISPATCH_GL(vmstate)(DISPATCH) - | ld TRACE:TMP2, 0(TMP1) - | sd BASE, DISPATCH_GL(jit_base)(DISPATCH) - | ld TMP2, TRACE:TMP2->mcode - | sd L, DISPATCH_GL(tmpbuf.L)(DISPATCH) - | jr TMP2 - |. daddiu JGL, DISPATCH, GG_DISP2G+32768 - |.endif - break; - - case BC_JMP: - | // RA = base*8 (only used by trace recorder), RD = target - | branch_RD - | ins_next - break; - - /* -- Function headers -------------------------------------------------- */ - - case BC_FUNCF: - |.if JIT - | hotcall - |.endif - case BC_FUNCV: /* NYI: compiled vararg functions. */ - | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. - break; - - case BC_JFUNCF: -#if !LJ_HASJIT - break; -#endif - case BC_IFUNCF: - | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 - | ld TMP2, L->maxstack - | lbu TMP1, -4+PC2PROTO(numparams)(PC) - | ld KBASE, -4+PC2PROTO(k)(PC) - | sltu AT, TMP2, RA - | bnez AT, ->vm_growstack_l - |. sll TMP1, TMP1, 3 - if (op != BC_JFUNCF) { - | ins_next1 - } - |2: - | sltu AT, NARGS8:RC, TMP1 // Check for missing parameters. - | bnez AT, >3 - |. daddu AT, BASE, NARGS8:RC - if (op == BC_JFUNCF) { - | decode_RD8a RD, INS - | b =>BC_JLOOP - |. decode_RD8b RD - } else { - | ins_next2 - } - | - |3: // Clear missing parameters. - | sd TISNIL, 0(AT) - | b <2 - |. addiu NARGS8:RC, NARGS8:RC, 8 - break; - - case BC_JFUNCV: -#if !LJ_HASJIT - break; -#endif - | NYI // NYI: compiled vararg functions - break; /* NYI: compiled vararg functions. 
*/ - - case BC_IFUNCV: - | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 - | li TMP0, LJ_TFUNC - | daddu TMP1, BASE, RC - | ld TMP2, L->maxstack - | settp LFUNC:RB, TMP0 - | daddu TMP0, RA, RC - | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC. - | daddiu TMP3, RC, 16+FRAME_VARG - | sltu AT, TMP0, TMP2 - | ld KBASE, -4+PC2PROTO(k)(PC) - | beqz AT, ->vm_growstack_l - |. sd TMP3, 8(TMP1) // Store delta + FRAME_VARG. - | lbu TMP2, -4+PC2PROTO(numparams)(PC) - | move RA, BASE - | move RC, TMP1 - | ins_next1 - | beqz TMP2, >3 - |. daddiu BASE, TMP1, 16 - |1: - | ld TMP0, 0(RA) - | sltu AT, RA, RC // Less args than parameters? - | move CARG1, TMP0 - | movz TMP0, TISNIL, AT // Clear missing parameters. - | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC). - | addiu TMP2, TMP2, -1 - | sd TMP0, 16(TMP1) - | daddiu TMP1, TMP1, 8 - | sd CARG1, 0(RA) - | bnez TMP2, <1 - |. daddiu RA, RA, 8 - |3: - | ins_next2 - break; - - case BC_FUNCC: - case BC_FUNCCW: - | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 - if (op == BC_FUNCC) { - | ld CFUNCADDR, CFUNC:RB->f - } else { - | ld CFUNCADDR, DISPATCH_GL(wrapf)(DISPATCH) - } - | daddu TMP1, RA, NARGS8:RC - | ld TMP2, L->maxstack - | daddu RC, BASE, NARGS8:RC - | sd BASE, L->base - | sltu AT, TMP2, TMP1 - | sd RC, L->top - | li_vmstate C - if (op == BC_FUNCCW) { - | ld CARG2, CFUNC:RB->f - } - | bnez AT, ->vm_growstack_c // Need to grow stack. - |. move CARG1, L - | jalr CFUNCADDR // (lua_State *L [, lua_CFunction f]) - |. st_vmstate - | // Returns nresults. - | ld BASE, L->base - | sll RD, CRET1, 3 - | ld TMP1, L->top - | li_vmstate INTERP - | ld PC, FRAME_PC(BASE) // Fetch PC of caller. - | dsubu RA, TMP1, RD // RA = L->top - nresults*8 - | sd L, DISPATCH_GL(cur_L)(DISPATCH) - | b ->vm_returnc - |. 
st_vmstate - break; - - /* ---------------------------------------------------------------------- */ - - default: - fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); - exit(2); - break; - } -} - -static int build_backend(BuildCtx *ctx) -{ - int op; - - dasm_growpc(Dst, BC__MAX); - - build_subroutines(ctx); - - |.code_op - for (op = 0; op < BC__MAX; op++) - build_ins(ctx, (BCOp)op, op); - - return BC__MAX; -} - -/* Emit pseudo frame-info for all assembler functions. */ -static void emit_asm_debug(BuildCtx *ctx) -{ - int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); - int i; - switch (ctx->mode) { - case BUILD_elfasm: - fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); - fprintf(ctx->fp, - ".Lframe0:\n" - "\t.4byte .LECIE0-.LSCIE0\n" - ".LSCIE0:\n" - "\t.4byte 0xffffffff\n" - "\t.byte 0x1\n" - "\t.string \"\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 31\n" - "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE0:\n\n"); - fprintf(ctx->fp, - ".LSFDE0:\n" - "\t.4byte .LEFDE0-.LASFDE0\n" - ".LASFDE0:\n" - "\t.4byte .Lframe0\n" - "\t.8byte .Lbegin\n" - "\t.8byte %d\n" - "\t.byte 0xe\n\t.uleb128 %d\n" - "\t.byte 0x9f\n\t.sleb128 2*5\n" - "\t.byte 0x9e\n\t.sleb128 2*6\n", - fcofs, CFRAME_SIZE); - for (i = 23; i >= 16; i--) - fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(30-i)); -#if !LJ_SOFTFP - for (i = 31; i >= 24; i--) - fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(46-i)); -#endif - fprintf(ctx->fp, - "\t.align 2\n" - ".LEFDE0:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".LSFDE1:\n" - "\t.4byte .LEFDE1-.LASFDE1\n" - ".LASFDE1:\n" - "\t.4byte .Lframe0\n" - "\t.4byte lj_vm_ffi_call\n" - "\t.4byte %d\n" - "\t.byte 0x9f\n\t.uleb128 2*1\n" - "\t.byte 0x90\n\t.uleb128 2*2\n" - "\t.byte 0xd\n\t.uleb128 0x10\n" - "\t.align 2\n" - ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); -#endif -#if !LJ_NO_UNWIND - /* NYI */ -#endif - break; - default: - break; - } -} - diff --git 
a/src/vm_ppc.dasc b/src/vm_ppc.dasc deleted file mode 100644 index b4260ebc10..0000000000 --- a/src/vm_ppc.dasc +++ /dev/null @@ -1,5248 +0,0 @@ -|// Low-level VM code for PowerPC 32 bit or 32on64 bit mode. -|// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -| -|.arch ppc -|.section code_op, code_sub -| -|.actionlist build_actionlist -|.globals GLOB_ -|.globalnames globnames -|.externnames extnames -| -|// Note: The ragged indentation of the instructions is intentional. -|// The starting columns indicate data dependencies. -| -|//----------------------------------------------------------------------- -| -|// DynASM defines used by the PPC port: -|// -|// P64 64 bit pointers (only for GPR64 testing). -|// Note: see vm_ppc64.dasc for a full PPC64 _LP64 port. -|// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3). -|// Affects reg saves, stack layout, carry/overflow/dot flags etc. -|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360). -|// TOC Need table of contents (64 bit or 32 bit variant, e.g. PS3). -|// Function pointers are really a struct: code, TOC, env (optional). -|// TOCENV Function pointers have an environment pointer, too (not on PS3). -|// PPE Power Processor Element of Cell (PS3) or Xenon (Xbox 360). -|// Must avoid (slow) micro-coded instructions. -| -|.if P64 -|.define TOC, 1 -|.define TOCENV, 1 -|.macro lpx, a, b, c; ldx a, b, c; .endmacro -|.macro lp, a, b; ld a, b; .endmacro -|.macro stp, a, b; std a, b; .endmacro -|.define decode_OPP, decode_OP8 -|.if FFI -|// Missing: Calling conventions, 64 bit regs, TOC. -|.error lib_ffi not yet implemented for PPC64 -|.endif -|.else -|.macro lpx, a, b, c; lwzx a, b, c; .endmacro -|.macro lp, a, b; lwz a, b; .endmacro -|.macro stp, a, b; stw a, b; .endmacro -|.define decode_OPP, decode_OP4 -|.endif -| -|// Convenience macros for TOC handling. 
-|.if TOC -|// Linker needs a TOC patch area for every external call relocation. -|.macro blex, target; bl extern target@plt; nop; .endmacro -|.macro .toc, a, b; a, b; .endmacro -|.if P64 -|.define TOC_OFS, 8 -|.define ENV_OFS, 16 -|.else -|.define TOC_OFS, 4 -|.define ENV_OFS, 8 -|.endif -|.else // No TOC. -|.macro blex, target; bl extern target@plt; .endmacro -|.macro .toc, a, b; .endmacro -|.endif -|.macro .tocenv, a, b; .if TOCENV; a, b; .endif; .endmacro -| -|.macro .gpr64, a, b; .if GPR64; a, b; .endif; .endmacro -| -|.macro andix., y, a, i -|.if PPE -| rlwinm y, a, 0, 31-lj_fls(i), 31-lj_ffs(i) -| cmpwi y, 0 -|.else -| andi. y, a, i -|.endif -|.endmacro -| -|.macro clrso, reg -|.if PPE -| li reg, 0 -| mtxer reg -|.else -| mcrxr cr0 -|.endif -|.endmacro -| -|.macro checkov, reg, noov -|.if PPE -| mfxer reg -| add reg, reg, reg -| cmpwi reg, 0 -| li reg, 0 -| mtxer reg -| bgey noov -|.else -| mcrxr cr0 -| bley noov -|.endif -|.endmacro -| -|//----------------------------------------------------------------------- -| -|// Fixed register assignments for the interpreter. -|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA) -| -|// The following must be C callee-save (but BASE is often refetched). -|.define BASE, r14 // Base of current Lua stack frame. -|.define KBASE, r15 // Constants of current Lua function. -|.define PC, r16 // Next PC. -|.define DISPATCH, r17 // Opcode dispatch table. -|.define LREG, r18 // Register holding lua_State (also in SAVE_L). -|.define MULTRES, r19 // Size of multi-result: (nresults+1)*8. -|.define JGL, r31 // On-trace: global_State + 32768. -| -|// Constants for type-comparisons, stores and conversions. C callee-save. -|.define TISNUM, r22 -|.define TISNIL, r23 -|.define ZERO, r24 -|.define TOBIT, f30 // 2^52 + 2^51. -|.define TONUM, f31 // 2^52 + 2^51 + 2^31. -| -|// The following temporaries are not saved across C calls, except for RA. -|.define RA, r20 // Callee-save. 
-|.define RB, r10 -|.define RC, r11 -|.define RD, r12 -|.define INS, r7 // Overlaps CARG5. -| -|.define TMP0, r0 -|.define TMP1, r8 -|.define TMP2, r9 -|.define TMP3, r6 // Overlaps CARG4. -| -|// Saved temporaries. -|.define SAVE0, r21 -| -|// Calling conventions. -|.define CARG1, r3 -|.define CARG2, r4 -|.define CARG3, r5 -|.define CARG4, r6 // Overlaps TMP3. -|.define CARG5, r7 // Overlaps INS. -| -|.define FARG1, f1 -|.define FARG2, f2 -| -|.define CRET1, r3 -|.define CRET2, r4 -| -|.define TOCREG, r2 // TOC register (only used by C code). -|.define ENVREG, r11 // Environment pointer (nested C functions). -| -|// Stack layout while in interpreter. Must match with lj_frame.h. -|.if GPR64 -|.if FRAME32 -| -|// 456(sp) // \ 32/64 bit C frame info -|.define TONUM_LO, 452(sp) // | -|.define TONUM_HI, 448(sp) // | -|.define TMPD_LO, 444(sp) // | -|.define TMPD_HI, 440(sp) // | -|.define SAVE_CR, 432(sp) // | 64 bit CR save. -|.define SAVE_ERRF, 424(sp) // > Parameter save area. -|.define SAVE_NRES, 420(sp) // | -|.define SAVE_L, 416(sp) // | -|.define SAVE_PC, 412(sp) // | -|.define SAVE_MULTRES, 408(sp) // | -|.define SAVE_CFRAME, 400(sp) // / 64 bit C frame chain. -|// 392(sp) // Reserved. -|.define CFRAME_SPACE, 384 // Delta for sp. -|// Back chain for sp: 384(sp) <-- sp entering interpreter -|.define SAVE_LR, 376(sp) // 32 bit LR stored in hi-part. -|.define SAVE_GPR_, 232 // .. 232+18*8: 64 bit GPR saves. -|.define SAVE_FPR_, 88 // .. 88+18*8: 64 bit FPR saves. -|// 80(sp) // Needed for 16 byte stack frame alignment. -|// 16(sp) // Callee parameter save area (ABI mandated). -|// 8(sp) // Reserved -|// Back chain for sp: 0(sp) <-- sp while in interpreter -|// 32 bit sp stored in hi-part of 0(sp). -| -|.define TMPD_BLO, 447(sp) -|.define TMPD, TMPD_HI -|.define TONUM_D, TONUM_HI -| -|.else -| -|// 508(sp) // \ 32 bit C frame info. -|.define SAVE_ERRF, 472(sp) // | -|.define SAVE_NRES, 468(sp) // | -|.define SAVE_L, 464(sp) // > Parameter save area. 
-|.define SAVE_PC, 460(sp) // | -|.define SAVE_MULTRES, 456(sp) // | -|.define SAVE_CFRAME, 448(sp) // / 64 bit C frame chain. -|.define SAVE_LR, 416(sp) -|.define CFRAME_SPACE, 400 // Delta for sp. -|// Back chain for sp: 400(sp) <-- sp entering interpreter -|.define SAVE_FPR_, 256 // .. 256+18*8: 64 bit FPR saves. -|.define SAVE_GPR_, 112 // .. 112+18*8: 64 bit GPR saves. -|// 48(sp) // Callee parameter save area (ABI mandated). -|.define SAVE_TOC, 40(sp) // TOC save area. -|.define TMPD_LO, 36(sp) // \ Link editor temp (ABI mandated). -|.define TMPD_HI, 32(sp) // / -|.define TONUM_LO, 28(sp) // \ Compiler temp (ABI mandated). -|.define TONUM_HI, 24(sp) // / -|// Next frame lr: 16(sp) -|.define SAVE_CR, 8(sp) // 64 bit CR save. -|// Back chain for sp: 0(sp) <-- sp while in interpreter -| -|.define TMPD_BLO, 39(sp) -|.define TMPD, TMPD_HI -|.define TONUM_D, TONUM_HI -| -|.endif -|.else -| -|.define SAVE_LR, 276(sp) -|.define CFRAME_SPACE, 272 // Delta for sp. -|// Back chain for sp: 272(sp) <-- sp entering interpreter -|.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves. -|.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves. -|.define SAVE_CR, 52(sp) // 32 bit CR save. -|.define SAVE_ERRF, 48(sp) // 32 bit C frame info. 
-|.define SAVE_NRES, 44(sp) -|.define SAVE_CFRAME, 40(sp) -|.define SAVE_L, 36(sp) -|.define SAVE_PC, 32(sp) -|.define SAVE_MULTRES, 28(sp) -|.define UNUSED1, 24(sp) -|.define TMPD_LO, 20(sp) -|.define TMPD_HI, 16(sp) -|.define TONUM_LO, 12(sp) -|.define TONUM_HI, 8(sp) -|// Next frame lr: 4(sp) -|// Back chain for sp: 0(sp) <-- sp while in interpreter -| -|.define TMPD_BLO, 23(sp) -|.define TMPD, TMPD_HI -|.define TONUM_D, TONUM_HI -| -|.endif -| -|.macro save_, reg -|.if GPR64 -| std r..reg, SAVE_GPR_+(reg-14)*8(sp) -|.else -| stw r..reg, SAVE_GPR_+(reg-14)*4(sp) -|.endif -| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) -|.endmacro -|.macro rest_, reg -|.if GPR64 -| ld r..reg, SAVE_GPR_+(reg-14)*8(sp) -|.else -| lwz r..reg, SAVE_GPR_+(reg-14)*4(sp) -|.endif -| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) -|.endmacro -| -|.macro saveregs -|.if GPR64 and not FRAME32 -| stdu sp, -CFRAME_SPACE(sp) -|.else -| stwu sp, -CFRAME_SPACE(sp) -|.endif -| save_ 14; save_ 15; save_ 16 -| mflr r0 -| save_ 17; save_ 18; save_ 19; save_ 20; save_ 21; save_ 22 -|.if GPR64 and not FRAME32 -| std r0, SAVE_LR -|.else -| stw r0, SAVE_LR -|.endif -| save_ 23; save_ 24; save_ 25 -| mfcr r0 -| save_ 26; save_ 27; save_ 28; save_ 29; save_ 30; save_ 31 -|.if GPR64 -| std r0, SAVE_CR -|.else -| stw r0, SAVE_CR -|.endif -| .toc std TOCREG, SAVE_TOC -|.endmacro -| -|.macro restoreregs -|.if GPR64 and not FRAME32 -| ld r0, SAVE_LR -|.else -| lwz r0, SAVE_LR -|.endif -|.if GPR64 -| ld r12, SAVE_CR -|.else -| lwz r12, SAVE_CR -|.endif -| rest_ 14; rest_ 15; rest_ 16; rest_ 17; rest_ 18; rest_ 19 -| mtlr r0; -|.if PPE; mtocrf 0x20, r12; .else; mtcrf 0x38, r12; .endif -| rest_ 20; rest_ 21; rest_ 22; rest_ 23; rest_ 24; rest_ 25 -|.if PPE; mtocrf 0x10, r12; .endif -| rest_ 26; rest_ 27; rest_ 28; rest_ 29; rest_ 30; rest_ 31 -|.if PPE; mtocrf 0x08, r12; .endif -| addi sp, sp, CFRAME_SPACE -|.endmacro -| -|// Type definitions. Some of these are only used for documentation. 
-|.type L, lua_State, LREG -|.type GL, global_State -|.type TVALUE, TValue -|.type GCOBJ, GCobj -|.type STR, GCstr -|.type TAB, GCtab -|.type LFUNC, GCfuncL -|.type CFUNC, GCfuncC -|.type PROTO, GCproto -|.type UPVAL, GCupval -|.type NODE, Node -|.type NARGS8, int -|.type TRACE, GCtrace -|.type SBUF, SBuf -| -|//----------------------------------------------------------------------- -| -|// Trap for not-yet-implemented parts. -|.macro NYI; tw 4, sp, sp; .endmacro -| -|// int/FP conversions. -|.macro tonum_i, freg, reg -| xoris reg, reg, 0x8000 -| stw reg, TONUM_LO -| lfd freg, TONUM_D -| fsub freg, freg, TONUM -|.endmacro -| -|.macro tonum_u, freg, reg -| stw reg, TONUM_LO -| lfd freg, TONUM_D -| fsub freg, freg, TOBIT -|.endmacro -| -|.macro toint, reg, freg, tmpfreg -| fctiwz tmpfreg, freg -| stfd tmpfreg, TMPD -| lwz reg, TMPD_LO -|.endmacro -| -|.macro toint, reg, freg -| toint reg, freg, freg -|.endmacro -| -|//----------------------------------------------------------------------- -| -|// Access to frame relative to BASE. -|.define FRAME_PC, -8 -|.define FRAME_FUNC, -4 -| -|// Instruction decode. -|.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro -|.macro decode_OP8, dst, ins; rlwinm dst, ins, 3, 21, 28; .endmacro -|.macro decode_RA8, dst, ins; rlwinm dst, ins, 27, 21, 28; .endmacro -|.macro decode_RB8, dst, ins; rlwinm dst, ins, 11, 21, 28; .endmacro -|.macro decode_RC8, dst, ins; rlwinm dst, ins, 19, 21, 28; .endmacro -|.macro decode_RD8, dst, ins; rlwinm dst, ins, 19, 13, 28; .endmacro -| -|.macro decode_OP1, dst, ins; rlwinm dst, ins, 0, 24, 31; .endmacro -|.macro decode_RD4, dst, ins; rlwinm dst, ins, 18, 14, 29; .endmacro -| -|// Instruction fetch. -|.macro ins_NEXT1 -| lwz INS, 0(PC) -| addi PC, PC, 4 -|.endmacro -|// Instruction decode+dispatch. Note: optimized for e300! 
-|.macro ins_NEXT2 -| decode_OPP TMP1, INS -| lpx TMP0, DISPATCH, TMP1 -| mtctr TMP0 -| decode_RB8 RB, INS -| decode_RD8 RD, INS -| decode_RA8 RA, INS -| decode_RC8 RC, INS -| bctr -|.endmacro -|.macro ins_NEXT -| ins_NEXT1 -| ins_NEXT2 -|.endmacro -| -|// Instruction footer. -|.if 1 -| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. -| .define ins_next, ins_NEXT -| .define ins_next_, ins_NEXT -| .define ins_next1, ins_NEXT1 -| .define ins_next2, ins_NEXT2 -|.else -| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. -| // Affects only certain kinds of benchmarks (and only with -j off). -| .macro ins_next -| b ->ins_next -| .endmacro -| .macro ins_next1 -| .endmacro -| .macro ins_next2 -| b ->ins_next -| .endmacro -| .macro ins_next_ -| ->ins_next: -| ins_NEXT -| .endmacro -|.endif -| -|// Call decode and dispatch. -|.macro ins_callt -| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC -| lwz PC, LFUNC:RB->pc -| lwz INS, 0(PC) -| addi PC, PC, 4 -| decode_OPP TMP1, INS -| decode_RA8 RA, INS -| lpx TMP0, DISPATCH, TMP1 -| add RA, RA, BASE -| mtctr TMP0 -| bctr -|.endmacro -| -|.macro ins_call -| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC -| stw PC, FRAME_PC(BASE) -| ins_callt -|.endmacro -| -|//----------------------------------------------------------------------- -| -|// Macros to test operand types. -|.macro checknum, reg; cmplw reg, TISNUM; .endmacro -|.macro checknum, cr, reg; cmplw cr, reg, TISNUM; .endmacro -|.macro checkstr, reg; cmpwi reg, LJ_TSTR; .endmacro -|.macro checktab, reg; cmpwi reg, LJ_TTAB; .endmacro -|.macro checkfunc, reg; cmpwi reg, LJ_TFUNC; .endmacro -|.macro checknil, reg; cmpwi reg, LJ_TNIL; .endmacro -| -|.macro branch_RD -| srwi TMP0, RD, 1 -| addis PC, PC, -(BCBIAS_J*4 >> 16) -| add PC, PC, TMP0 -|.endmacro -| -|// Assumes DISPATCH is relative to GL. 
-#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) -#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) -| -#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) -| -|.macro hotcheck, delta, target -| rlwinm TMP1, PC, 31, 25, 30 -| addi TMP1, TMP1, GG_DISP2HOT -| lhzx TMP2, DISPATCH, TMP1 -| addic. TMP2, TMP2, -delta -| sthx TMP2, DISPATCH, TMP1 -| blt target -|.endmacro -| -|.macro hotloop -| hotcheck HOTCOUNT_LOOP, ->vm_hotloop -|.endmacro -| -|.macro hotcall -| hotcheck HOTCOUNT_CALL, ->vm_hotcall -|.endmacro -| -|// Set current VM state. Uses TMP0. -|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro -|.macro st_vmstate; stw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro -| -|// Move table write barrier back. Overwrites mark and tmp. -|.macro barrierback, tab, mark, tmp -| lwz tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) -| // Assumes LJ_GC_BLACK is 0x04. -| rlwinm mark, mark, 0, 30, 28 // black2gray(tab) -| stw tab, DISPATCH_GL(gc.grayagain)(DISPATCH) -| stb mark, tab->marked -| stw tmp, tab->gclist -|.endmacro -| -|//----------------------------------------------------------------------- - -/* Generate subroutines used by opcodes and other parts of the VM. */ -/* The .code_sub section should be last to help static branch prediction. */ -static void build_subroutines(BuildCtx *ctx) -{ - |.code_sub - | - |//----------------------------------------------------------------------- - |//-- Return handling ---------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_returnp: - | // See vm_return. Also: TMP2 = previous base. - | andix. TMP0, PC, FRAME_P - | li TMP1, LJ_TTRUE - | beq ->cont_dispatch - | - | // Return from pcall or xpcall fast func. - | lwz PC, FRAME_PC(TMP2) // Fetch PC of previous frame. - | mr BASE, TMP2 // Restore caller base. - | // Prepending may overwrite the pcall frame, so do it at the end. 
- | stwu TMP1, FRAME_PC(RA) // Prepend true to results. - | - |->vm_returnc: - | addi RD, RD, 8 // RD = (nresults+1)*8. - | andix. TMP0, PC, FRAME_TYPE - | cmpwi cr1, RD, 0 - | li CRET1, LUA_YIELD - | beq cr1, ->vm_unwind_c_eh - | mr MULTRES, RD - | beq ->BC_RET_Z // Handle regular return to Lua. - | - |->vm_return: - | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return - | // TMP0 = PC & FRAME_TYPE - | cmpwi TMP0, FRAME_C - | rlwinm TMP2, PC, 0, 0, 28 - | li_vmstate C - | sub TMP2, BASE, TMP2 // TMP2 = previous base. - | bney ->vm_returnp - | - | addic. TMP1, RD, -8 - | stp TMP2, L->base - | lwz TMP2, SAVE_NRES - | subi BASE, BASE, 8 - | st_vmstate - | slwi TMP2, TMP2, 3 - | beq >2 - |1: - | addic. TMP1, TMP1, -8 - | lfd f0, 0(RA) - | addi RA, RA, 8 - | stfd f0, 0(BASE) - | addi BASE, BASE, 8 - | bney <1 - | - |2: - | cmpw TMP2, RD // More/less results wanted? - | bne >6 - |3: - | stp BASE, L->top // Store new top. - | - |->vm_leave_cp: - | lp TMP0, SAVE_CFRAME // Restore previous C frame. - | li CRET1, 0 // Ok return status for vm_pcall. - | stp TMP0, L->cframe - | - |->vm_leave_unw: - | restoreregs - | blr - | - |6: - | ble >7 // Less results wanted? - | // More results wanted. Check stack size and fill up results with nil. - | lwz TMP1, L->maxstack - | cmplw BASE, TMP1 - | bge >8 - | stw TISNIL, 0(BASE) - | addi RD, RD, 8 - | addi BASE, BASE, 8 - | b <2 - | - |7: // Less results wanted. - | subfic TMP3, TMP2, 0 // LUA_MULTRET+1 case? - | sub TMP0, RD, TMP2 - | subfe TMP1, TMP1, TMP1 // TMP1 = TMP2 == 0 ? 0 : -1 - | and TMP0, TMP0, TMP1 - | sub BASE, BASE, TMP0 // Either keep top or shrink it. - | b <3 - | - |8: // Corner case: need to grow stack for filling up results. - | // This can happen if: - | // - A C function grows the stack (a lot). - | // - The GC shrinks the stack in between. - | // - A return back from a lua_call() with (high) nresults adjustment. - | stp BASE, L->top // Save current top held in BASE (yes). 
- | mr SAVE0, RD - | srwi CARG2, TMP2, 3 - | mr CARG1, L - | bl extern lj_state_growstack // (lua_State *L, int n) - | lwz TMP2, SAVE_NRES - | mr RD, SAVE0 - | slwi TMP2, TMP2, 3 - | lp BASE, L->top // Need the (realloced) L->top in BASE. - | b <2 - | - |->vm_unwind_c: // Unwind C stack, return from vm_pcall. - | // (void *cframe, int errcode) - | mr sp, CARG1 - | mr CRET1, CARG2 - |->vm_unwind_c_eh: // Landing pad for external unwinder. - | lwz L, SAVE_L - | .toc ld TOCREG, SAVE_TOC - | li TMP0, ~LJ_VMST_C - | lwz GL:TMP1, L->glref - | stw TMP0, GL:TMP1->vmstate - | b ->vm_leave_unw - | - |->vm_unwind_ff: // Unwind C stack, return from ff pcall. - | // (void *cframe) - |.if GPR64 - | rldicr sp, CARG1, 0, 61 - |.else - | rlwinm sp, CARG1, 0, 0, 29 - |.endif - |->vm_unwind_ff_eh: // Landing pad for external unwinder. - | lwz L, SAVE_L - | .toc ld TOCREG, SAVE_TOC - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lp BASE, L->base - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | lwz DISPATCH, L->glref // Setup pointer to dispatch table. - | li ZERO, 0 - | stw TMP3, TMPD - | li TMP1, LJ_TFALSE - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). - | li TISNIL, LJ_TNIL - | li_vmstate INTERP - | lfs TOBIT, TMPD - | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. - | la RA, -8(BASE) // Results start at BASE-8. - | stw TMP3, TMPD - | addi DISPATCH, DISPATCH, GG_G2DISP - | stw TMP1, 0(RA) // Prepend false to error message. - | li RD, 16 // 2 results: false + error message. - | st_vmstate - | lfs TONUM, TMPD - | b ->vm_returnc - | - |//----------------------------------------------------------------------- - |//-- Grow stack for calls ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_growstack_c: // Grow stack for C function. - | li CARG2, LUA_MINSTACK - | b >2 - | - |->vm_growstack_l: // Grow stack for Lua function. 
- | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC - | add RC, BASE, RC - | sub RA, RA, BASE - | stp BASE, L->base - | addi PC, PC, 4 // Must point after first instruction. - | stp RC, L->top - | srwi CARG2, RA, 3 - |2: - | // L->base = new base, L->top = top - | stw PC, SAVE_PC - | mr CARG1, L - | bl extern lj_state_growstack // (lua_State *L, int n) - | lp BASE, L->base - | lp RC, L->top - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | sub RC, RC, BASE - | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC - | ins_callt // Just retry the call. - | - |//----------------------------------------------------------------------- - |//-- Entry points into the assembler VM --------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_resume: // Setup C frame and resume thread. - | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) - | saveregs - | mr L, CARG1 - | lwz DISPATCH, L->glref // Setup pointer to dispatch table. - | mr BASE, CARG2 - | lbz TMP1, L->status - | stw L, SAVE_L - | li PC, FRAME_CP - | addi TMP0, sp, CFRAME_RESUME - | addi DISPATCH, DISPATCH, GG_G2DISP - | stw CARG3, SAVE_NRES - | cmplwi TMP1, 0 - | stw CARG3, SAVE_ERRF - | stp CARG3, SAVE_CFRAME - | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. - | stp TMP0, L->cframe - | beq >3 - | - | // Resume after yield (like a return). - | stw L, DISPATCH_GL(cur_L)(DISPATCH) - | mr RA, BASE - | lp BASE, L->base - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lp TMP1, L->top - | lwz PC, FRAME_PC(BASE) - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | stb CARG3, L->status - | stw TMP3, TMPD - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 
- | lfs TOBIT, TMPD - | sub RD, TMP1, BASE - | stw TMP3, TMPD - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) - | addi RD, RD, 8 - | stw TMP0, TONUM_HI - | li_vmstate INTERP - | li ZERO, 0 - | st_vmstate - | andix. TMP0, PC, FRAME_TYPE - | mr MULTRES, RD - | lfs TONUM, TMPD - | li TISNIL, LJ_TNIL - | beq ->BC_RET_Z - | b ->vm_return - | - |->vm_pcall: // Setup protected C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) - | saveregs - | li PC, FRAME_CP - | stw CARG4, SAVE_ERRF - | b >1 - | - |->vm_call: // Setup C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1) - | saveregs - | li PC, FRAME_C - | - |1: // Entry point for vm_pcall above (PC = ftype). - | lp TMP1, L:CARG1->cframe - | mr L, CARG1 - | stw CARG3, SAVE_NRES - | lwz DISPATCH, L->glref // Setup pointer to dispatch table. - | stw CARG1, SAVE_L - | mr BASE, CARG2 - | addi DISPATCH, DISPATCH, GG_G2DISP - | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. - | stp TMP1, SAVE_CFRAME - | stp sp, L->cframe // Add our C frame to cframe chain. - | - |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). - | stw L, DISPATCH_GL(cur_L)(DISPATCH) - | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lp TMP1, L->top - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | add PC, PC, BASE - | stw TMP3, TMPD - | li ZERO, 0 - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 
- | lfs TOBIT, TMPD - | sub PC, PC, TMP2 // PC = frame delta + frame type - | stw TMP3, TMPD - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) - | sub NARGS8:RC, TMP1, BASE - | stw TMP0, TONUM_HI - | li_vmstate INTERP - | lfs TONUM, TMPD - | li TISNIL, LJ_TNIL - | st_vmstate - | - |->vm_call_dispatch: - | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC - | lwz TMP0, FRAME_PC(BASE) - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | checkfunc TMP0; bne ->vmeta_call - | - |->vm_call_dispatch_f: - | ins_call - | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC - | - |->vm_cpcall: // Setup protected C frame, call C. - | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) - | saveregs - | mr L, CARG1 - | lwz TMP0, L:CARG1->stack - | stw CARG1, SAVE_L - | lp TMP1, L->top - | lwz DISPATCH, L->glref // Setup pointer to dispatch table. - | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. - | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). - | lp TMP1, L->cframe - | addi DISPATCH, DISPATCH, GG_G2DISP - | .toc lp CARG4, 0(CARG4) - | li TMP2, 0 - | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. - | stw TMP2, SAVE_ERRF // No error function. - | stp TMP1, SAVE_CFRAME - | stp sp, L->cframe // Add our C frame to cframe chain. - | stw L, DISPATCH_GL(cur_L)(DISPATCH) - | mtctr CARG4 - | bctrl // (lua_State *L, lua_CFunction func, void *ud) - |.if PPE - | mr BASE, CRET1 - | cmpwi CRET1, 0 - |.else - | mr. BASE, CRET1 - |.endif - | li PC, FRAME_CP - | bne <3 // Else continue with the call. - | b ->vm_leave_cp // No base? Just remove C frame. - | - |//----------------------------------------------------------------------- - |//-- Metamethod handling ------------------------------------------------ - |//----------------------------------------------------------------------- - | - |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the - |// stack, so BASE doesn't need to be reloaded across these calls. 
- | - |//-- Continuation dispatch ---------------------------------------------- - | - |->cont_dispatch: - | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 - | lwz TMP0, -12(BASE) // Continuation. - | mr RB, BASE - | mr BASE, TMP2 // Restore caller BASE. - | lwz LFUNC:TMP1, FRAME_FUNC(TMP2) - |.if FFI - | cmplwi TMP0, 1 - |.endif - | lwz PC, -16(RB) // Restore PC from [cont|PC]. - | subi TMP2, RD, 8 - | lwz TMP1, LFUNC:TMP1->pc - | stwx TISNIL, RA, TMP2 // Ensure one valid arg. - |.if FFI - | ble >1 - |.endif - | lwz KBASE, PC2PROTO(k)(TMP1) - | // BASE = base, RA = resultptr, RB = meta base - | mtctr TMP0 - | bctr // Jump to continuation. - | - |.if FFI - |1: - | beq ->cont_ffi_callback // cont = 1: return from FFI callback. - | // cont = 0: tailcall from C function. - | subi TMP1, RB, 16 - | sub RC, TMP1, BASE - | b ->vm_call_tail - |.endif - | - |->cont_cat: // RA = resultptr, RB = meta base - | lwz INS, -4(PC) - | subi CARG2, RB, 16 - | decode_RB8 SAVE0, INS - | lfd f0, 0(RA) - | add TMP1, BASE, SAVE0 - | stp BASE, L->base - | cmplw TMP1, CARG2 - | sub CARG3, CARG2, TMP1 - | decode_RA8 RA, INS - | stfd f0, 0(CARG2) - | bney ->BC_CAT_Z - | stfdx f0, BASE, RA - | b ->cont_nop - | - |//-- Table indexing metamethods ----------------------------------------- - | - |->vmeta_tgets1: - | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - | li TMP0, LJ_TSTR - | decode_RB8 RB, INS - | stw STR:RC, 4(CARG3) - | add CARG2, BASE, RB - | stw TMP0, 0(CARG3) - | b >1 - | - |->vmeta_tgets: - | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) - | li TMP0, LJ_TTAB - | stw TAB:RB, 4(CARG2) - | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) - | stw TMP0, 0(CARG2) - | li TMP1, LJ_TSTR - | stw STR:RC, 4(CARG3) - | stw TMP1, 0(CARG3) - | b >1 - | - |->vmeta_tgetb: // TMP0 = index - |.if not DUALNUM - | tonum_u f0, TMP0 - |.endif - | decode_RB8 RB, INS - | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - | add CARG2, BASE, RB - |.if DUALNUM - | stw TISNUM, 0(CARG3) - | stw TMP0, 4(CARG3) - |.else - | stfd f0, 
0(CARG3) - |.endif - | b >1 - | - |->vmeta_tgetv: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | add CARG2, BASE, RB - | add CARG3, BASE, RC - |1: - | stp BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) - | // Returns TValue * (finished) or NULL (metamethod). - | cmplwi CRET1, 0 - | beq >3 - | lfd f0, 0(CRET1) - | ins_next1 - | stfdx f0, BASE, RA - | ins_next2 - | - |3: // Call __index metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k - | subfic TMP1, BASE, FRAME_CONT - | lp BASE, L->top - | stw PC, -16(BASE) // [cont|PC] - | add PC, TMP1, BASE - | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | li NARGS8:RC, 16 // 2 args for func(t, k). - | b ->vm_call_dispatch_f - | - |->vmeta_tgetr: - | bl extern lj_tab_getinth // (GCtab *t, int32_t key) - | // Returns cTValue * or NULL. - | cmplwi CRET1, 0 - | beq >1 - | lfd f14, 0(CRET1) - | b ->BC_TGETR_Z - |1: - | stwx TISNIL, BASE, RA - | b ->cont_nop - | - |//----------------------------------------------------------------------- - | - |->vmeta_tsets1: - | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - | li TMP0, LJ_TSTR - | decode_RB8 RB, INS - | stw STR:RC, 4(CARG3) - | add CARG2, BASE, RB - | stw TMP0, 0(CARG3) - | b >1 - | - |->vmeta_tsets: - | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) - | li TMP0, LJ_TTAB - | stw TAB:RB, 4(CARG2) - | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) - | stw TMP0, 0(CARG2) - | li TMP1, LJ_TSTR - | stw STR:RC, 4(CARG3) - | stw TMP1, 0(CARG3) - | b >1 - | - |->vmeta_tsetb: // TMP0 = index - |.if not DUALNUM - | tonum_u f0, TMP0 - |.endif - | decode_RB8 RB, INS - | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - | add CARG2, BASE, RB - |.if DUALNUM - | stw TISNUM, 0(CARG3) - | stw TMP0, 4(CARG3) - |.else - | stfd f0, 0(CARG3) - |.endif - | b >1 - | - |->vmeta_tsetv: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | add CARG2, BASE, RB - | add CARG3, BASE, RC - |1: - | stp BASE, L->base - | mr 
CARG1, L - | stw PC, SAVE_PC - | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) - | // Returns TValue * (finished) or NULL (metamethod). - | cmplwi CRET1, 0 - | lfdx f0, BASE, RA - | beq >3 - | // NOBARRIER: lj_meta_tset ensures the table is not black. - | ins_next1 - | stfd f0, 0(CRET1) - | ins_next2 - | - |3: // Call __newindex metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) - | subfic TMP1, BASE, FRAME_CONT - | lp BASE, L->top - | stw PC, -16(BASE) // [cont|PC] - | add PC, TMP1, BASE - | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | li NARGS8:RC, 24 // 3 args for func(t, k, v) - | stfd f0, 16(BASE) // Copy value to third argument. - | b ->vm_call_dispatch_f - | - |->vmeta_tsetr: - | stp BASE, L->base - | stw PC, SAVE_PC - | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) - | // Returns TValue *. - | stfd f14, 0(CRET1) - | b ->cont_nop - | - |//-- Comparison metamethods --------------------------------------------- - | - |->vmeta_comp: - | mr CARG1, L - | subi PC, PC, 4 - |.if DUALNUM - | mr CARG2, RA - |.else - | add CARG2, BASE, RA - |.endif - | stw PC, SAVE_PC - |.if DUALNUM - | mr CARG3, RD - |.else - | add CARG3, BASE, RD - |.endif - | stp BASE, L->base - | decode_OP1 CARG4, INS - | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) - | // Returns 0/1 or TValue * (metamethod). - |3: - | cmplwi CRET1, 1 - | bgt ->vmeta_binop - | subfic CRET1, CRET1, 0 - |4: - | lwz INS, 0(PC) - | addi PC, PC, 4 - | decode_RD4 TMP2, INS - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - | and TMP2, TMP2, CRET1 - | add PC, PC, TMP2 - |->cont_nop: - | ins_next - | - |->cont_ra: // RA = resultptr - | lwz INS, -4(PC) - | lfd f0, 0(RA) - | decode_RA8 TMP1, INS - | stfdx f0, BASE, TMP1 - | b ->cont_nop - | - |->cont_condt: // RA = resultptr - | lwz TMP0, 0(RA) - | .gpr64 extsw TMP0, TMP0 - | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is true. 
- | subfe CRET1, CRET1, CRET1 - | not CRET1, CRET1 - | b <4 - | - |->cont_condf: // RA = resultptr - | lwz TMP0, 0(RA) - | .gpr64 extsw TMP0, TMP0 - | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is false. - | subfe CRET1, CRET1, CRET1 - | b <4 - | - |->vmeta_equal: - | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. - | subi PC, PC, 4 - | stp BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) - | // Returns 0/1 or TValue * (metamethod). - | b <3 - | - |->vmeta_equal_cd: - |.if FFI - | mr CARG2, INS - | subi PC, PC, 4 - | stp BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op) - | // Returns 0/1 or TValue * (metamethod). - | b <3 - |.endif - | - |->vmeta_istype: - | subi PC, PC, 4 - | stp BASE, L->base - | srwi CARG2, RA, 3 - | mr CARG1, L - | srwi CARG3, RD, 3 - | stw PC, SAVE_PC - | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) - | b ->cont_nop - | - |//-- Arithmetic metamethods --------------------------------------------- - | - |->vmeta_arith_nv: - | add CARG3, KBASE, RC - | add CARG4, BASE, RB - | b >1 - |->vmeta_arith_nv2: - |.if DUALNUM - | mr CARG3, RC - | mr CARG4, RB - | b >1 - |.endif - | - |->vmeta_unm: - | mr CARG3, RD - | mr CARG4, RD - | b >1 - | - |->vmeta_arith_vn: - | add CARG3, BASE, RB - | add CARG4, KBASE, RC - | b >1 - | - |->vmeta_arith_vv: - | add CARG3, BASE, RB - | add CARG4, BASE, RC - |.if DUALNUM - | b >1 - |.endif - |->vmeta_arith_vn2: - |->vmeta_arith_vv2: - |.if DUALNUM - | mr CARG3, RB - | mr CARG4, RC - |.endif - |1: - | add CARG2, BASE, RA - | stp BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | decode_OP1 CARG5, INS // Caveat: CARG5 overlaps INS. - | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) - | // Returns NULL (finished) or TValue * (metamethod). - | cmplwi CRET1, 0 - | beq ->cont_nop - | - | // Call metamethod for binary op. 
- |->vmeta_binop: - | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 - | sub TMP1, CRET1, BASE - | stw PC, -16(CRET1) // [cont|PC] - | mr TMP2, BASE - | addi PC, TMP1, FRAME_CONT - | mr BASE, CRET1 - | li NARGS8:RC, 16 // 2 args for func(o1, o2). - | b ->vm_call_dispatch - | - |->vmeta_len: -#if LJ_52 - | mr SAVE0, CARG1 -#endif - | mr CARG2, RD - | stp BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | bl extern lj_meta_len // (lua_State *L, TValue *o) - | // Returns NULL (retry) or TValue * (metamethod base). -#if LJ_52 - | cmplwi CRET1, 0 - | bne ->vmeta_binop // Binop call for compatibility. - | mr CARG1, SAVE0 - | b ->BC_LEN_Z -#else - | b ->vmeta_binop // Binop call for compatibility. -#endif - | - |//-- Call metamethod ---------------------------------------------------- - | - |->vmeta_call: // Resolve and call __call metamethod. - | // TMP2 = old base, BASE = new base, RC = nargs*8 - | mr CARG1, L - | stp TMP2, L->base // This is the callers base! - | subi CARG2, BASE, 8 - | stw PC, SAVE_PC - | add CARG3, BASE, RC - | mr SAVE0, NARGS8:RC - | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | addi NARGS8:RC, SAVE0, 8 // Got one more argument now. - | ins_call - | - |->vmeta_callt: // Resolve __call for BC_CALLT. - | // BASE = old base, RA = new base, RC = nargs*8 - | mr CARG1, L - | stp BASE, L->base - | subi CARG2, RA, 8 - | stw PC, SAVE_PC - | add CARG3, RA, RC - | mr SAVE0, NARGS8:RC - | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | lwz TMP1, FRAME_PC(BASE) - | addi NARGS8:RC, SAVE0, 8 // Got one more argument now. - | lwz LFUNC:RB, FRAME_FUNC(RA) // Guaranteed to be a function here. 
- | b ->BC_CALLT_Z - | - |//-- Argument coercion for 'for' statement ------------------------------ - | - |->vmeta_for: - | mr CARG1, L - | stp BASE, L->base - | mr CARG2, RA - | stw PC, SAVE_PC - | mr SAVE0, INS - | bl extern lj_meta_for // (lua_State *L, TValue *base) - |.if JIT - | decode_OP1 TMP0, SAVE0 - |.endif - | decode_RA8 RA, SAVE0 - |.if JIT - | cmpwi TMP0, BC_JFORI - |.endif - | decode_RD8 RD, SAVE0 - |.if JIT - | beqy =>BC_JFORI - |.endif - | b =>BC_FORI - | - |//----------------------------------------------------------------------- - |//-- Fast functions ----------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro .ffunc, name - |->ff_ .. name: - |.endmacro - | - |.macro .ffunc_1, name - |->ff_ .. name: - | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lwz CARG1, 4(BASE) - | blt ->fff_fallback - |.endmacro - | - |.macro .ffunc_2, name - |->ff_ .. name: - | cmplwi NARGS8:RC, 16 - | lwz CARG3, 0(BASE) - | lwz CARG4, 8(BASE) - | lwz CARG1, 4(BASE) - | lwz CARG2, 12(BASE) - | blt ->fff_fallback - |.endmacro - | - |.macro .ffunc_n, name - |->ff_ .. name: - | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lfd FARG1, 0(BASE) - | blt ->fff_fallback - | checknum CARG3; bge ->fff_fallback - |.endmacro - | - |.macro .ffunc_nn, name - |->ff_ .. name: - | cmplwi NARGS8:RC, 16 - | lwz CARG3, 0(BASE) - | lfd FARG1, 0(BASE) - | lwz CARG4, 8(BASE) - | lfd FARG2, 8(BASE) - | blt ->fff_fallback - | checknum CARG3; bge ->fff_fallback - | checknum CARG4; bge ->fff_fallback - |.endmacro - | - |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. 
- |.macro ffgccheck - | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH) - | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) - | cmplw TMP0, TMP1 - | bgel ->fff_gcstep - |.endmacro - | - |//-- Base library: checks ----------------------------------------------- - | - |.ffunc_1 assert - | li TMP1, LJ_TFALSE - | la RA, -8(BASE) - | cmplw cr1, CARG3, TMP1 - | lwz PC, FRAME_PC(BASE) - | bge cr1, ->fff_fallback - | stw CARG3, 0(RA) - | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. - | stw CARG1, 4(RA) - | beq ->fff_res // Done if exactly 1 argument. - | li TMP1, 8 - | subi RC, RC, 8 - |1: - | cmplw TMP1, RC - | lfdx f0, BASE, TMP1 - | stfdx f0, RA, TMP1 - | addi TMP1, TMP1, 8 - | bney <1 - | b ->fff_res - | - |.ffunc type - | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) - | blt ->fff_fallback - | .gpr64 extsw CARG1, CARG1 - | subfc TMP0, TISNUM, CARG1 - | subfe TMP2, CARG1, CARG1 - | orc TMP1, TMP2, TMP0 - | addi TMP1, TMP1, ~LJ_TISNUM+1 - | slwi TMP1, TMP1, 3 - | la TMP2, CFUNC:RB->upvalue - | lfdx FARG1, TMP2, TMP1 - | b ->fff_resn - | - |//-- Base library: getters and setters --------------------------------- - | - |.ffunc_1 getmetatable - | checktab CARG3; bne >6 - |1: // Field metatable must be at same offset for GCtab and GCudata! - | lwz TAB:CARG1, TAB:CARG1->metatable - |2: - | li CARG3, LJ_TNIL - | cmplwi TAB:CARG1, 0 - | lwz STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) - | beq ->fff_restv - | lwz TMP0, TAB:CARG1->hmask - | li CARG3, LJ_TTAB // Use metatable as default result. - | lwz TMP1, STR:RC->hash - | lwz NODE:TMP2, TAB:CARG1->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | slwi TMP0, TMP1, 5 - | slwi TMP1, TMP1, 3 - | sub TMP1, TMP0, TMP1 - | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - |3: // Rearranged logic, because we expect _not_ to find the key. 
- | lwz CARG4, NODE:TMP2->key - | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) - | lwz CARG2, NODE:TMP2->val - | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2) - | checkstr CARG4; bne >4 - | cmpw TMP0, STR:RC; beq >5 - |4: - | lwz NODE:TMP2, NODE:TMP2->next - | cmplwi NODE:TMP2, 0 - | beq ->fff_restv // Not found, keep default result. - | b <3 - |5: - | checknil CARG2 - | beq ->fff_restv // Ditto for nil value. - | mr CARG3, CARG2 // Return value of mt.__metatable. - | mr CARG1, TMP1 - | b ->fff_restv - | - |6: - | cmpwi CARG3, LJ_TUDATA; beq <1 - | .gpr64 extsw CARG3, CARG3 - | subfc TMP0, TISNUM, CARG3 - | subfe TMP2, CARG3, CARG3 - | orc TMP1, TMP2, TMP0 - | addi TMP1, TMP1, ~LJ_TISNUM+1 - | slwi TMP1, TMP1, 2 - | la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH) - | lwzx TAB:CARG1, TMP2, TMP1 - | b <2 - | - |.ffunc_2 setmetatable - | // Fast path: no mt for table yet and not clearing the mt. - | checktab CARG3; bne ->fff_fallback - | lwz TAB:TMP1, TAB:CARG1->metatable - | checktab CARG4; bne ->fff_fallback - | cmplwi TAB:TMP1, 0 - | lbz TMP3, TAB:CARG1->marked - | bne ->fff_fallback - | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) - | stw TAB:CARG2, TAB:CARG1->metatable - | beq ->fff_restv - | barrierback TAB:CARG1, TMP3, TMP0 - | b ->fff_restv - | - |.ffunc rawget - | cmplwi NARGS8:RC, 16 - | lwz CARG4, 0(BASE) - | lwz TAB:CARG2, 4(BASE) - | blt ->fff_fallback - | checktab CARG4; bne ->fff_fallback - | la CARG3, 8(BASE) - | mr CARG1, L - | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) - | // Returns cTValue *. - | lfd FARG1, 0(CRET1) - | b ->fff_resn - | - |//-- Base library: conversions ------------------------------------------ - | - |.ffunc tonumber - | // Only handles the number case inline (without a base argument). - | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) - | lfd FARG1, 0(BASE) - | bne ->fff_fallback // Exactly one argument. 
- | checknum CARG1; bgt ->fff_fallback - | b ->fff_resn - | - |.ffunc_1 tostring - | // Only handles the string or number case inline. - | checkstr CARG3 - | // A __tostring method in the string base metatable is ignored. - | beq ->fff_restv // String key? - | // Handle numbers inline, unless a number base metatable is present. - | lwz TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) - | checknum CARG3 - | cmplwi cr1, TMP0, 0 - | stp BASE, L->base // Add frame since C call can throw. - | crorc 4*cr0+eq, 4*cr0+gt, 4*cr1+eq - | stw PC, SAVE_PC // Redundant (but a defined value). - | beq ->fff_fallback - | ffgccheck - | mr CARG1, L - | mr CARG2, BASE - |.if DUALNUM - | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) - |.else - | bl extern lj_strfmt_num // (lua_State *L, lua_Number *np) - |.endif - | // Returns GCstr *. - | li CARG3, LJ_TSTR - | b ->fff_restv - | - |//-- Base library: iterators ------------------------------------------- - | - |.ffunc next - | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) - | lwz TAB:CARG2, 4(BASE) - | blt ->fff_fallback - | stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil. - | checktab CARG1 - | lwz PC, FRAME_PC(BASE) - | bne ->fff_fallback - | stp BASE, L->base // Add frame since C call can throw. - | mr CARG1, L - | stp BASE, L->top // Dummy frame length is ok. - | la CARG3, 8(BASE) - | stw PC, SAVE_PC - | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - | // Returns 0 at end of traversal. - | cmplwi CRET1, 0 - | li CARG3, LJ_TNIL - | beq ->fff_restv // End of traversal: return nil. - | lfd f0, 8(BASE) // Copy key and value to results. 
- | la RA, -8(BASE) - | lfd f1, 16(BASE) - | stfd f0, 0(RA) - | li RD, (2+1)*8 - | stfd f1, 8(RA) - | b ->fff_res - | - |.ffunc_1 pairs - | checktab CARG3 - | lwz PC, FRAME_PC(BASE) - | bne ->fff_fallback -#if LJ_52 - | lwz TAB:TMP2, TAB:CARG1->metatable - | lfd f0, CFUNC:RB->upvalue[0] - | cmplwi TAB:TMP2, 0 - | la RA, -8(BASE) - | bne ->fff_fallback -#else - | lfd f0, CFUNC:RB->upvalue[0] - | la RA, -8(BASE) -#endif - | stw TISNIL, 8(BASE) - | li RD, (3+1)*8 - | stfd f0, 0(RA) - | b ->fff_res - | - |.ffunc ipairs_aux - | cmplwi NARGS8:RC, 16 - | lwz CARG3, 0(BASE) - | lwz TAB:CARG1, 4(BASE) - | lwz CARG4, 8(BASE) - |.if DUALNUM - | lwz TMP2, 12(BASE) - |.else - | lfd FARG2, 8(BASE) - |.endif - | blt ->fff_fallback - | checktab CARG3 - | checknum cr1, CARG4 - | lwz PC, FRAME_PC(BASE) - |.if DUALNUM - | bne ->fff_fallback - | bne cr1, ->fff_fallback - |.else - | lus TMP0, 0x3ff0 - | stw ZERO, TMPD_LO - | bne ->fff_fallback - | stw TMP0, TMPD_HI - | bge cr1, ->fff_fallback - | lfd FARG1, TMPD - | toint TMP2, FARG2, f0 - |.endif - | lwz TMP0, TAB:CARG1->asize - | lwz TMP1, TAB:CARG1->array - |.if not DUALNUM - | fadd FARG2, FARG2, FARG1 - |.endif - | addi TMP2, TMP2, 1 - | la RA, -8(BASE) - | cmplw TMP0, TMP2 - |.if DUALNUM - | stw TISNUM, 0(RA) - | slwi TMP3, TMP2, 3 - | stw TMP2, 4(RA) - |.else - | slwi TMP3, TMP2, 3 - | stfd FARG2, 0(RA) - |.endif - | ble >2 // Not in array part? - | lwzx TMP2, TMP1, TMP3 - | lfdx f0, TMP1, TMP3 - |1: - | checknil TMP2 - | li RD, (0+1)*8 - | beq ->fff_res // End of iteration, return 0 results. - | li RD, (2+1)*8 - | stfd f0, 8(RA) - | b ->fff_res - |2: // Check for empty hash part first. Otherwise call C function. - | lwz TMP0, TAB:CARG1->hmask - | cmplwi TMP0, 0 - | li RD, (0+1)*8 - | beq ->fff_res - | mr CARG2, TMP2 - | bl extern lj_tab_getinth // (GCtab *t, int32_t key) - | // Returns cTValue * or NULL. 
- | cmplwi CRET1, 0 - | li RD, (0+1)*8 - | beq ->fff_res - | lwz TMP2, 0(CRET1) - | lfd f0, 0(CRET1) - | b <1 - | - |.ffunc_1 ipairs - | checktab CARG3 - | lwz PC, FRAME_PC(BASE) - | bne ->fff_fallback -#if LJ_52 - | lwz TAB:TMP2, TAB:CARG1->metatable - | lfd f0, CFUNC:RB->upvalue[0] - | cmplwi TAB:TMP2, 0 - | la RA, -8(BASE) - | bne ->fff_fallback -#else - | lfd f0, CFUNC:RB->upvalue[0] - | la RA, -8(BASE) -#endif - |.if DUALNUM - | stw TISNUM, 8(BASE) - |.else - | stw ZERO, 8(BASE) - |.endif - | stw ZERO, 12(BASE) - | li RD, (3+1)*8 - | stfd f0, 0(RA) - | b ->fff_res - | - |//-- Base library: catch errors ---------------------------------------- - | - |.ffunc pcall - | cmplwi NARGS8:RC, 8 - | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | blt ->fff_fallback - | mr TMP2, BASE - | la BASE, 8(BASE) - | // Remember active hook before pcall. - | rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31 - | subi NARGS8:RC, NARGS8:RC, 8 - | addi PC, TMP3, 8+FRAME_PCALL - | b ->vm_call_dispatch - | - |.ffunc xpcall - | cmplwi NARGS8:RC, 16 - | lwz CARG4, 8(BASE) - | lfd FARG2, 8(BASE) - | lfd FARG1, 0(BASE) - | blt ->fff_fallback - | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) - | mr TMP2, BASE - | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function. - | la BASE, 16(BASE) - | // Remember active hook before pcall. - | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31 - | stfd FARG2, 0(TMP2) // Swap function and traceback. 
- | subi NARGS8:RC, NARGS8:RC, 16 - | stfd FARG1, 8(TMP2) - | addi PC, TMP1, 16+FRAME_PCALL - | b ->vm_call_dispatch - | - |//-- Coroutine library -------------------------------------------------- - | - |.macro coroutine_resume_wrap, resume - |.if resume - |.ffunc_1 coroutine_resume - | cmpwi CARG3, LJ_TTHREAD; bne ->fff_fallback - |.else - |.ffunc coroutine_wrap_aux - | lwz L:CARG1, CFUNC:RB->upvalue[0].gcr - |.endif - | lbz TMP0, L:CARG1->status - | lp TMP1, L:CARG1->cframe - | lp CARG2, L:CARG1->top - | cmplwi cr0, TMP0, LUA_YIELD - | lp TMP2, L:CARG1->base - | cmplwi cr1, TMP1, 0 - | lwz TMP0, L:CARG1->maxstack - | cmplw cr7, CARG2, TMP2 - | lwz PC, FRAME_PC(BASE) - | crorc 4*cr6+lt, 4*cr0+gt, 4*cr1+eq // st>LUA_YIELD || cframe!=0 - | add TMP2, CARG2, NARGS8:RC - | crandc 4*cr6+gt, 4*cr7+eq, 4*cr0+eq // base==top && st!=LUA_YIELD - | cmplw cr1, TMP2, TMP0 - | cror 4*cr6+lt, 4*cr6+lt, 4*cr6+gt - | stw PC, SAVE_PC - | cror 4*cr6+lt, 4*cr6+lt, 4*cr1+gt // cond1 || cond2 || stackov - | stp BASE, L->base - | blt cr6, ->fff_fallback - |1: - |.if resume - | addi BASE, BASE, 8 // Keep resumed thread in stack for GC. - | subi NARGS8:RC, NARGS8:RC, 8 - | subi TMP2, TMP2, 8 - |.endif - | stp TMP2, L:CARG1->top - | li TMP1, 0 - | stp BASE, L->top - |2: // Move args to coroutine. - | cmpw TMP1, NARGS8:RC - | lfdx f0, BASE, TMP1 - | beq >3 - | stfdx f0, CARG2, TMP1 - | addi TMP1, TMP1, 8 - | b <2 - |3: - | li CARG3, 0 - | mr L:SAVE0, L:CARG1 - | li CARG4, 0 - | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) - | // Returns thread status. - |4: - | lp TMP2, L:SAVE0->base - | cmplwi CRET1, LUA_YIELD - | lp TMP3, L:SAVE0->top - | li_vmstate INTERP - | lp BASE, L->base - | stw L, DISPATCH_GL(cur_L)(DISPATCH) - | st_vmstate - | bgt >8 - | sub RD, TMP3, TMP2 - | lwz TMP0, L->maxstack - | cmplwi RD, 0 - | add TMP1, BASE, RD - | beq >6 // No results? - | cmplw TMP1, TMP0 - | li TMP1, 0 - | bgt >9 // Need to grow stack? 
- | - | subi TMP3, RD, 8 - | stp TMP2, L:SAVE0->top // Clear coroutine stack. - |5: // Move results from coroutine. - | cmplw TMP1, TMP3 - | lfdx f0, TMP2, TMP1 - | stfdx f0, BASE, TMP1 - | addi TMP1, TMP1, 8 - | bne <5 - |6: - | andix. TMP0, PC, FRAME_TYPE - |.if resume - | li TMP1, LJ_TTRUE - | la RA, -8(BASE) - | stw TMP1, -8(BASE) // Prepend true to results. - | addi RD, RD, 16 - |.else - | mr RA, BASE - | addi RD, RD, 8 - |.endif - |7: - | stw PC, SAVE_PC - | mr MULTRES, RD - | beq ->BC_RET_Z - | b ->vm_return - | - |8: // Coroutine returned with error (at co->top-1). - |.if resume - | andix. TMP0, PC, FRAME_TYPE - | la TMP3, -8(TMP3) - | li TMP1, LJ_TFALSE - | lfd f0, 0(TMP3) - | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. - | li RD, (2+1)*8 - | stw TMP1, -8(BASE) // Prepend false to results. - | la RA, -8(BASE) - | stfd f0, 0(BASE) // Copy error message. - | b <7 - |.else - | mr CARG1, L - | mr CARG2, L:SAVE0 - | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) - |.endif - | - |9: // Handle stack expansion on return from yield. - | mr CARG1, L - | srwi CARG2, RD, 3 - | bl extern lj_state_growstack // (lua_State *L, int n) - | li CRET1, 0 - | b <4 - |.endmacro - | - | coroutine_resume_wrap 1 // coroutine.resume - | coroutine_resume_wrap 0 // coroutine.wrap - | - |.ffunc coroutine_yield - | lp TMP0, L->cframe - | add TMP1, BASE, NARGS8:RC - | stp BASE, L->base - | andix. TMP0, TMP0, CFRAME_RESUME - | stp TMP1, L->top - | li CRET1, LUA_YIELD - | beq ->fff_fallback - | stp ZERO, L->cframe - | stb CRET1, L->status - | b ->vm_leave_unw - | - |//-- Math library ------------------------------------------------------- - | - |.ffunc_1 math_abs - | checknum CARG3 - |.if DUALNUM - | bne >2 - | srawi TMP1, CARG1, 31 - | xor TMP2, TMP1, CARG1 - |.if GPR64 - | lus TMP0, 0x8000 - | sub CARG1, TMP2, TMP1 - | cmplw CARG1, TMP0 - | beq >1 - |.else - | sub. 
CARG1, TMP2, TMP1 - | blt >1 - |.endif - |->fff_resi: - | lwz PC, FRAME_PC(BASE) - | la RA, -8(BASE) - | stw TISNUM, -8(BASE) - | stw CRET1, -4(BASE) - | b ->fff_res1 - |1: - | lus CARG3, 0x41e0 // 2^31. - | li CARG1, 0 - | b ->fff_restv - |2: - |.endif - | bge ->fff_fallback - | rlwinm CARG3, CARG3, 0, 1, 31 - | // Fallthrough. - | - |->fff_restv: - | // CARG3/CARG1 = TValue result. - | lwz PC, FRAME_PC(BASE) - | stw CARG3, -8(BASE) - | la RA, -8(BASE) - | stw CARG1, -4(BASE) - |->fff_res1: - | // RA = results, PC = return. - | li RD, (1+1)*8 - |->fff_res: - | // RA = results, RD = (nresults+1)*8, PC = return. - | andix. TMP0, PC, FRAME_TYPE - | mr MULTRES, RD - | bney ->vm_return - | lwz INS, -4(PC) - | decode_RB8 RB, INS - |5: - | cmplw RB, RD // More results expected? - | decode_RA8 TMP0, INS - | bgt >6 - | ins_next1 - | // Adjust BASE. KBASE is assumed to be set for the calling frame. - | sub BASE, RA, TMP0 - | ins_next2 - | - |6: // Fill up results with nil. - | subi TMP1, RD, 8 - | addi RD, RD, 8 - | stwx TISNIL, RA, TMP1 - | b <5 - | - |.macro math_extern, func - | .ffunc_n math_ .. func - | blex func - | b ->fff_resn - |.endmacro - | - |.macro math_extern2, func - | .ffunc_nn math_ .. func - | blex func - | b ->fff_resn - |.endmacro - | - |.macro math_round, func - | .ffunc_1 math_ .. func - | checknum CARG3; beqy ->fff_restv - | rlwinm TMP2, CARG3, 12, 21, 31 - | bge ->fff_fallback - | addic. TMP2, TMP2, -1023 // exp = exponent(x) - 1023 - | cmplwi cr1, TMP2, 31 // 0 <= exp < 31? 
- | subfic TMP0, TMP2, 31 - | blt >3 - | slwi TMP1, CARG3, 11 - | srwi TMP3, CARG1, 21 - | oris TMP1, TMP1, 0x8000 - | addi TMP2, TMP2, 1 - | or TMP1, TMP1, TMP3 - | slwi CARG2, CARG1, 11 - | bge cr1, >4 - | slw TMP3, TMP1, TMP2 - | srw RD, TMP1, TMP0 - | or TMP3, TMP3, CARG2 - | srawi TMP2, CARG3, 31 - |.if "func" == "floor" - | and TMP1, TMP3, TMP2 - | addic TMP0, TMP1, -1 - | subfe TMP1, TMP0, TMP1 - | add CARG1, RD, TMP1 - | xor CARG1, CARG1, TMP2 - | sub CARG1, CARG1, TMP2 - | b ->fff_resi - |.else - | andc TMP1, TMP3, TMP2 - | addic TMP0, TMP1, -1 - | subfe TMP1, TMP0, TMP1 - | add CARG1, RD, TMP1 - | cmpw CARG1, RD - | xor CARG1, CARG1, TMP2 - | sub CARG1, CARG1, TMP2 - | bge ->fff_resi - | // Overflow to 2^31. - | lus CARG3, 0x41e0 // 2^31. - | li CARG1, 0 - | b ->fff_restv - |.endif - |3: // |x| < 1 - | slwi TMP2, CARG3, 1 - | srawi TMP1, CARG3, 31 - | or TMP2, CARG1, TMP2 // ztest = (hi+hi) | lo - |.if "func" == "floor" - | and TMP1, TMP2, TMP1 // (ztest & sign) == 0 ? 0 : -1 - | subfic TMP2, TMP1, 0 - | subfe CARG1, CARG1, CARG1 - |.else - | andc TMP1, TMP2, TMP1 // (ztest & ~sign) == 0 ? 0 : 1 - | addic TMP2, TMP1, -1 - | subfe CARG1, TMP2, TMP1 - |.endif - | b ->fff_resi - |4: // exp >= 31. Check for -(2^31). - | xoris TMP1, TMP1, 0x8000 - | srawi TMP2, CARG3, 31 - |.if "func" == "floor" - | or TMP1, TMP1, CARG2 - |.endif - |.if PPE - | orc TMP1, TMP1, TMP2 - | cmpwi TMP1, 0 - |.else - | orc. TMP1, TMP1, TMP2 - |.endif - | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq - | lus CARG1, 0x8000 // -(2^31). - | beqy ->fff_resi - |5: - | lfd FARG1, 0(BASE) - | blex func - | b ->fff_resn - |.endmacro - | - |.if DUALNUM - | math_round floor - | math_round ceil - |.else - | // NYI: use internal implementation. 
- | math_extern floor - | math_extern ceil - |.endif - | - |.if SQRT - |.ffunc_n math_sqrt - | fsqrt FARG1, FARG1 - | b ->fff_resn - |.else - | math_extern sqrt - |.endif - | - |.ffunc math_log - | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lfd FARG1, 0(BASE) - | bne ->fff_fallback // Need exactly 1 argument. - | checknum CARG3; bge ->fff_fallback - | blex log - | b ->fff_resn - | - | math_extern log10 - | math_extern exp - | math_extern sin - | math_extern cos - | math_extern tan - | math_extern asin - | math_extern acos - | math_extern atan - | math_extern sinh - | math_extern cosh - | math_extern tanh - | math_extern2 pow - | math_extern2 atan2 - | math_extern2 fmod - | - |.if DUALNUM - |.ffunc math_ldexp - | cmplwi NARGS8:RC, 16 - | lwz CARG3, 0(BASE) - | lfd FARG1, 0(BASE) - | lwz CARG4, 8(BASE) - |.if GPR64 - | lwz CARG2, 12(BASE) - |.else - | lwz CARG1, 12(BASE) - |.endif - | blt ->fff_fallback - | checknum CARG3; bge ->fff_fallback - | checknum CARG4; bne ->fff_fallback - |.else - |.ffunc_nn math_ldexp - |.if GPR64 - | toint CARG2, FARG2 - |.else - | toint CARG1, FARG2 - |.endif - |.endif - | blex ldexp - | b ->fff_resn - | - |.ffunc_n math_frexp - |.if GPR64 - | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) - |.else - | la CARG1, DISPATCH_GL(tmptv)(DISPATCH) - |.endif - | lwz PC, FRAME_PC(BASE) - | blex frexp - | lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH) - | la RA, -8(BASE) - |.if not DUALNUM - | tonum_i FARG2, TMP1 - |.endif - | stfd FARG1, 0(RA) - | li RD, (2+1)*8 - |.if DUALNUM - | stw TISNUM, 8(RA) - | stw TMP1, 12(RA) - |.else - | stfd FARG2, 8(RA) - |.endif - | b ->fff_res - | - |.ffunc_n math_modf - |.if GPR64 - | la CARG2, -8(BASE) - |.else - | la CARG1, -8(BASE) - |.endif - | lwz PC, FRAME_PC(BASE) - | blex modf - | la RA, -8(BASE) - | stfd FARG1, 0(BASE) - | li RD, (2+1)*8 - | b ->fff_res - | - |.macro math_minmax, name, ismax - |.if DUALNUM - | .ffunc_1 name - | checknum CARG3 - | addi TMP1, BASE, 8 - | add TMP2, BASE, NARGS8:RC - | bne >4 - |1: // 
Handle integers. - | lwz CARG4, 0(TMP1) - | cmplw cr1, TMP1, TMP2 - | lwz CARG2, 4(TMP1) - | bge cr1, ->fff_resi - | checknum CARG4 - | xoris TMP0, CARG1, 0x8000 - | xoris TMP3, CARG2, 0x8000 - | bne >3 - | subfc TMP3, TMP3, TMP0 - | subfe TMP0, TMP0, TMP0 - |.if ismax - | andc TMP3, TMP3, TMP0 - |.else - | and TMP3, TMP3, TMP0 - |.endif - | add CARG1, TMP3, CARG2 - |.if GPR64 - | rldicl CARG1, CARG1, 0, 32 - |.endif - | addi TMP1, TMP1, 8 - | b <1 - |3: - | bge ->fff_fallback - | // Convert intermediate result to number and continue below. - | tonum_i FARG1, CARG1 - | lfd FARG2, 0(TMP1) - | b >6 - |4: - | lfd FARG1, 0(BASE) - | bge ->fff_fallback - |5: // Handle numbers. - | lwz CARG4, 0(TMP1) - | cmplw cr1, TMP1, TMP2 - | lfd FARG2, 0(TMP1) - | bge cr1, ->fff_resn - | checknum CARG4; bge >7 - |6: - | fsub f0, FARG1, FARG2 - | addi TMP1, TMP1, 8 - |.if ismax - | fsel FARG1, f0, FARG1, FARG2 - |.else - | fsel FARG1, f0, FARG2, FARG1 - |.endif - | b <5 - |7: // Convert integer to number and continue above. - | lwz CARG2, 4(TMP1) - | bne ->fff_fallback - | tonum_i FARG2, CARG2 - | b <6 - |.else - | .ffunc_n name - | li TMP1, 8 - |1: - | lwzx CARG2, BASE, TMP1 - | lfdx FARG2, BASE, TMP1 - | cmplw cr1, TMP1, NARGS8:RC - | checknum CARG2 - | bge cr1, ->fff_resn - | bge ->fff_fallback - | fsub f0, FARG1, FARG2 - | addi TMP1, TMP1, 8 - |.if ismax - | fsel FARG1, f0, FARG1, FARG2 - |.else - | fsel FARG1, f0, FARG2, FARG1 - |.endif - | b <1 - |.endif - |.endmacro - | - | math_minmax math_min, 0 - | math_minmax math_max, 1 - | - |//-- String library ----------------------------------------------------- - | - |.ffunc string_byte // Only handle the 1-arg case here. - | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lwz STR:CARG1, 4(BASE) - | bne ->fff_fallback // Need exactly 1 argument. - | checkstr CARG3 - | bne ->fff_fallback - | lwz TMP0, STR:CARG1->len - |.if DUALNUM - | lbz CARG1, STR:CARG1[1] // Access is always ok (NUL at end). 
- | li RD, (0+1)*8 - | lwz PC, FRAME_PC(BASE) - | cmplwi TMP0, 0 - | la RA, -8(BASE) - | beqy ->fff_res - | b ->fff_resi - |.else - | lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end). - | addic TMP3, TMP0, -1 // RD = ((str->len != 0)+1)*8 - | subfe RD, TMP3, TMP0 - | stw TMP1, TONUM_LO // Inlined tonum_u f0, TMP1. - | addi RD, RD, 1 - | lfd f0, TONUM_D - | la RA, -8(BASE) - | lwz PC, FRAME_PC(BASE) - | fsub f0, f0, TOBIT - | slwi RD, RD, 3 - | stfd f0, 0(RA) - | b ->fff_res - |.endif - | - |.ffunc string_char // Only handle the 1-arg case here. - | ffgccheck - | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - |.if DUALNUM - | lwz TMP0, 4(BASE) - | bne ->fff_fallback // Exactly 1 argument. - | checknum CARG3; bne ->fff_fallback - | la CARG2, 7(BASE) - |.else - | lfd FARG1, 0(BASE) - | bne ->fff_fallback // Exactly 1 argument. - | checknum CARG3; bge ->fff_fallback - | toint TMP0, FARG1 - | la CARG2, TMPD_BLO - |.endif - | li CARG3, 1 - | cmplwi TMP0, 255; bgt ->fff_fallback - |->fff_newstr: - | mr CARG1, L - | stp BASE, L->base - | stw PC, SAVE_PC - | bl extern lj_str_new // (lua_State *L, char *str, size_t l) - |->fff_resstr: - | // Returns GCstr *. - | lp BASE, L->base - | li CARG3, LJ_TSTR - | b ->fff_restv - | - |.ffunc string_sub - | ffgccheck - | cmplwi NARGS8:RC, 16 - | lwz CARG3, 16(BASE) - |.if not DUALNUM - | lfd f0, 16(BASE) - |.endif - | lwz TMP0, 0(BASE) - | lwz STR:CARG1, 4(BASE) - | blt ->fff_fallback - | lwz CARG2, 8(BASE) - |.if DUALNUM - | lwz TMP1, 12(BASE) - |.else - | lfd f1, 8(BASE) - |.endif - | li TMP2, -1 - | beq >1 - |.if DUALNUM - | checknum CARG3 - | lwz TMP2, 20(BASE) - | bne ->fff_fallback - |1: - | checknum CARG2; bne ->fff_fallback - |.else - | checknum CARG3; bge ->fff_fallback - | toint TMP2, f0 - |1: - | checknum CARG2; bge ->fff_fallback - |.endif - | checkstr TMP0; bne ->fff_fallback - |.if not DUALNUM - | toint TMP1, f1 - |.endif - | lwz TMP0, STR:CARG1->len - | cmplw TMP0, TMP2 // len < end? 
(unsigned compare) - | addi TMP3, TMP2, 1 - | blt >5 - |2: - | cmpwi TMP1, 0 // start <= 0? - | add TMP3, TMP1, TMP0 - | ble >7 - |3: - | sub CARG3, TMP2, TMP1 - | addi CARG2, STR:CARG1, #STR-1 - | srawi TMP0, CARG3, 31 - | addi CARG3, CARG3, 1 - | add CARG2, CARG2, TMP1 - | andc CARG3, CARG3, TMP0 - |.if GPR64 - | rldicl CARG2, CARG2, 0, 32 - | rldicl CARG3, CARG3, 0, 32 - |.endif - | b ->fff_newstr - | - |5: // Negative end or overflow. - | cmpw TMP0, TMP2 // len >= end? (signed compare) - | add TMP2, TMP0, TMP3 // Negative end: end = end+len+1. - | bge <2 - | mr TMP2, TMP0 // Overflow: end = len. - | b <2 - | - |7: // Negative start or underflow. - | .gpr64 extsw TMP1, TMP1 - | addic CARG3, TMP1, -1 - | subfe CARG3, CARG3, CARG3 - | srawi CARG2, TMP3, 31 // Note: modifies carry. - | andc TMP3, TMP3, CARG3 - | andc TMP1, TMP3, CARG2 - | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0) - | b <3 - | - |.macro ffstring_op, name - | .ffunc string_ .. name - | ffgccheck - | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lwz STR:CARG2, 4(BASE) - | blt ->fff_fallback - | checkstr CARG3 - | la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH) - | bne ->fff_fallback - | lwz TMP0, SBUF:CARG1->b - | stw L, SBUF:CARG1->L - | stp BASE, L->base - | stw PC, SAVE_PC - | stw TMP0, SBUF:CARG1->p - | bl extern lj_buf_putstr_ .. 
name - | bl extern lj_buf_tostr - | b ->fff_resstr - |.endmacro - | - |ffstring_op reverse - |ffstring_op lower - |ffstring_op upper - | - |//-- Bit library -------------------------------------------------------- - | - |.macro .ffunc_bit, name - |.if DUALNUM - | .ffunc_1 bit_..name - | checknum CARG3; bnel ->fff_tobit_fb - |.else - | .ffunc_n bit_..name - | fadd FARG1, FARG1, TOBIT - | stfd FARG1, TMPD - | lwz CARG1, TMPD_LO - |.endif - |.endmacro - | - |.macro .ffunc_bit_op, name, ins - | .ffunc_bit name - | addi TMP1, BASE, 8 - | add TMP2, BASE, NARGS8:RC - |1: - | lwz CARG4, 0(TMP1) - | cmplw cr1, TMP1, TMP2 - |.if DUALNUM - | lwz CARG2, 4(TMP1) - |.else - | lfd FARG1, 0(TMP1) - |.endif - | bgey cr1, ->fff_resi - | checknum CARG4 - |.if DUALNUM - | bnel ->fff_bitop_fb - |.else - | fadd FARG1, FARG1, TOBIT - | bge ->fff_fallback - | stfd FARG1, TMPD - | lwz CARG2, TMPD_LO - |.endif - | ins CARG1, CARG1, CARG2 - | addi TMP1, TMP1, 8 - | b <1 - |.endmacro - | - |.ffunc_bit_op band, and - |.ffunc_bit_op bor, or - |.ffunc_bit_op bxor, xor - | - |.ffunc_bit bswap - | rotlwi TMP0, CARG1, 8 - | rlwimi TMP0, CARG1, 24, 0, 7 - | rlwimi TMP0, CARG1, 24, 16, 23 - | mr CRET1, TMP0 - | b ->fff_resi - | - |.ffunc_bit bnot - | not CRET1, CARG1 - | b ->fff_resi - | - |.macro .ffunc_bit_sh, name, ins, shmod - |.if DUALNUM - | .ffunc_2 bit_..name - | checknum CARG3; bnel ->fff_tobit_fb - | // Note: no inline conversion from number for 2nd argument! 
- | checknum CARG4; bne ->fff_fallback - |.else - | .ffunc_nn bit_..name - | fadd FARG1, FARG1, TOBIT - | fadd FARG2, FARG2, TOBIT - | stfd FARG1, TMPD - | lwz CARG1, TMPD_LO - | stfd FARG2, TMPD - | lwz CARG2, TMPD_LO - |.endif - |.if shmod == 1 - | rlwinm CARG2, CARG2, 0, 27, 31 - |.elif shmod == 2 - | neg CARG2, CARG2 - |.endif - | ins CRET1, CARG1, CARG2 - | b ->fff_resi - |.endmacro - | - |.ffunc_bit_sh lshift, slw, 1 - |.ffunc_bit_sh rshift, srw, 1 - |.ffunc_bit_sh arshift, sraw, 1 - |.ffunc_bit_sh rol, rotlw, 0 - |.ffunc_bit_sh ror, rotlw, 2 - | - |.ffunc_bit tobit - |.if DUALNUM - | b ->fff_resi - |.else - |->fff_resi: - | tonum_i FARG1, CRET1 - |.endif - |->fff_resn: - | lwz PC, FRAME_PC(BASE) - | la RA, -8(BASE) - | stfd FARG1, -8(BASE) - | b ->fff_res1 - | - |// Fallback FP number to bit conversion. - |->fff_tobit_fb: - |.if DUALNUM - | lfd FARG1, 0(BASE) - | bgt ->fff_fallback - | fadd FARG1, FARG1, TOBIT - | stfd FARG1, TMPD - | lwz CARG1, TMPD_LO - | blr - |.endif - |->fff_bitop_fb: - |.if DUALNUM - | lfd FARG1, 0(TMP1) - | bgt ->fff_fallback - | fadd FARG1, FARG1, TOBIT - | stfd FARG1, TMPD - | lwz CARG2, TMPD_LO - | blr - |.endif - | - |//----------------------------------------------------------------------- - | - |->fff_fallback: // Call fast function fallback handler. - | // BASE = new base, RB = CFUNC, RC = nargs*8 - | lp TMP3, CFUNC:RB->f - | add TMP1, BASE, NARGS8:RC - | lwz PC, FRAME_PC(BASE) // Fallback may overwrite PC. - | addi TMP0, TMP1, 8*LUA_MINSTACK - | lwz TMP2, L->maxstack - | stw PC, SAVE_PC // Redundant (but a defined value). - | .toc lp TMP3, 0(TMP3) - | cmplw TMP0, TMP2 - | stp BASE, L->base - | stp TMP1, L->top - | mr CARG1, L - | bgt >5 // Need to grow stack. - | mtctr TMP3 - | bctrl // (lua_State *L) - | // Either throws an error, or recovers and returns -1, 0 or nresults+1. - | lp BASE, L->base - | cmpwi CRET1, 0 - | slwi RD, CRET1, 3 - | la RA, -8(BASE) - | bgt ->fff_res // Returned nresults+1? 
- |1: // Returned 0 or -1: retry fast path. - | lp TMP0, L->top - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | sub NARGS8:RC, TMP0, BASE - | bne ->vm_call_tail // Returned -1? - | ins_callt // Returned 0: retry fast path. - | - |// Reconstruct previous base for vmeta_call during tailcall. - |->vm_call_tail: - | andix. TMP0, PC, FRAME_TYPE - | rlwinm TMP1, PC, 0, 0, 28 - | bne >3 - | lwz INS, -4(PC) - | decode_RA8 TMP1, INS - | addi TMP1, TMP1, 8 - |3: - | sub TMP2, BASE, TMP1 - | b ->vm_call_dispatch // Resolve again for tailcall. - | - |5: // Grow stack for fallback handler. - | li CARG2, LUA_MINSTACK - | bl extern lj_state_growstack // (lua_State *L, int n) - | lp BASE, L->base - | cmpw TMP0, TMP0 // Set 4*cr0+eq to force retry. - | b <1 - | - |->fff_gcstep: // Call GC step function. - | // BASE = new base, RC = nargs*8 - | mflr SAVE0 - | stp BASE, L->base - | add TMP0, BASE, NARGS8:RC - | stw PC, SAVE_PC // Redundant (but a defined value). - | stp TMP0, L->top - | mr CARG1, L - | bl extern lj_gc_step // (lua_State *L) - | lp BASE, L->base - | mtlr SAVE0 - | lp TMP0, L->top - | sub NARGS8:RC, TMP0, BASE - | lwz CFUNC:RB, FRAME_FUNC(BASE) - | blr - | - |//----------------------------------------------------------------------- - |//-- Special dispatch targets ------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_record: // Dispatch target for recording phase. - |.if JIT - | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | andix. TMP0, TMP3, HOOK_VMEVENT // No recording while in vmevent. - | bne >5 - | // Decrement the hookcount for consistency, but always do the call. - | lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andix. TMP0, TMP3, HOOK_ACTIVE - | bne >1 - | subi TMP2, TMP2, 1 - | andi. TMP0, TMP3, LUA_MASKLINE|LUA_MASKCOUNT - | beqy >1 - | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | b >1 - |.endif - | - |->vm_rethook: // Dispatch target for return hooks. 
- | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | andix. TMP0, TMP3, HOOK_ACTIVE // Hook already active? - | beq >1 - |5: // Re-dispatch to static ins. - | addi TMP1, TMP1, GG_DISP2STATIC // Assumes decode_OPP TMP1, INS. - | lpx TMP0, DISPATCH, TMP1 - | mtctr TMP0 - | bctr - | - |->vm_inshook: // Dispatch target for instr/line hooks. - | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andix. TMP0, TMP3, HOOK_ACTIVE // Hook already active? - | rlwinm TMP0, TMP3, 31-LUA_HOOKLINE, 31, 0 - | bne <5 - | - | cmpwi cr1, TMP0, 0 - | addic. TMP2, TMP2, -1 - | beq cr1, <5 - | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | beq >1 - | bge cr1, <5 - |1: - | mr CARG1, L - | stw MULTRES, SAVE_MULTRES - | mr CARG2, PC - | stp BASE, L->base - | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. - | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) - |3: - | lp BASE, L->base - |4: // Re-dispatch to static ins. - | lwz INS, -4(PC) - | decode_OPP TMP1, INS - | decode_RB8 RB, INS - | addi TMP1, TMP1, GG_DISP2STATIC - | decode_RD8 RD, INS - | lpx TMP0, DISPATCH, TMP1 - | decode_RA8 RA, INS - | decode_RC8 RC, INS - | mtctr TMP0 - | bctr - | - |->cont_hook: // Continue from hook yield. - | addi PC, PC, 4 - | lwz MULTRES, -20(RB) // Restore MULTRES for *M ins. - | b <4 - | - |->vm_hotloop: // Hot loop counter underflow. - |.if JIT - | lwz LFUNC:TMP1, FRAME_FUNC(BASE) - | addi CARG1, DISPATCH, GG_DISP2J - | stw PC, SAVE_PC - | lwz TMP1, LFUNC:TMP1->pc - | mr CARG2, PC - | stw L, DISPATCH_J(L)(DISPATCH) - | lbz TMP1, PC2PROTO(framesize)(TMP1) - | stp BASE, L->base - | slwi TMP1, TMP1, 3 - | add TMP1, BASE, TMP1 - | stp TMP1, L->top - | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) - | b <3 - |.endif - | - |->vm_callhook: // Dispatch target for call hooks. - | mr CARG2, PC - |.if JIT - | b >1 - |.endif - | - |->vm_hotcall: // Hot call counter underflow. 
- |.if JIT - | ori CARG2, PC, 1 - |1: - |.endif - | add TMP0, BASE, RC - | stw PC, SAVE_PC - | mr CARG1, L - | stp BASE, L->base - | sub RA, RA, BASE - | stp TMP0, L->top - | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) - | // Returns ASMFunction. - | lp BASE, L->base - | lp TMP0, L->top - | stw ZERO, SAVE_PC // Invalidate for subsequent line hook. - | sub NARGS8:RC, TMP0, BASE - | add RA, BASE, RA - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | lwz INS, -4(PC) - | mtctr CRET1 - | bctr - | - |->cont_stitch: // Trace stitching. - |.if JIT - | // RA = resultptr, RB = meta base - | lwz INS, -4(PC) - | lwz TRACE:TMP2, -20(RB) // Save previous trace. - | addic. TMP1, MULTRES, -8 - | decode_RA8 RC, INS // Call base. - | beq >2 - |1: // Move results down. - | lfd f0, 0(RA) - | addic. TMP1, TMP1, -8 - | addi RA, RA, 8 - | stfdx f0, BASE, RC - | addi RC, RC, 8 - | bne <1 - |2: - | decode_RA8 RA, INS - | decode_RB8 RB, INS - | add RA, RA, RB - |3: - | cmplw RA, RC - | bgt >9 // More results wanted? - | - | lhz TMP3, TRACE:TMP2->traceno - | lhz RD, TRACE:TMP2->link - | cmpw RD, TMP3 - | cmpwi cr1, RD, 0 - | beq ->cont_nop // Blacklisted. - | slwi RD, RD, 3 - | bne cr1, =>BC_JLOOP // Jump to stitched trace. - | - | // Stitch a new trace to the previous trace. - | stw TMP3, DISPATCH_J(exitno)(DISPATCH) - | stp L, DISPATCH_J(L)(DISPATCH) - | stp BASE, L->base - | addi CARG1, DISPATCH, GG_DISP2J - | mr CARG2, PC - | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) - | lp BASE, L->base - | b ->cont_nop - | - |9: - | stwx TISNIL, BASE, RC - | addi RC, RC, 8 - | b <3 - |.endif - | - |->vm_profhook: // Dispatch target for profiler hook. -#if LJ_HASPROFILE - | mr CARG1, L - | stw MULTRES, SAVE_MULTRES - | mr CARG2, PC - | stp BASE, L->base - | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) - | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. 
- | lp BASE, L->base - | subi PC, PC, 4 - | b ->cont_nop -#endif - | - |//----------------------------------------------------------------------- - |//-- Trace exit handler ------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro savex_, a, b, c, d - | stfd f..a, 16+a*8(sp) - | stfd f..b, 16+b*8(sp) - | stfd f..c, 16+c*8(sp) - | stfd f..d, 16+d*8(sp) - |.endmacro - | - |->vm_exit_handler: - |.if JIT - | addi sp, sp, -(16+32*8+32*4) - | stmw r2, 16+32*8+2*4(sp) - | addi DISPATCH, JGL, -GG_DISP2G-32768 - | li CARG2, ~LJ_VMST_EXIT - | lwz CARG1, 16+32*8+32*4(sp) // Get stack chain. - | stw CARG2, DISPATCH_GL(vmstate)(DISPATCH) - | savex_ 0,1,2,3 - | stw CARG1, 0(sp) // Store extended stack chain. - | clrso TMP1 - | savex_ 4,5,6,7 - | addi CARG2, sp, 16+32*8+32*4 // Recompute original value of sp. - | savex_ 8,9,10,11 - | stw CARG2, 16+32*8+1*4(sp) // Store sp in RID_SP. - | savex_ 12,13,14,15 - | mflr CARG3 - | li TMP1, 0 - | savex_ 16,17,18,19 - | stw TMP1, 16+32*8+0*4(sp) // Clear RID_TMP. - | savex_ 20,21,22,23 - | lhz CARG4, 2(CARG3) // Load trace number. - | savex_ 24,25,26,27 - | lwz L, DISPATCH_GL(cur_L)(DISPATCH) - | savex_ 28,29,30,31 - | sub CARG3, TMP0, CARG3 // Compute exit number. - | lp BASE, DISPATCH_GL(jit_base)(DISPATCH) - | srwi CARG3, CARG3, 2 - | stp L, DISPATCH_J(L)(DISPATCH) - | subi CARG3, CARG3, 2 - | stp BASE, L->base - | stw CARG4, DISPATCH_J(parent)(DISPATCH) - | stw TMP1, DISPATCH_GL(jit_base)(DISPATCH) - | addi CARG1, DISPATCH, GG_DISP2J - | stw CARG3, DISPATCH_J(exitno)(DISPATCH) - | addi CARG2, sp, 16 - | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) - | // Returns MULTRES (unscaled) or negated error code. - | lp TMP1, L->cframe - | lwz TMP2, 0(sp) - | lp BASE, L->base - |.if GPR64 - | rldicr sp, TMP1, 0, 61 - |.else - | rlwinm sp, TMP1, 0, 0, 29 - |.endif - | lwz PC, SAVE_PC // Get SAVE_PC. 
- | stw TMP2, 0(sp) - | stw L, SAVE_L // Set SAVE_L (on-trace resume/yield). - | b >1 - |.endif - |->vm_exit_interp: - |.if JIT - | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set. - | lwz L, SAVE_L - | addi DISPATCH, JGL, -GG_DISP2G-32768 - | stp BASE, L->base - |1: - | cmpwi CARG1, 0 - | blt >9 // Check for error from exit. - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | slwi MULTRES, CARG1, 3 - | li TMP2, 0 - | stw MULTRES, SAVE_MULTRES - | lwz TMP1, LFUNC:RB->pc - | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH) - | lwz KBASE, PC2PROTO(k)(TMP1) - | // Setup type comparison constants. - | li TISNUM, LJ_TISNUM - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | stw TMP3, TMPD - | li ZERO, 0 - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). - | lfs TOBIT, TMPD - | stw TMP3, TMPD - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) - | li TISNIL, LJ_TNIL - | stw TMP0, TONUM_HI - | lfs TONUM, TMPD - | // Modified copy of ins_next which handles function header dispatch, too. - | lwz INS, 0(PC) - | addi PC, PC, 4 - | // Assumes TISNIL == ~LJ_VMST_INTERP == -1. - | stw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) - | decode_OPP TMP1, INS - | decode_RA8 RA, INS - | lpx TMP0, DISPATCH, TMP1 - | mtctr TMP0 - | cmplwi TMP1, BC_FUNCF*4 // Function header? - | bge >2 - | decode_RB8 RB, INS - | decode_RD8 RD, INS - | decode_RC8 RC, INS - | bctr - |2: - | cmplwi TMP1, (BC_FUNCC+2)*4 // Fast function? - | blt >3 - | // Check frame below fast function. - | lwz TMP1, FRAME_PC(BASE) - | andix. TMP0, TMP1, FRAME_TYPE - | bney >3 // Trace stitching continuation? - | // Otherwise set KBASE for Lua function below fast function. - | lwz TMP2, -4(TMP1) - | decode_RA8 TMP0, TMP2 - | sub TMP1, BASE, TMP0 - | lwz LFUNC:TMP2, -12(TMP1) - | lwz TMP1, LFUNC:TMP2->pc - | lwz KBASE, PC2PROTO(k)(TMP1) - |3: - | subi RC, MULTRES, 8 - | add RA, RA, BASE - | bctr - | - |9: // Rethrow error from the right C frame. 
- | neg CARG2, CARG1 - | mr CARG1, L - | bl extern lj_err_throw // (lua_State *L, int errcode) - |.endif - | - |//----------------------------------------------------------------------- - |//-- Math helper functions ---------------------------------------------- - |//----------------------------------------------------------------------- - | - |// NYI: Use internal implementations of floor, ceil, trunc. - | - |->vm_modi: - | divwo. TMP0, CARG1, CARG2 - | bso >1 - |.if GPR64 - | xor CARG3, CARG1, CARG2 - | cmpwi CARG3, 0 - |.else - | xor. CARG3, CARG1, CARG2 - |.endif - | mullw TMP0, TMP0, CARG2 - | sub CARG1, CARG1, TMP0 - | bgelr - | cmpwi CARG1, 0; beqlr - | add CARG1, CARG1, CARG2 - | blr - |1: - | cmpwi CARG2, 0 - | li CARG1, 0 - | beqlr - | clrso TMP0 // Clear SO for -2147483648 % -1 and return 0. - | blr - | - |//----------------------------------------------------------------------- - |//-- Miscellaneous functions -------------------------------------------- - |//----------------------------------------------------------------------- - | - |// void lj_vm_cachesync(void *start, void *end) - |// Flush D-Cache and invalidate I-Cache. Assumes 32 byte cache line size. - |// This is a good lower bound, except for very ancient PPC models. - |->vm_cachesync: - |.if JIT or FFI - | // Compute start of first cache line and number of cache lines. - | rlwinm CARG1, CARG1, 0, 0, 26 - | sub CARG2, CARG2, CARG1 - | addi CARG2, CARG2, 31 - | rlwinm. CARG2, CARG2, 27, 5, 31 - | beqlr - | mtctr CARG2 - | mr CARG3, CARG1 - |1: // Flush D-Cache. - | dcbst r0, CARG1 - | addi CARG1, CARG1, 32 - | bdnz <1 - | sync - | mtctr CARG2 - |1: // Invalidate I-Cache. 
- | icbi r0, CARG3 - | addi CARG3, CARG3, 32 - | bdnz <1 - | isync - | blr - |.endif - | - |//----------------------------------------------------------------------- - |//-- FFI helper functions ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Handler for callback functions. Callback slot number in r11, g in r12. - |->vm_ffi_callback: - |.if FFI - |.type CTSTATE, CTState, PC - | saveregs - | lwz CTSTATE, GL:r12->ctype_state - | addi DISPATCH, r12, GG_G2DISP - | stw r11, CTSTATE->cb.slot - | stw r3, CTSTATE->cb.gpr[0] - | stfd f1, CTSTATE->cb.fpr[0] - | stw r4, CTSTATE->cb.gpr[1] - | stfd f2, CTSTATE->cb.fpr[1] - | stw r5, CTSTATE->cb.gpr[2] - | stfd f3, CTSTATE->cb.fpr[2] - | stw r6, CTSTATE->cb.gpr[3] - | stfd f4, CTSTATE->cb.fpr[3] - | stw r7, CTSTATE->cb.gpr[4] - | stfd f5, CTSTATE->cb.fpr[4] - | stw r8, CTSTATE->cb.gpr[5] - | stfd f6, CTSTATE->cb.fpr[5] - | stw r9, CTSTATE->cb.gpr[6] - | stfd f7, CTSTATE->cb.fpr[6] - | stw r10, CTSTATE->cb.gpr[7] - | stfd f8, CTSTATE->cb.fpr[7] - | addi TMP0, sp, CFRAME_SPACE+8 - | stw TMP0, CTSTATE->cb.stack - | mr CARG1, CTSTATE - | stw CTSTATE, SAVE_PC // Any value outside of bytecode is ok. - | mr CARG2, sp - | bl extern lj_ccallback_enter // (CTState *cts, void *cf) - | // Returns lua_State *. - | lp BASE, L:CRET1->base - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lp RC, L:CRET1->top - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | li ZERO, 0 - | mr L, CRET1 - | stw TMP3, TMPD - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). - | stw TMP0, TONUM_HI - | li TISNIL, LJ_TNIL - | li_vmstate INTERP - | lfs TOBIT, TMPD - | stw TMP3, TMPD - | sub RC, RC, BASE - | st_vmstate - | lfs TONUM, TMPD - | ins_callt - |.endif - | - |->cont_ffi_callback: // Return from FFI callback. 
- |.if FFI - | lwz CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH) - | stp BASE, L->base - | stp RB, L->top - | stp L, CTSTATE->L - | mr CARG1, CTSTATE - | mr CARG2, RA - | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) - | lwz CRET1, CTSTATE->cb.gpr[0] - | lfd FARG1, CTSTATE->cb.fpr[0] - | lwz CRET2, CTSTATE->cb.gpr[1] - | b ->vm_leave_unw - |.endif - | - |->vm_ffi_call: // Call C function via FFI. - | // Caveat: needs special frame unwinding, see below. - |.if FFI - | .type CCSTATE, CCallState, CARG1 - | lwz TMP1, CCSTATE->spadj - | mflr TMP0 - | lbz CARG2, CCSTATE->nsp - | lbz CARG3, CCSTATE->nfpr - | neg TMP1, TMP1 - | stw TMP0, 4(sp) - | cmpwi cr1, CARG3, 0 - | mr TMP2, sp - | addic. CARG2, CARG2, -1 - | stwux sp, sp, TMP1 - | crnot 4*cr1+eq, 4*cr1+eq // For vararg calls. - | stw r14, -4(TMP2) - | stw CCSTATE, -8(TMP2) - | mr r14, TMP2 - | la TMP1, CCSTATE->stack - | slwi CARG2, CARG2, 2 - | blty >2 - | la TMP2, 8(sp) - |1: - | lwzx TMP0, TMP1, CARG2 - | stwx TMP0, TMP2, CARG2 - | addic. CARG2, CARG2, -4 - | bge <1 - |2: - | bney cr1, >3 - | lfd f1, CCSTATE->fpr[0] - | lfd f2, CCSTATE->fpr[1] - | lfd f3, CCSTATE->fpr[2] - | lfd f4, CCSTATE->fpr[3] - | lfd f5, CCSTATE->fpr[4] - | lfd f6, CCSTATE->fpr[5] - | lfd f7, CCSTATE->fpr[6] - | lfd f8, CCSTATE->fpr[7] - |3: - | lp TMP0, CCSTATE->func - | lwz CARG2, CCSTATE->gpr[1] - | lwz CARG3, CCSTATE->gpr[2] - | lwz CARG4, CCSTATE->gpr[3] - | lwz CARG5, CCSTATE->gpr[4] - | mtctr TMP0 - | lwz r8, CCSTATE->gpr[5] - | lwz r9, CCSTATE->gpr[6] - | lwz r10, CCSTATE->gpr[7] - | lwz CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. 
- | bctrl - | lwz CCSTATE:TMP1, -8(r14) - | lwz TMP2, -4(r14) - | lwz TMP0, 4(r14) - | stw CARG1, CCSTATE:TMP1->gpr[0] - | stfd FARG1, CCSTATE:TMP1->fpr[0] - | stw CARG2, CCSTATE:TMP1->gpr[1] - | mtlr TMP0 - | stw CARG3, CCSTATE:TMP1->gpr[2] - | mr sp, r14 - | stw CARG4, CCSTATE:TMP1->gpr[3] - | mr r14, TMP2 - | blr - |.endif - |// Note: vm_ffi_call must be the last function in this object file! - | - |//----------------------------------------------------------------------- -} - -/* Generate the code for a single instruction. */ -static void build_ins(BuildCtx *ctx, BCOp op, int defop) -{ - int vk = 0; - |=>defop: - - switch (op) { - - /* -- Comparison ops ---------------------------------------------------- */ - - /* Remember: all ops branch for a true comparison, fall through otherwise. */ - - case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: - | // RA = src1*8, RD = src2*8, JMP with RD = target - |.if DUALNUM - | lwzux TMP0, RA, BASE - | addi PC, PC, 4 - | lwz CARG2, 4(RA) - | lwzux TMP1, RD, BASE - | lwz TMP2, -4(PC) - | checknum cr0, TMP0 - | lwz CARG3, 4(RD) - | decode_RD4 TMP2, TMP2 - | checknum cr1, TMP1 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - | bne cr0, >7 - | bne cr1, >8 - | cmpw CARG2, CARG3 - if (op == BC_ISLT) { - | bge >2 - } else if (op == BC_ISGE) { - | blt >2 - } else if (op == BC_ISLE) { - | bgt >2 - } else { - | ble >2 - } - |1: - | add PC, PC, TMP2 - |2: - | ins_next - | - |7: // RA is not an integer. - | bgt cr0, ->vmeta_comp - | // RA is a number. - | lfd f0, 0(RA) - | bgt cr1, ->vmeta_comp - | blt cr1, >4 - | // RA is a number, RD is an integer. - | tonum_i f1, CARG3 - | b >5 - | - |8: // RA is an integer, RD is not an integer. - | bgt cr1, ->vmeta_comp - | // RA is an integer, RD is a number. 
- | tonum_i f0, CARG2 - |4: - | lfd f1, 0(RD) - |5: - | fcmpu cr0, f0, f1 - if (op == BC_ISLT) { - | bge <2 - } else if (op == BC_ISGE) { - | blt <2 - } else if (op == BC_ISLE) { - | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq - | bge <2 - } else { - | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq - | blt <2 - } - | b <1 - |.else - | lwzx TMP0, BASE, RA - | addi PC, PC, 4 - | lfdx f0, BASE, RA - | lwzx TMP1, BASE, RD - | checknum cr0, TMP0 - | lwz TMP2, -4(PC) - | lfdx f1, BASE, RD - | checknum cr1, TMP1 - | decode_RD4 TMP2, TMP2 - | bge cr0, ->vmeta_comp - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - | bge cr1, ->vmeta_comp - | fcmpu cr0, f0, f1 - if (op == BC_ISLT) { - | bge >1 - } else if (op == BC_ISGE) { - | blt >1 - } else if (op == BC_ISLE) { - | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq - | bge >1 - } else { - | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq - | blt >1 - } - | add PC, PC, TMP2 - |1: - | ins_next - |.endif - break; - - case BC_ISEQV: case BC_ISNEV: - vk = op == BC_ISEQV; - | // RA = src1*8, RD = src2*8, JMP with RD = target - |.if DUALNUM - | lwzux TMP0, RA, BASE - | addi PC, PC, 4 - | lwz CARG2, 4(RA) - | lwzux TMP1, RD, BASE - | checknum cr0, TMP0 - | lwz TMP2, -4(PC) - | checknum cr1, TMP1 - | decode_RD4 TMP2, TMP2 - | lwz CARG3, 4(RD) - | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - if (vk) { - | ble cr7, ->BC_ISEQN_Z - } else { - | ble cr7, ->BC_ISNEN_Z - } - |.else - | lwzux TMP0, RA, BASE - | lwz TMP2, 0(PC) - | lfd f0, 0(RA) - | addi PC, PC, 4 - | lwzux TMP1, RD, BASE - | checknum cr0, TMP0 - | decode_RD4 TMP2, TMP2 - | lfd f1, 0(RD) - | checknum cr1, TMP1 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - | bge cr0, >5 - | bge cr1, >5 - | fcmpu cr0, f0, f1 - if (vk) { - | bne >1 - | add PC, PC, TMP2 - } else { - | beq >1 - | add PC, PC, TMP2 - } - |1: - | ins_next - |.endif - |5: // Either or both types are not numbers. 
- |.if not DUALNUM - | lwz CARG2, 4(RA) - | lwz CARG3, 4(RD) - |.endif - |.if FFI - | cmpwi cr7, TMP0, LJ_TCDATA - | cmpwi cr5, TMP1, LJ_TCDATA - |.endif - | not TMP3, TMP0 - | cmplw TMP0, TMP1 - | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? - |.if FFI - | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq - |.endif - | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? - |.if FFI - | beq cr7, ->vmeta_equal_cd - |.endif - | cmplw cr5, CARG2, CARG3 - | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. - | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. - | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. - | mr SAVE0, PC - | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. - | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. - if (vk) { - | bne cr0, >6 - | add PC, PC, TMP2 - |6: - } else { - | beq cr0, >6 - | add PC, PC, TMP2 - |6: - } - |.if DUALNUM - | bge cr0, >2 // Done if 1 or 2. - |1: - | ins_next - |2: - |.else - | blt cr0, <1 // Done if 1 or 2. - |.endif - | blt cr6, <1 // Done if not tab/ud. - | - | // Different tables or userdatas. Need to check __eq metamethod. - | // Field metatable must be at same offset for GCtab and GCudata! - | lwz TAB:TMP2, TAB:CARG2->metatable - | li CARG4, 1-vk // ne = 0 or 1. - | cmplwi TAB:TMP2, 0 - | beq <1 // No metatable? - | lbz TMP2, TAB:TMP2->nomm - | andix. TMP2, TMP2, 1<vmeta_equal // Handle __eq metamethod. 
- break; - - case BC_ISEQS: case BC_ISNES: - vk = op == BC_ISEQS; - | // RA = src*8, RD = str_const*8 (~), JMP with RD = target - | lwzux TMP0, RA, BASE - | srwi RD, RD, 1 - | lwz STR:TMP3, 4(RA) - | lwz TMP2, 0(PC) - | subfic RD, RD, -4 - | addi PC, PC, 4 - |.if FFI - | cmpwi TMP0, LJ_TCDATA - |.endif - | lwzx STR:TMP1, KBASE, RD // KBASE-4-str_const*4 - | .gpr64 extsw TMP0, TMP0 - | subfic TMP0, TMP0, LJ_TSTR - |.if FFI - | beq ->vmeta_equal_cd - |.endif - | sub TMP1, STR:TMP1, STR:TMP3 - | or TMP0, TMP0, TMP1 - | decode_RD4 TMP2, TMP2 - | subfic TMP0, TMP0, 0 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - | subfe TMP1, TMP1, TMP1 - if (vk) { - | andc TMP2, TMP2, TMP1 - } else { - | and TMP2, TMP2, TMP1 - } - | add PC, PC, TMP2 - | ins_next - break; - - case BC_ISEQN: case BC_ISNEN: - vk = op == BC_ISEQN; - | // RA = src*8, RD = num_const*8, JMP with RD = target - |.if DUALNUM - | lwzux TMP0, RA, BASE - | addi PC, PC, 4 - | lwz CARG2, 4(RA) - | lwzux TMP1, RD, KBASE - | checknum cr0, TMP0 - | lwz TMP2, -4(PC) - | checknum cr1, TMP1 - | decode_RD4 TMP2, TMP2 - | lwz CARG3, 4(RD) - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - if (vk) { - |->BC_ISEQN_Z: - } else { - |->BC_ISNEN_Z: - } - | bne cr0, >7 - | bne cr1, >8 - | cmpw CARG2, CARG3 - |4: - |.else - if (vk) { - |->BC_ISEQN_Z: // Dummy label. - } else { - |->BC_ISNEN_Z: // Dummy label. - } - | lwzx TMP0, BASE, RA - | addi PC, PC, 4 - | lfdx f0, BASE, RA - | lwz TMP2, -4(PC) - | lfdx f1, KBASE, RD - | decode_RD4 TMP2, TMP2 - | checknum TMP0 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - | bge >3 - | fcmpu cr0, f0, f1 - |.endif - if (vk) { - | bne >1 - | add PC, PC, TMP2 - |1: - |.if not FFI - |3: - |.endif - } else { - | beq >2 - |1: - |.if not FFI - |3: - |.endif - | add PC, PC, TMP2 - |2: - } - | ins_next - |.if FFI - |3: - | cmpwi TMP0, LJ_TCDATA - | beq ->vmeta_equal_cd - | b <1 - |.endif - |.if DUALNUM - |7: // RA is not an integer. - | bge cr0, <3 - | // RA is a number. 
- | lfd f0, 0(RA) - | blt cr1, >1 - | // RA is a number, RD is an integer. - | tonum_i f1, CARG3 - | b >2 - | - |8: // RA is an integer, RD is a number. - | tonum_i f0, CARG2 - |1: - | lfd f1, 0(RD) - |2: - | fcmpu cr0, f0, f1 - | b <4 - |.endif - break; - - case BC_ISEQP: case BC_ISNEP: - vk = op == BC_ISEQP; - | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target - | lwzx TMP0, BASE, RA - | srwi TMP1, RD, 3 - | lwz TMP2, 0(PC) - | not TMP1, TMP1 - | addi PC, PC, 4 - |.if FFI - | cmpwi TMP0, LJ_TCDATA - |.endif - | sub TMP0, TMP0, TMP1 - |.if FFI - | beq ->vmeta_equal_cd - |.endif - | decode_RD4 TMP2, TMP2 - | .gpr64 extsw TMP0, TMP0 - | addic TMP0, TMP0, -1 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - | subfe TMP1, TMP1, TMP1 - if (vk) { - | and TMP2, TMP2, TMP1 - } else { - | andc TMP2, TMP2, TMP1 - } - | add PC, PC, TMP2 - | ins_next - break; - - /* -- Unary test and copy ops ------------------------------------------- */ - - case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: - | // RA = dst*8 or unused, RD = src*8, JMP with RD = target - | lwzx TMP0, BASE, RD - | lwz INS, 0(PC) - | addi PC, PC, 4 - if (op == BC_IST || op == BC_ISF) { - | .gpr64 extsw TMP0, TMP0 - | subfic TMP0, TMP0, LJ_TTRUE - | decode_RD4 TMP2, INS - | subfe TMP1, TMP1, TMP1 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - if (op == BC_IST) { - | andc TMP2, TMP2, TMP1 - } else { - | and TMP2, TMP2, TMP1 - } - | add PC, PC, TMP2 - } else { - | li TMP1, LJ_TFALSE - | lfdx f0, BASE, RD - | cmplw TMP0, TMP1 - if (op == BC_ISTC) { - | bge >1 - } else { - | blt >1 - } - | addis PC, PC, -(BCBIAS_J*4 >> 16) - | decode_RD4 TMP2, INS - | stfdx f0, BASE, RA - | add PC, PC, TMP2 - |1: - } - | ins_next - break; - - case BC_ISTYPE: - | // RA = src*8, RD = -type*8 - | lwzx TMP0, BASE, RA - | srwi TMP1, RD, 3 - | ins_next1 - |.if not PPE and not GPR64 - | add. 
TMP0, TMP0, TMP1 - |.else - | neg TMP1, TMP1 - | cmpw TMP0, TMP1 - |.endif - | bne ->vmeta_istype - | ins_next2 - break; - case BC_ISNUM: - | // RA = src*8, RD = -(TISNUM-1)*8 - | lwzx TMP0, BASE, RA - | ins_next1 - | checknum TMP0 - | bge ->vmeta_istype - | ins_next2 - break; - - /* -- Unary ops --------------------------------------------------------- */ - - case BC_MOV: - | // RA = dst*8, RD = src*8 - | ins_next1 - | lfdx f0, BASE, RD - | stfdx f0, BASE, RA - | ins_next2 - break; - case BC_NOT: - | // RA = dst*8, RD = src*8 - | ins_next1 - | lwzx TMP0, BASE, RD - | .gpr64 extsw TMP0, TMP0 - | subfic TMP1, TMP0, LJ_TTRUE - | adde TMP0, TMP0, TMP1 - | stwx TMP0, BASE, RA - | ins_next2 - break; - case BC_UNM: - | // RA = dst*8, RD = src*8 - | lwzux TMP1, RD, BASE - | lwz TMP0, 4(RD) - | checknum TMP1 - |.if DUALNUM - | bne >5 - |.if GPR64 - | lus TMP2, 0x8000 - | neg TMP0, TMP0 - | cmplw TMP0, TMP2 - | beq >4 - |.else - | nego. TMP0, TMP0 - | bso >4 - |1: - |.endif - | ins_next1 - | stwux TISNUM, RA, BASE - | stw TMP0, 4(RA) - |3: - | ins_next2 - |4: - |.if not GPR64 - | // Potential overflow. - | checkov TMP1, <1 // Ignore unrelated overflow. - |.endif - | lus TMP1, 0x41e0 // 2^31. - | li TMP0, 0 - | b >7 - |.endif - |5: - | bge ->vmeta_unm - | xoris TMP1, TMP1, 0x8000 - |7: - | ins_next1 - | stwux TMP1, RA, BASE - | stw TMP0, 4(RA) - |.if DUALNUM - | b <3 - |.else - | ins_next2 - |.endif - break; - case BC_LEN: - | // RA = dst*8, RD = src*8 - | lwzux TMP0, RD, BASE - | lwz CARG1, 4(RD) - | checkstr TMP0; bne >2 - | lwz CRET1, STR:CARG1->len - |1: - |.if DUALNUM - | ins_next1 - | stwux TISNUM, RA, BASE - | stw CRET1, 4(RA) - |.else - | tonum_u f0, CRET1 // Result is a non-negative integer. 
- | ins_next1 - | stfdx f0, BASE, RA - |.endif - | ins_next2 - |2: - | checktab TMP0; bne ->vmeta_len -#if LJ_52 - | lwz TAB:TMP2, TAB:CARG1->metatable - | cmplwi TAB:TMP2, 0 - | bne >9 - |3: -#endif - |->BC_LEN_Z: - | bl extern lj_tab_len // (GCtab *t) - | // Returns uint32_t (but less than 2^31). - | b <1 -#if LJ_52 - |9: - | lbz TMP0, TAB:TMP2->nomm - | andix. TMP0, TMP0, 1<vmeta_len -#endif - break; - - /* -- Binary ops -------------------------------------------------------- */ - - |.macro ins_arithpre - | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||switch (vk) { - ||case 0: - | lwzx TMP1, BASE, RB - | .if DUALNUM - | lwzx TMP2, KBASE, RC - | .endif - | lfdx f14, BASE, RB - | lfdx f15, KBASE, RC - | .if DUALNUM - | checknum cr0, TMP1 - | checknum cr1, TMP2 - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | bge ->vmeta_arith_vn - | .else - | checknum TMP1; bge ->vmeta_arith_vn - | .endif - || break; - ||case 1: - | lwzx TMP1, BASE, RB - | .if DUALNUM - | lwzx TMP2, KBASE, RC - | .endif - | lfdx f15, BASE, RB - | lfdx f14, KBASE, RC - | .if DUALNUM - | checknum cr0, TMP1 - | checknum cr1, TMP2 - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | bge ->vmeta_arith_nv - | .else - | checknum TMP1; bge ->vmeta_arith_nv - | .endif - || break; - ||default: - | lwzx TMP1, BASE, RB - | lwzx TMP2, BASE, RC - | lfdx f14, BASE, RB - | lfdx f15, BASE, RC - | checknum cr0, TMP1 - | checknum cr1, TMP2 - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | bge ->vmeta_arith_vv - || break; - ||} - |.endmacro - | - |.macro ins_arithfallback, ins - ||switch (vk) { - ||case 0: - | ins ->vmeta_arith_vn2 - || break; - ||case 1: - | ins ->vmeta_arith_nv2 - || break; - ||default: - | ins ->vmeta_arith_vv2 - || break; - ||} - |.endmacro - | - |.macro intmod, a, b, c - | bl ->vm_modi - |.endmacro - | - |.macro fpmod, a, b, c - |->BC_MODVN_Z: - | fdiv FARG1, b, c - | // NYI: Use internal implementation of floor. 
- | blex floor // floor(b/c) - | fmul a, FARG1, c - | fsub a, b, a // b - floor(b/c)*c - |.endmacro - | - |.macro ins_arithfp, fpins - | ins_arithpre - |.if "fpins" == "fpmod_" - | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. - |.else - | fpins f0, f14, f15 - | ins_next1 - | stfdx f0, BASE, RA - | ins_next2 - |.endif - |.endmacro - | - |.macro ins_arithdn, intins, fpins - | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||switch (vk) { - ||case 0: - | lwzux TMP1, RB, BASE - | lwzux TMP2, RC, KBASE - | lwz CARG1, 4(RB) - | checknum cr0, TMP1 - | lwz CARG2, 4(RC) - || break; - ||case 1: - | lwzux TMP1, RB, BASE - | lwzux TMP2, RC, KBASE - | lwz CARG2, 4(RB) - | checknum cr0, TMP1 - | lwz CARG1, 4(RC) - || break; - ||default: - | lwzux TMP1, RB, BASE - | lwzux TMP2, RC, BASE - | lwz CARG1, 4(RB) - | checknum cr0, TMP1 - | lwz CARG2, 4(RC) - || break; - ||} - | checknum cr1, TMP2 - | bne >5 - | bne cr1, >5 - | intins CARG1, CARG1, CARG2 - | bso >4 - |1: - | ins_next1 - | stwux TISNUM, RA, BASE - | stw CARG1, 4(RA) - |2: - | ins_next2 - |4: // Overflow. - | checkov TMP0, <1 // Ignore unrelated overflow. - | ins_arithfallback b - |5: // FP variant. - ||if (vk == 1) { - | lfd f15, 0(RB) - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | lfd f14, 0(RC) - ||} else { - | lfd f14, 0(RB) - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | lfd f15, 0(RC) - ||} - | ins_arithfallback bge - |.if "fpins" == "fpmod_" - | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. - |.else - | fpins f0, f14, f15 - | ins_next1 - | stfdx f0, BASE, RA - | b <2 - |.endif - |.endmacro - | - |.macro ins_arith, intins, fpins - |.if DUALNUM - | ins_arithdn intins, fpins - |.else - | ins_arithfp fpins - |.endif - |.endmacro - - case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - |.if GPR64 - |.macro addo32., y, a, b - | // Need to check overflow for (a<<32) + (b<<32). - | rldicr TMP0, a, 32, 31 - | rldicr TMP3, b, 32, 31 - | addo. 
TMP0, TMP0, TMP3 - | add y, a, b - |.endmacro - | ins_arith addo32., fadd - |.else - | ins_arith addo., fadd - |.endif - break; - case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - |.if GPR64 - |.macro subo32., y, a, b - | // Need to check overflow for (a<<32) - (b<<32). - | rldicr TMP0, a, 32, 31 - | rldicr TMP3, b, 32, 31 - | subo. TMP0, TMP0, TMP3 - | sub y, a, b - |.endmacro - | ins_arith subo32., fsub - |.else - | ins_arith subo., fsub - |.endif - break; - case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arith mullwo., fmul - break; - case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: - | ins_arithfp fdiv - break; - case BC_MODVN: - | ins_arith intmod, fpmod - break; - case BC_MODNV: case BC_MODVV: - | ins_arith intmod, fpmod_ - break; - case BC_POW: - | // NYI: (partial) integer arithmetic. - | lwzx TMP1, BASE, RB - | lfdx FARG1, BASE, RB - | lwzx TMP2, BASE, RC - | lfdx FARG2, BASE, RC - | checknum cr0, TMP1 - | checknum cr1, TMP2 - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | bge ->vmeta_arith_vv - | blex pow - | ins_next1 - | stfdx FARG1, BASE, RA - | ins_next2 - break; - - case BC_CAT: - | // RA = dst*8, RB = src_start*8, RC = src_end*8 - | sub CARG3, RC, RB - | stp BASE, L->base - | add CARG2, BASE, RC - | mr SAVE0, RB - |->BC_CAT_Z: - | stw PC, SAVE_PC - | mr CARG1, L - | srwi CARG3, CARG3, 3 - | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) - | // Returns NULL (finished) or TValue * (metamethod). - | cmplwi CRET1, 0 - | lp BASE, L->base - | bne ->vmeta_binop - | ins_next1 - | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. 
- | stfdx f0, BASE, RA - | ins_next2 - break; - - /* -- Constant ops ------------------------------------------------------ */ - - case BC_KSTR: - | // RA = dst*8, RD = str_const*8 (~) - | srwi TMP1, RD, 1 - | subfic TMP1, TMP1, -4 - | ins_next1 - | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4 - | li TMP2, LJ_TSTR - | stwux TMP2, RA, BASE - | stw TMP0, 4(RA) - | ins_next2 - break; - case BC_KCDATA: - |.if FFI - | // RA = dst*8, RD = cdata_const*8 (~) - | srwi TMP1, RD, 1 - | subfic TMP1, TMP1, -4 - | ins_next1 - | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4 - | li TMP2, LJ_TCDATA - | stwux TMP2, RA, BASE - | stw TMP0, 4(RA) - | ins_next2 - |.endif - break; - case BC_KSHORT: - | // RA = dst*8, RD = int16_literal*8 - |.if DUALNUM - | slwi RD, RD, 13 - | srawi RD, RD, 16 - | ins_next1 - | stwux TISNUM, RA, BASE - | stw RD, 4(RA) - | ins_next2 - |.else - | // The soft-float approach is faster. - | slwi RD, RD, 13 - | srawi TMP1, RD, 31 - | xor TMP2, TMP1, RD - | sub TMP2, TMP2, TMP1 // TMP2 = abs(x) - | cntlzw TMP3, TMP2 - | subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1 - | slw TMP2, TMP2, TMP3 // TMP2 = left aligned mantissa - | subfic TMP3, RD, 0 - | slwi TMP1, TMP1, 20 - | rlwimi RD, TMP2, 21, 1, 31 // hi = sign(x) | (mantissa>>11) - | subfe TMP0, TMP0, TMP0 - | add RD, RD, TMP1 // hi = hi + exponent-1 - | and RD, RD, TMP0 // hi = x == 0 ? 
0 : hi - | ins_next1 - | stwux RD, RA, BASE - | stw ZERO, 4(RA) - | ins_next2 - |.endif - break; - case BC_KNUM: - | // RA = dst*8, RD = num_const*8 - | ins_next1 - | lfdx f0, KBASE, RD - | stfdx f0, BASE, RA - | ins_next2 - break; - case BC_KPRI: - | // RA = dst*8, RD = primitive_type*8 (~) - | srwi TMP1, RD, 3 - | not TMP0, TMP1 - | ins_next1 - | stwx TMP0, BASE, RA - | ins_next2 - break; - case BC_KNIL: - | // RA = base*8, RD = end*8 - | stwx TISNIL, BASE, RA - | addi RA, RA, 8 - |1: - | stwx TISNIL, BASE, RA - | cmpw RA, RD - | addi RA, RA, 8 - | blt <1 - | ins_next_ - break; - - /* -- Upvalue and function ops ------------------------------------------ */ - - case BC_UGET: - | // RA = dst*8, RD = uvnum*8 - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi RD, RD, 1 - | addi RD, RD, offsetof(GCfuncL, uvptr) - | lwzx UPVAL:RB, LFUNC:RB, RD - | ins_next1 - | lwz TMP1, UPVAL:RB->v - | lfd f0, 0(TMP1) - | stfdx f0, BASE, RA - | ins_next2 - break; - case BC_USETV: - | // RA = uvnum*8, RD = src*8 - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi RA, RA, 1 - | addi RA, RA, offsetof(GCfuncL, uvptr) - | lfdux f0, RD, BASE - | lwzx UPVAL:RB, LFUNC:RB, RA - | lbz TMP3, UPVAL:RB->marked - | lwz CARG2, UPVAL:RB->v - | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) - | lbz TMP0, UPVAL:RB->closed - | lwz TMP2, 0(RD) - | stfd f0, 0(CARG2) - | cmplwi cr1, TMP0, 0 - | lwz TMP1, 4(RD) - | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq - | subi TMP2, TMP2, (LJ_TNUMX+1) - | bne >2 // Upvalue is closed and black? - |1: - | ins_next - | - |2: // Check if new value is collectable. - | cmplwi TMP2, LJ_TISGCV - (LJ_TNUMX+1) - | bge <1 // tvisgcv(v) - | lbz TMP3, GCOBJ:TMP1->gch.marked - | andix. TMP3, TMP3, LJ_GC_WHITES // iswhite(v) - | la CARG1, GG_DISP2G(DISPATCH) - | // Crossed a write barrier. Move the barrier forward. 
- | beq <1 - | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) - | b <1 - break; - case BC_USETS: - | // RA = uvnum*8, RD = str_const*8 (~) - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi TMP1, RD, 1 - | srwi RA, RA, 1 - | subfic TMP1, TMP1, -4 - | addi RA, RA, offsetof(GCfuncL, uvptr) - | lwzx STR:TMP1, KBASE, TMP1 // KBASE-4-str_const*4 - | lwzx UPVAL:RB, LFUNC:RB, RA - | lbz TMP3, UPVAL:RB->marked - | lwz CARG2, UPVAL:RB->v - | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) - | lbz TMP3, STR:TMP1->marked - | lbz TMP2, UPVAL:RB->closed - | li TMP0, LJ_TSTR - | stw STR:TMP1, 4(CARG2) - | stw TMP0, 0(CARG2) - | bne >2 - |1: - | ins_next - | - |2: // Check if string is white and ensure upvalue is closed. - | andix. TMP3, TMP3, LJ_GC_WHITES // iswhite(str) - | cmplwi cr1, TMP2, 0 - | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq - | la CARG1, GG_DISP2G(DISPATCH) - | // Crossed a write barrier. Move the barrier forward. - | beq <1 - | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) - | b <1 - break; - case BC_USETN: - | // RA = uvnum*8, RD = num_const*8 - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi RA, RA, 1 - | addi RA, RA, offsetof(GCfuncL, uvptr) - | lfdx f0, KBASE, RD - | lwzx UPVAL:RB, LFUNC:RB, RA - | ins_next1 - | lwz TMP1, UPVAL:RB->v - | stfd f0, 0(TMP1) - | ins_next2 - break; - case BC_USETP: - | // RA = uvnum*8, RD = primitive_type*8 (~) - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi RA, RA, 1 - | srwi TMP0, RD, 3 - | addi RA, RA, offsetof(GCfuncL, uvptr) - | not TMP0, TMP0 - | lwzx UPVAL:RB, LFUNC:RB, RA - | ins_next1 - | lwz TMP1, UPVAL:RB->v - | stw TMP0, 0(TMP1) - | ins_next2 - break; - - case BC_UCLO: - | // RA = level*8, RD = target - | lwz TMP1, L->openupval - | branch_RD // Do this first since RD is not saved. 
- | stp BASE, L->base - | cmplwi TMP1, 0 - | mr CARG1, L - | beq >1 - | add CARG2, BASE, RA - | bl extern lj_func_closeuv // (lua_State *L, TValue *level) - | lp BASE, L->base - |1: - | ins_next - break; - - case BC_FNEW: - | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) - | srwi TMP1, RD, 1 - | stp BASE, L->base - | subfic TMP1, TMP1, -4 - | stw PC, SAVE_PC - | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4 - | mr CARG1, L - | lwz CARG3, FRAME_FUNC(BASE) - | // (lua_State *L, GCproto *pt, GCfuncL *parent) - | bl extern lj_func_newL_gc - | // Returns GCfuncL *. - | lp BASE, L->base - | li TMP0, LJ_TFUNC - | stwux TMP0, RA, BASE - | stw LFUNC:CRET1, 4(RA) - | ins_next - break; - - /* -- Table ops --------------------------------------------------------- */ - - case BC_TNEW: - case BC_TDUP: - | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) - | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH) - | mr CARG1, L - | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) - | stp BASE, L->base - | cmplw TMP0, TMP1 - | stw PC, SAVE_PC - | bge >5 - |1: - if (op == BC_TNEW) { - | rlwinm CARG2, RD, 29, 21, 31 - | rlwinm CARG3, RD, 18, 27, 31 - | cmpwi CARG2, 0x7ff; beq >3 - |2: - | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) - | // Returns Table *. - } else { - | srwi TMP1, RD, 1 - | subfic TMP1, TMP1, -4 - | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4 - | bl extern lj_tab_dup // (lua_State *L, Table *kt) - | // Returns Table *. 
- } - | lp BASE, L->base - | li TMP0, LJ_TTAB - | stwux TMP0, RA, BASE - | stw TAB:CRET1, 4(RA) - | ins_next - if (op == BC_TNEW) { - |3: - | li CARG2, 0x801 - | b <2 - } - |5: - | mr SAVE0, RD - | bl extern lj_gc_step_fixtop // (lua_State *L) - | mr RD, SAVE0 - | mr CARG1, L - | b <1 - break; - - case BC_GGET: - | // RA = dst*8, RD = str_const*8 (~) - case BC_GSET: - | // RA = src*8, RD = str_const*8 (~) - | lwz LFUNC:TMP2, FRAME_FUNC(BASE) - | srwi TMP1, RD, 1 - | lwz TAB:RB, LFUNC:TMP2->env - | subfic TMP1, TMP1, -4 - | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 - if (op == BC_GGET) { - | b ->BC_TGETS_Z - } else { - | b ->BC_TSETS_Z - } - break; - - case BC_TGETV: - | // RA = dst*8, RB = table*8, RC = key*8 - | lwzux CARG1, RB, BASE - | lwzux CARG2, RC, BASE - | lwz TAB:RB, 4(RB) - |.if DUALNUM - | lwz RC, 4(RC) - |.else - | lfd f0, 0(RC) - |.endif - | checktab CARG1 - | checknum cr1, CARG2 - | bne ->vmeta_tgetv - |.if DUALNUM - | lwz TMP0, TAB:RB->asize - | bne cr1, >5 - | lwz TMP1, TAB:RB->array - | cmplw TMP0, RC - | slwi TMP2, RC, 3 - |.else - | bge cr1, >5 - | // Convert number key to integer, check for integerness and range. - | fctiwz f1, f0 - | fadd f2, f0, TOBIT - | stfd f1, TMPD - | lwz TMP0, TAB:RB->asize - | fsub f2, f2, TOBIT - | lwz TMP2, TMPD_LO - | lwz TMP1, TAB:RB->array - | fcmpu cr1, f0, f2 - | cmplw cr0, TMP0, TMP2 - | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq - | slwi TMP2, TMP2, 3 - |.endif - | ble ->vmeta_tgetv // Integer key and in array part? - | lwzx TMP0, TMP1, TMP2 - | lfdx f14, TMP1, TMP2 - | checknil TMP0; beq >2 - |1: - | ins_next1 - | stfdx f14, BASE, RA - | ins_next2 - | - |2: // Check for __index if table value is nil. - | lwz TAB:TMP2, TAB:RB->metatable - | cmplwi TAB:TMP2, 0 - | beq <1 // No metatable: done. - | lbz TMP0, TAB:TMP2->nomm - | andix. TMP0, TMP0, 1<vmeta_tgetv - | - |5: - | checkstr CARG2; bne ->vmeta_tgetv - |.if not DUALNUM - | lwz STR:RC, 4(RC) - |.endif - | b ->BC_TGETS_Z // String key? 
- break; - case BC_TGETS: - | // RA = dst*8, RB = table*8, RC = str_const*8 (~) - | lwzux CARG1, RB, BASE - | srwi TMP1, RC, 1 - | lwz TAB:RB, 4(RB) - | subfic TMP1, TMP1, -4 - | checktab CARG1 - | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 - | bne ->vmeta_tgets1 - |->BC_TGETS_Z: - | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 - | lwz TMP0, TAB:RB->hmask - | lwz TMP1, STR:RC->hash - | lwz NODE:TMP2, TAB:RB->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | slwi TMP0, TMP1, 5 - | slwi TMP1, TMP1, 3 - | sub TMP1, TMP0, TMP1 - | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - |1: - | lwz CARG1, NODE:TMP2->key - | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) - | lwz CARG2, NODE:TMP2->val - | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2) - | checkstr CARG1; bne >4 - | cmpw TMP0, STR:RC; bne >4 - | checknil CARG2; beq >5 // Key found, but nil value? - |3: - | stwux CARG2, RA, BASE - | stw TMP1, 4(RA) - | ins_next - | - |4: // Follow hash chain. - | lwz NODE:TMP2, NODE:TMP2->next - | cmplwi NODE:TMP2, 0 - | bne <1 - | // End of hash chain: key not found, nil result. - | li CARG2, LJ_TNIL - | - |5: // Check for __index if table value is nil. - | lwz TAB:TMP2, TAB:RB->metatable - | cmplwi TAB:TMP2, 0 - | beq <3 // No metatable: done. - | lbz TMP0, TAB:TMP2->nomm - | andix. TMP0, TMP0, 1<vmeta_tgets - break; - case BC_TGETB: - | // RA = dst*8, RB = table*8, RC = index*8 - | lwzux CARG1, RB, BASE - | srwi TMP0, RC, 3 - | lwz TAB:RB, 4(RB) - | checktab CARG1; bne ->vmeta_tgetb - | lwz TMP1, TAB:RB->asize - | lwz TMP2, TAB:RB->array - | cmplw TMP0, TMP1; bge ->vmeta_tgetb - | lwzx TMP1, TMP2, RC - | lfdx f0, TMP2, RC - | checknil TMP1; beq >5 - |1: - | ins_next1 - | stfdx f0, BASE, RA - | ins_next2 - | - |5: // Check for __index if table value is nil. - | lwz TAB:TMP2, TAB:RB->metatable - | cmplwi TAB:TMP2, 0 - | beq <1 // No metatable: done. - | lbz TMP2, TAB:TMP2->nomm - | andix. TMP2, TMP2, 1<vmeta_tgetb // Caveat: preserve TMP0! 
- break; - case BC_TGETR: - | // RA = dst*8, RB = table*8, RC = key*8 - | add RB, BASE, RB - | lwz TAB:CARG1, 4(RB) - |.if DUALNUM - | add RC, BASE, RC - | lwz TMP0, TAB:CARG1->asize - | lwz CARG2, 4(RC) - | lwz TMP1, TAB:CARG1->array - |.else - | lfdx f0, BASE, RC - | lwz TMP0, TAB:CARG1->asize - | toint CARG2, f0 - | lwz TMP1, TAB:CARG1->array - |.endif - | cmplw TMP0, CARG2 - | slwi TMP2, CARG2, 3 - | ble ->vmeta_tgetr // In array part? - | lfdx f14, TMP1, TMP2 - |->BC_TGETR_Z: - | ins_next1 - | stfdx f14, BASE, RA - | ins_next2 - break; - - case BC_TSETV: - | // RA = src*8, RB = table*8, RC = key*8 - | lwzux CARG1, RB, BASE - | lwzux CARG2, RC, BASE - | lwz TAB:RB, 4(RB) - |.if DUALNUM - | lwz RC, 4(RC) - |.else - | lfd f0, 0(RC) - |.endif - | checktab CARG1 - | checknum cr1, CARG2 - | bne ->vmeta_tsetv - |.if DUALNUM - | lwz TMP0, TAB:RB->asize - | bne cr1, >5 - | lwz TMP1, TAB:RB->array - | cmplw TMP0, RC - | slwi TMP0, RC, 3 - |.else - | bge cr1, >5 - | // Convert number key to integer, check for integerness and range. - | fctiwz f1, f0 - | fadd f2, f0, TOBIT - | stfd f1, TMPD - | lwz TMP0, TAB:RB->asize - | fsub f2, f2, TOBIT - | lwz TMP2, TMPD_LO - | lwz TMP1, TAB:RB->array - | fcmpu cr1, f0, f2 - | cmplw cr0, TMP0, TMP2 - | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq - | slwi TMP0, TMP2, 3 - |.endif - | ble ->vmeta_tsetv // Integer key and in array part? - | lwzx TMP2, TMP1, TMP0 - | lbz TMP3, TAB:RB->marked - | lfdx f14, BASE, RA - | checknil TMP2; beq >3 - |1: - | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) - | stfdx f14, TMP1, TMP0 - | bne >7 - |2: - | ins_next - | - |3: // Check for __newindex if previous value is nil. - | lwz TAB:TMP2, TAB:RB->metatable - | cmplwi TAB:TMP2, 0 - | beq <1 // No metatable: done. - | lbz TMP2, TAB:TMP2->nomm - | andix. TMP2, TMP2, 1<vmeta_tsetv - | - |5: - | checkstr CARG2; bne ->vmeta_tsetv - |.if not DUALNUM - | lwz STR:RC, 4(RC) - |.endif - | b ->BC_TSETS_Z // String key? 
- | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0 - | b <2 - break; - case BC_TSETS: - | // RA = src*8, RB = table*8, RC = str_const*8 (~) - | lwzux CARG1, RB, BASE - | srwi TMP1, RC, 1 - | lwz TAB:RB, 4(RB) - | subfic TMP1, TMP1, -4 - | checktab CARG1 - | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 - | bne ->vmeta_tsets1 - |->BC_TSETS_Z: - | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8 - | lwz TMP0, TAB:RB->hmask - | lwz TMP1, STR:RC->hash - | lwz NODE:TMP2, TAB:RB->node - | stb ZERO, TAB:RB->nomm // Clear metamethod cache. - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | lfdx f14, BASE, RA - | slwi TMP0, TMP1, 5 - | slwi TMP1, TMP1, 3 - | sub TMP1, TMP0, TMP1 - | lbz TMP3, TAB:RB->marked - | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - |1: - | lwz CARG1, NODE:TMP2->key - | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) - | lwz CARG2, NODE:TMP2->val - | lwz NODE:TMP1, NODE:TMP2->next - | checkstr CARG1; bne >5 - | cmpw TMP0, STR:RC; bne >5 - | checknil CARG2; beq >4 // Key found, but nil value? - |2: - | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) - | stfd f14, NODE:TMP2->val - | bne >7 - |3: - | ins_next - | - |4: // Check for __newindex if previous value is nil. - | lwz TAB:TMP1, TAB:RB->metatable - | cmplwi TAB:TMP1, 0 - | beq <2 // No metatable: done. - | lbz TMP0, TAB:TMP1->nomm - | andix. TMP0, TMP0, 1<vmeta_tsets - | - |5: // Follow hash chain. - | cmplwi NODE:TMP1, 0 - | mr NODE:TMP2, NODE:TMP1 - | bne <1 - | // End of hash chain: key not found, add a new one. - | - | // But check for __newindex first. - | lwz TAB:TMP1, TAB:RB->metatable - | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - | stw PC, SAVE_PC - | mr CARG1, L - | cmplwi TAB:TMP1, 0 - | stp BASE, L->base - | beq >6 // No metatable: continue. - | lbz TMP0, TAB:TMP1->nomm - | andix. TMP0, TMP0, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. 
- |6: - | li TMP0, LJ_TSTR - | stw STR:RC, 4(CARG3) - | mr CARG2, TAB:RB - | stw TMP0, 0(CARG3) - | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) - | // Returns TValue *. - | lp BASE, L->base - | stfd f14, 0(CRET1) - | b <3 // No 2nd write barrier needed. - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0 - | b <3 - break; - case BC_TSETB: - | // RA = src*8, RB = table*8, RC = index*8 - | lwzux CARG1, RB, BASE - | srwi TMP0, RC, 3 - | lwz TAB:RB, 4(RB) - | checktab CARG1; bne ->vmeta_tsetb - | lwz TMP1, TAB:RB->asize - | lwz TMP2, TAB:RB->array - | lbz TMP3, TAB:RB->marked - | cmplw TMP0, TMP1 - | lfdx f14, BASE, RA - | bge ->vmeta_tsetb - | lwzx TMP1, TMP2, RC - | checknil TMP1; beq >5 - |1: - | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) - | stfdx f14, TMP2, RC - | bne >7 - |2: - | ins_next - | - |5: // Check for __newindex if previous value is nil. - | lwz TAB:TMP1, TAB:RB->metatable - | cmplwi TAB:TMP1, 0 - | beq <1 // No metatable: done. - | lbz TMP1, TAB:TMP1->nomm - | andix. TMP1, TMP1, 1<vmeta_tsetb // Caveat: preserve TMP0! - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0 - | b <2 - break; - case BC_TSETR: - | // RA = dst*8, RB = table*8, RC = key*8 - | add RB, BASE, RB - | lwz TAB:CARG2, 4(RB) - |.if DUALNUM - | add RC, BASE, RC - | lbz TMP3, TAB:CARG2->marked - | lwz TMP0, TAB:CARG2->asize - | lwz CARG3, 4(RC) - | lwz TMP1, TAB:CARG2->array - |.else - | lfdx f0, BASE, RC - | lbz TMP3, TAB:CARG2->marked - | lwz TMP0, TAB:CARG2->asize - | toint CARG3, f0 - | lwz TMP1, TAB:CARG2->array - |.endif - | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) - | bne >7 - |2: - | cmplw TMP0, CARG3 - | slwi TMP2, CARG3, 3 - | lfdx f14, BASE, RA - | ble ->vmeta_tsetr // In array part? - | ins_next1 - | stfdx f14, TMP1, TMP2 - | ins_next2 - | - |7: // Possible table write barrier for the value. Skip valiswhite check. 
- | barrierback TAB:CARG2, TMP3, TMP2 - | b <2 - break; - - - case BC_TSETM: - | // RA = base*8 (table at base-1), RD = num_const*8 (start index) - | add RA, BASE, RA - |1: - | add TMP3, KBASE, RD - | lwz TAB:CARG2, -4(RA) // Guaranteed to be a table. - | addic. TMP0, MULTRES, -8 - | lwz TMP3, 4(TMP3) // Integer constant is in lo-word. - | srwi CARG3, TMP0, 3 - | beq >4 // Nothing to copy? - | add CARG3, CARG3, TMP3 - | lwz TMP2, TAB:CARG2->asize - | slwi TMP1, TMP3, 3 - | lbz TMP3, TAB:CARG2->marked - | cmplw CARG3, TMP2 - | add TMP2, RA, TMP0 - | lwz TMP0, TAB:CARG2->array - | bgt >5 - | add TMP1, TMP1, TMP0 - | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) - |3: // Copy result slots to table. - | lfd f0, 0(RA) - | addi RA, RA, 8 - | cmpw cr1, RA, TMP2 - | stfd f0, 0(TMP1) - | addi TMP1, TMP1, 8 - | blt cr1, <3 - | bne >7 - |4: - | ins_next - | - |5: // Need to resize array part. - | stp BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | mr SAVE0, RD - | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) - | // Must not reallocate the stack. - | mr RD, SAVE0 - | b <1 - | - |7: // Possible table write barrier for any value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP3, TMP0 - | b <4 - break; - - /* -- Calls and vararg handling ----------------------------------------- */ - - case BC_CALLM: - | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 - | add NARGS8:RC, NARGS8:RC, MULTRES - | // Fall through. Assumes BC_CALL follows. - break; - case BC_CALL: - | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 - | mr TMP2, BASE - | lwzux TMP0, BASE, RA - | lwz LFUNC:RB, 4(BASE) - | subi NARGS8:RC, NARGS8:RC, 8 - | addi BASE, BASE, 8 - | checkfunc TMP0; bne ->vmeta_call - | ins_call - break; - - case BC_CALLMT: - | // RA = base*8, (RB = 0,) RC = extra_nargs*8 - | add NARGS8:RC, NARGS8:RC, MULTRES - | // Fall through. Assumes BC_CALLT follows. 
- break; - case BC_CALLT: - | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 - | lwzux TMP0, RA, BASE - | lwz LFUNC:RB, 4(RA) - | subi NARGS8:RC, NARGS8:RC, 8 - | lwz TMP1, FRAME_PC(BASE) - | checkfunc TMP0 - | addi RA, RA, 8 - | bne ->vmeta_callt - |->BC_CALLT_Z: - | andix. TMP0, TMP1, FRAME_TYPE // Caveat: preserve cr0 until the crand. - | lbz TMP3, LFUNC:RB->ffid - | xori TMP2, TMP1, FRAME_VARG - | cmplwi cr1, NARGS8:RC, 0 - | bne >7 - |1: - | stw LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. - | li TMP2, 0 - | cmplwi cr7, TMP3, 1 // (> FF_C) Calling a fast function? - | beq cr1, >3 - |2: - | addi TMP3, TMP2, 8 - | lfdx f0, RA, TMP2 - | cmplw cr1, TMP3, NARGS8:RC - | stfdx f0, BASE, TMP2 - | mr TMP2, TMP3 - | bne cr1, <2 - |3: - | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+gt - | beq >5 - |4: - | ins_callt - | - |5: // Tailcall to a fast function with a Lua frame below. - | lwz INS, -4(TMP1) - | decode_RA8 RA, INS - | sub TMP1, BASE, RA - | lwz LFUNC:TMP1, FRAME_FUNC-8(TMP1) - | lwz TMP1, LFUNC:TMP1->pc - | lwz KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. - | b <4 - | - |7: // Tailcall from a vararg function. - | andix. TMP0, TMP2, FRAME_TYPEP - | bne <1 // Vararg frame below? - | sub BASE, BASE, TMP2 // Relocate BASE down. - | lwz TMP1, FRAME_PC(BASE) - | andix. TMP0, TMP1, FRAME_TYPE - | b <1 - break; - - case BC_ITERC: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) - | mr TMP2, BASE - | add BASE, BASE, RA - | lwz TMP1, -24(BASE) - | lwz LFUNC:RB, -20(BASE) - | lfd f1, -8(BASE) - | lfd f0, -16(BASE) - | stw TMP1, 0(BASE) // Copy callable. - | stw LFUNC:RB, 4(BASE) - | checkfunc TMP1 - | stfd f1, 16(BASE) // Copy control var. - | li NARGS8:RC, 16 // Iterators get 2 arguments. - | stfdu f0, 8(BASE) // Copy state. - | bne ->vmeta_call - | ins_call - break; - - case BC_ITERN: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. 
- |.endif - | add RA, BASE, RA - | lwz TAB:RB, -12(RA) - | lwz RC, -4(RA) // Get index from control var. - | lwz TMP0, TAB:RB->asize - | lwz TMP1, TAB:RB->array - | addi PC, PC, 4 - |1: // Traverse array part. - | cmplw RC, TMP0 - | slwi TMP3, RC, 3 - | bge >5 // Index points after array part? - | lwzx TMP2, TMP1, TMP3 - | lfdx f0, TMP1, TMP3 - | checknil TMP2 - | lwz INS, -4(PC) - | beq >4 - |.if DUALNUM - | stw RC, 4(RA) - | stw TISNUM, 0(RA) - |.else - | tonum_u f1, RC - |.endif - | addi RC, RC, 1 - | addis TMP3, PC, -(BCBIAS_J*4 >> 16) - | stfd f0, 8(RA) - | decode_RD4 TMP1, INS - | stw RC, -4(RA) // Update control var. - | add PC, TMP1, TMP3 - |.if not DUALNUM - | stfd f1, 0(RA) - |.endif - |3: - | ins_next - | - |4: // Skip holes in array part. - | addi RC, RC, 1 - | b <1 - | - |5: // Traverse hash part. - | lwz TMP1, TAB:RB->hmask - | sub RC, RC, TMP0 - | lwz TMP2, TAB:RB->node - |6: - | cmplw RC, TMP1 // End of iteration? Branch to ITERL+1. - | slwi TMP3, RC, 5 - | bgty <3 - | slwi RB, RC, 3 - | sub TMP3, TMP3, RB - | lwzx RB, TMP2, TMP3 - | lfdx f0, TMP2, TMP3 - | add NODE:TMP3, TMP2, TMP3 - | checknil RB - | lwz INS, -4(PC) - | beq >7 - | lfd f1, NODE:TMP3->key - | addis TMP2, PC, -(BCBIAS_J*4 >> 16) - | stfd f0, 8(RA) - | add RC, RC, TMP0 - | decode_RD4 TMP1, INS - | stfd f1, 0(RA) - | addi RC, RC, 1 - | add PC, TMP1, TMP2 - | stw RC, -4(RA) // Update control var. - | b <3 - | - |7: // Skip holes in hash part. 
- | addi RC, RC, 1 - | b <6 - break; - - case BC_ISNEXT: - | // RA = base*8, RD = target (points to ITERN) - | add RA, BASE, RA - | lwz TMP0, -24(RA) - | lwz CFUNC:TMP1, -20(RA) - | lwz TMP2, -16(RA) - | lwz TMP3, -8(RA) - | cmpwi cr0, TMP2, LJ_TTAB - | cmpwi cr1, TMP0, LJ_TFUNC - | cmpwi cr6, TMP3, LJ_TNIL - | bne cr1, >5 - | lbz TMP1, CFUNC:TMP1->ffid - | crand 4*cr0+eq, 4*cr0+eq, 4*cr6+eq - | cmpwi cr7, TMP1, FF_next_N - | srwi TMP0, RD, 1 - | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq - | add TMP3, PC, TMP0 - | bne cr0, >5 - | lus TMP1, 0xfffe - | ori TMP1, TMP1, 0x7fff - | stw ZERO, -4(RA) // Initialize control var. - | stw TMP1, -8(RA) - | addis PC, TMP3, -(BCBIAS_J*4 >> 16) - |1: - | ins_next - |5: // Despecialize bytecode if any of the checks fail. - | li TMP0, BC_JMP - | li TMP1, BC_ITERC - | stb TMP0, -1(PC) - | addis PC, TMP3, -(BCBIAS_J*4 >> 16) - | stb TMP1, 3(PC) - | b <1 - break; - - case BC_VARG: - | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 - | lwz TMP0, FRAME_PC(BASE) - | add RC, BASE, RC - | add RA, BASE, RA - | addi RC, RC, FRAME_VARG - | add TMP2, RA, RB - | subi TMP3, BASE, 8 // TMP3 = vtop - | sub RC, RC, TMP0 // RC = vbase - | // Note: RC may now be even _above_ BASE if nargs was < numparams. - | cmplwi cr1, RB, 0 - |.if PPE - | sub TMP1, TMP3, RC - | cmpwi TMP1, 0 - |.else - | sub. TMP1, TMP3, RC - |.endif - | beq cr1, >5 // Copy all varargs? - | subi TMP2, TMP2, 16 - | ble >2 // No vararg slots? - |1: // Copy vararg slots to destination slots. - | lfd f0, 0(RC) - | addi RC, RC, 8 - | stfd f0, 0(RA) - | cmplw RA, TMP2 - | cmplw cr1, RC, TMP3 - | bge >3 // All destination slots filled? - | addi RA, RA, 8 - | blt cr1, <1 // More vararg slots? - |2: // Fill up remainder with nil. - | stw TISNIL, 0(RA) - | cmplw RA, TMP2 - | addi RA, RA, 8 - | blt <2 - |3: - | ins_next - | - |5: // Copy all varargs. - | lwz TMP0, L->maxstack - | li MULTRES, 8 // MULTRES = (0+1)*8 - | bley <3 // No vararg slots? 
- | add TMP2, RA, TMP1 - | cmplw TMP2, TMP0 - | addi MULTRES, TMP1, 8 - | bgt >7 - |6: - | lfd f0, 0(RC) - | addi RC, RC, 8 - | stfd f0, 0(RA) - | cmplw RC, TMP3 - | addi RA, RA, 8 - | blt <6 // More vararg slots? - | b <3 - | - |7: // Grow stack for varargs. - | mr CARG1, L - | stp RA, L->top - | sub SAVE0, RC, BASE // Need delta, because BASE may change. - | stp BASE, L->base - | sub RA, RA, BASE - | stw PC, SAVE_PC - | srwi CARG2, TMP1, 3 - | bl extern lj_state_growstack // (lua_State *L, int n) - | lp BASE, L->base - | add RA, BASE, RA - | add RC, BASE, SAVE0 - | subi TMP3, BASE, 8 - | b <6 - break; - - /* -- Returns ----------------------------------------------------------- */ - - case BC_RETM: - | // RA = results*8, RD = extra_nresults*8 - | add RD, RD, MULTRES // MULTRES >= 8, so RD >= 8. - | // Fall through. Assumes BC_RET follows. - break; - - case BC_RET: - | // RA = results*8, RD = (nresults+1)*8 - | lwz PC, FRAME_PC(BASE) - | add RA, BASE, RA - | mr MULTRES, RD - |1: - | andix. TMP0, PC, FRAME_TYPE - | xori TMP1, PC, FRAME_VARG - | bne ->BC_RETV_Z - | - |->BC_RET_Z: - | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return - | lwz INS, -4(PC) - | cmpwi RD, 8 - | subi TMP2, BASE, 8 - | subi RC, RD, 8 - | decode_RB8 RB, INS - | beq >3 - | li TMP1, 0 - |2: - | addi TMP3, TMP1, 8 - | lfdx f0, RA, TMP1 - | cmpw TMP3, RC - | stfdx f0, TMP2, TMP1 - | beq >3 - | addi TMP1, TMP3, 8 - | lfdx f1, RA, TMP3 - | cmpw TMP1, RC - | stfdx f1, TMP2, TMP3 - | bne <2 - |3: - |5: - | cmplw RB, RD - | decode_RA8 RA, INS - | bgt >6 - | sub BASE, TMP2, RA - | lwz LFUNC:TMP1, FRAME_FUNC(BASE) - | ins_next1 - | lwz TMP1, LFUNC:TMP1->pc - | lwz KBASE, PC2PROTO(k)(TMP1) - | ins_next2 - | - |6: // Fill up results with nil. - | subi TMP1, RD, 8 - | addi RD, RD, 8 - | stwx TISNIL, TMP2, TMP1 - | b <5 - | - |->BC_RETV_Z: // Non-standard return case. - | andix. TMP2, TMP1, FRAME_TYPEP - | bne ->vm_return - | // Return from vararg function: relocate BASE down. 
- | sub BASE, BASE, TMP1 - | lwz PC, FRAME_PC(BASE) - | b <1 - break; - - case BC_RET0: case BC_RET1: - | // RA = results*8, RD = (nresults+1)*8 - | lwz PC, FRAME_PC(BASE) - | add RA, BASE, RA - | mr MULTRES, RD - | andix. TMP0, PC, FRAME_TYPE - | xori TMP1, PC, FRAME_VARG - | bney ->BC_RETV_Z - | - | lwz INS, -4(PC) - | subi TMP2, BASE, 8 - | decode_RB8 RB, INS - if (op == BC_RET1) { - | lfd f0, 0(RA) - | stfd f0, 0(TMP2) - } - |5: - | cmplw RB, RD - | decode_RA8 RA, INS - | bgt >6 - | sub BASE, TMP2, RA - | lwz LFUNC:TMP1, FRAME_FUNC(BASE) - | ins_next1 - | lwz TMP1, LFUNC:TMP1->pc - | lwz KBASE, PC2PROTO(k)(TMP1) - | ins_next2 - | - |6: // Fill up results with nil. - | subi TMP1, RD, 8 - | addi RD, RD, 8 - | stwx TISNIL, TMP2, TMP1 - | b <5 - break; - - /* -- Loops and branches ------------------------------------------------ */ - - case BC_FORL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IFORL follows. - break; - - case BC_JFORI: - case BC_JFORL: -#if !LJ_HASJIT - break; -#endif - case BC_FORI: - case BC_IFORL: - | // RA = base*8, RD = target (after end of loop or start of loop) - vk = (op == BC_IFORL || op == BC_JFORL); - |.if DUALNUM - | // Integer loop. - | lwzux TMP1, RA, BASE - | lwz CARG1, FORL_IDX*8+4(RA) - | cmplw cr0, TMP1, TISNUM - if (vk) { - | lwz CARG3, FORL_STEP*8+4(RA) - | bne >9 - |.if GPR64 - | // Need to check overflow for (a<<32) + (b<<32). - | rldicr TMP0, CARG1, 32, 31 - | rldicr TMP2, CARG3, 32, 31 - | add CARG1, CARG1, CARG3 - | addo. TMP0, TMP0, TMP2 - |.else - | addo. 
CARG1, CARG1, CARG3 - |.endif - | cmpwi cr6, CARG3, 0 - | lwz CARG2, FORL_STOP*8+4(RA) - | bso >6 - |4: - | stw CARG1, FORL_IDX*8+4(RA) - } else { - | lwz TMP3, FORL_STEP*8(RA) - | lwz CARG3, FORL_STEP*8+4(RA) - | lwz TMP2, FORL_STOP*8(RA) - | lwz CARG2, FORL_STOP*8+4(RA) - | cmplw cr7, TMP3, TISNUM - | cmplw cr1, TMP2, TISNUM - | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq - | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq - | cmpwi cr6, CARG3, 0 - | bne >9 - } - | blt cr6, >5 - | cmpw CARG1, CARG2 - |1: - | stw TISNUM, FORL_EXT*8(RA) - if (op != BC_JFORL) { - | srwi RD, RD, 1 - } - | stw CARG1, FORL_EXT*8+4(RA) - if (op != BC_JFORL) { - | add RD, PC, RD - } - if (op == BC_FORI) { - | bgt >3 // See FP loop below. - } else if (op == BC_JFORI) { - | addis PC, RD, -(BCBIAS_J*4 >> 16) - | bley >7 - } else if (op == BC_IFORL) { - | bgt >2 - | addis PC, RD, -(BCBIAS_J*4 >> 16) - } else { - | bley =>BC_JLOOP - } - |2: - | ins_next - |5: // Invert check for negative step. - | cmpw CARG2, CARG1 - | b <1 - if (vk) { - |6: // Potential overflow. - | checkov TMP0, <4 // Ignore unrelated overflow. - | b <2 - } - |.endif - if (vk) { - |.if DUALNUM - |9: // FP loop. - | lfd f1, FORL_IDX*8(RA) - |.else - | lfdux f1, RA, BASE - |.endif - | lfd f3, FORL_STEP*8(RA) - | lfd f2, FORL_STOP*8(RA) - | lwz TMP3, FORL_STEP*8(RA) - | fadd f1, f1, f3 - | stfd f1, FORL_IDX*8(RA) - } else { - |.if DUALNUM - |9: // FP loop. 
- |.else - | lwzux TMP1, RA, BASE - | lwz TMP3, FORL_STEP*8(RA) - | lwz TMP2, FORL_STOP*8(RA) - | cmplw cr0, TMP1, TISNUM - | cmplw cr7, TMP3, TISNUM - | cmplw cr1, TMP2, TISNUM - |.endif - | lfd f1, FORL_IDX*8(RA) - | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | lfd f2, FORL_STOP*8(RA) - | bge ->vmeta_for - } - | cmpwi cr6, TMP3, 0 - if (op != BC_JFORL) { - | srwi RD, RD, 1 - } - | stfd f1, FORL_EXT*8(RA) - if (op != BC_JFORL) { - | add RD, PC, RD - } - | fcmpu cr0, f1, f2 - if (op == BC_JFORI) { - | addis PC, RD, -(BCBIAS_J*4 >> 16) - } - | blt cr6, >5 - if (op == BC_FORI) { - | bgt >3 - } else if (op == BC_IFORL) { - |.if DUALNUM - | bgty <2 - |.else - | bgt >2 - |.endif - |1: - | addis PC, RD, -(BCBIAS_J*4 >> 16) - } else if (op == BC_JFORI) { - | bley >7 - } else { - | bley =>BC_JLOOP - } - |.if DUALNUM - | b <2 - |.else - |2: - | ins_next - |.endif - |5: // Negative step. - if (op == BC_FORI) { - | bge <2 - |3: // Used by integer loop, too. - | addis PC, RD, -(BCBIAS_J*4 >> 16) - } else if (op == BC_IFORL) { - | bgey <1 - } else if (op == BC_JFORI) { - | bgey >7 - } else { - | bgey =>BC_JLOOP - } - | b <2 - if (op == BC_JFORI) { - |7: - | lwz INS, -4(PC) - | decode_RD8 RD, INS - | b =>BC_JLOOP - } - break; - - case BC_ITERL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IITERL follows. - break; - - case BC_JITERL: -#if !LJ_HASJIT - break; -#endif - case BC_IITERL: - | // RA = base*8, RD = target - | lwzux TMP1, RA, BASE - | lwz TMP2, 4(RA) - | checknil TMP1; beq >1 // Stop if iterator returned nil. - if (op == BC_JITERL) { - | stw TMP1, -8(RA) - | stw TMP2, -4(RA) - | b =>BC_JLOOP - } else { - | branch_RD // Otherwise save control var + branch. 
- | stw TMP1, -8(RA) - | stw TMP2, -4(RA) - } - |1: - | ins_next - break; - - case BC_LOOP: - | // RA = base*8, RD = target (loop extent) - | // Note: RA/RD is only used by trace recorder to determine scope/extent - | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_ILOOP follows. - break; - - case BC_ILOOP: - | // RA = base*8, RD = target (loop extent) - | ins_next - break; - - case BC_JLOOP: - |.if JIT - | // RA = base*8 (ignored), RD = traceno*8 - | lwz TMP1, DISPATCH_J(trace)(DISPATCH) - | srwi RD, RD, 1 - | // Traces on PPC don't store the trace number, so use 0. - | stw ZERO, DISPATCH_GL(vmstate)(DISPATCH) - | lwzx TRACE:TMP2, TMP1, RD - | clrso TMP1 - | lp TMP2, TRACE:TMP2->mcode - | stw BASE, DISPATCH_GL(jit_base)(DISPATCH) - | mtctr TMP2 - | addi JGL, DISPATCH, GG_DISP2G+32768 - | stw L, DISPATCH_GL(tmpbuf.L)(DISPATCH) - | bctr - |.endif - break; - - case BC_JMP: - | // RA = base*8 (only used by trace recorder), RD = target - | branch_RD - | ins_next - break; - - /* -- Function headers -------------------------------------------------- */ - - case BC_FUNCF: - |.if JIT - | hotcall - |.endif - case BC_FUNCV: /* NYI: compiled vararg functions. */ - | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. - break; - - case BC_JFUNCF: -#if !LJ_HASJIT - break; -#endif - case BC_IFUNCF: - | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 - | lwz TMP2, L->maxstack - | lbz TMP1, -4+PC2PROTO(numparams)(PC) - | lwz KBASE, -4+PC2PROTO(k)(PC) - | cmplw RA, TMP2 - | slwi TMP1, TMP1, 3 - | bgt ->vm_growstack_l - if (op != BC_JFUNCF) { - | ins_next1 - } - |2: - | cmplw NARGS8:RC, TMP1 // Check for missing parameters. - | blt >3 - if (op == BC_JFUNCF) { - | decode_RD8 RD, INS - | b =>BC_JLOOP - } else { - | ins_next2 - } - | - |3: // Clear missing parameters. 
- | stwx TISNIL, BASE, NARGS8:RC - | addi NARGS8:RC, NARGS8:RC, 8 - | b <2 - break; - - case BC_JFUNCV: -#if !LJ_HASJIT - break; -#endif - | NYI // NYI: compiled vararg functions - break; /* NYI: compiled vararg functions. */ - - case BC_IFUNCV: - | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 - | lwz TMP2, L->maxstack - | add TMP1, BASE, RC - | add TMP0, RA, RC - | stw LFUNC:RB, 4(TMP1) // Store copy of LFUNC. - | addi TMP3, RC, 8+FRAME_VARG - | lwz KBASE, -4+PC2PROTO(k)(PC) - | cmplw TMP0, TMP2 - | stw TMP3, 0(TMP1) // Store delta + FRAME_VARG. - | bge ->vm_growstack_l - | lbz TMP2, -4+PC2PROTO(numparams)(PC) - | mr RA, BASE - | mr RC, TMP1 - | ins_next1 - | cmpwi TMP2, 0 - | addi BASE, TMP1, 8 - | beq >3 - |1: - | cmplw RA, RC // Less args than parameters? - | lwz TMP0, 0(RA) - | lwz TMP3, 4(RA) - | bge >4 - | stw TISNIL, 0(RA) // Clear old fixarg slot (help the GC). - | addi RA, RA, 8 - |2: - | addic. TMP2, TMP2, -1 - | stw TMP0, 8(TMP1) - | stw TMP3, 12(TMP1) - | addi TMP1, TMP1, 8 - | bne <1 - |3: - | ins_next2 - | - |4: // Clear missing parameters. - | li TMP0, LJ_TNIL - | b <2 - break; - - case BC_FUNCC: - case BC_FUNCCW: - | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 - if (op == BC_FUNCC) { - | lp RD, CFUNC:RB->f - } else { - | lp RD, DISPATCH_GL(wrapf)(DISPATCH) - } - | add TMP1, RA, NARGS8:RC - | lwz TMP2, L->maxstack - | .toc lp TMP3, 0(RD) - | add RC, BASE, NARGS8:RC - | stp BASE, L->base - | cmplw TMP1, TMP2 - | stp RC, L->top - | li_vmstate C - |.if TOC - | mtctr TMP3 - |.else - | mtctr RD - |.endif - if (op == BC_FUNCCW) { - | lp CARG2, CFUNC:RB->f - } - | mr CARG1, L - | bgt ->vm_growstack_c // Need to grow stack. - | .toc lp TOCREG, TOC_OFS(RD) - | .tocenv lp ENVREG, ENV_OFS(RD) - | st_vmstate - | bctrl // (lua_State *L [, lua_CFunction f]) - | // Returns nresults. 
- | lp BASE, L->base - | .toc ld TOCREG, SAVE_TOC - | slwi RD, CRET1, 3 - | lp TMP1, L->top - | li_vmstate INTERP - | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. - | stw L, DISPATCH_GL(cur_L)(DISPATCH) - | sub RA, TMP1, RD // RA = L->top - nresults*8 - | st_vmstate - | b ->vm_returnc - break; - - /* ---------------------------------------------------------------------- */ - - default: - fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); - exit(2); - break; - } -} - -static int build_backend(BuildCtx *ctx) -{ - int op; - - dasm_growpc(Dst, BC__MAX); - - build_subroutines(ctx); - - |.code_op - for (op = 0; op < BC__MAX; op++) - build_ins(ctx, (BCOp)op, op); - - return BC__MAX; -} - -/* Emit pseudo frame-info for all assembler functions. */ -static void emit_asm_debug(BuildCtx *ctx) -{ - int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); - int i; - switch (ctx->mode) { - case BUILD_elfasm: - fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); - fprintf(ctx->fp, - ".Lframe0:\n" - "\t.long .LECIE0-.LSCIE0\n" - ".LSCIE0:\n" - "\t.long 0xffffffff\n" - "\t.byte 0x1\n" - "\t.string \"\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 65\n" - "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE0:\n\n"); - fprintf(ctx->fp, - ".LSFDE0:\n" - "\t.long .LEFDE0-.LASFDE0\n" - ".LASFDE0:\n" - "\t.long .Lframe0\n" - "\t.long .Lbegin\n" - "\t.long %d\n" - "\t.byte 0xe\n\t.uleb128 %d\n" - "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" - "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n", - fcofs, CFRAME_SIZE); - for (i = 14; i <= 31; i++) - fprintf(ctx->fp, - "\t.byte %d\n\t.uleb128 %d\n" - "\t.byte %d\n\t.uleb128 %d\n", - 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)); - fprintf(ctx->fp, - "\t.align 2\n" - ".LEFDE0:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".LSFDE1:\n" - "\t.long .LEFDE1-.LASFDE1\n" - ".LASFDE1:\n" - "\t.long .Lframe0\n" -#if LJ_TARGET_PS3 - "\t.long .lj_vm_ffi_call\n" -#else - "\t.long lj_vm_ffi_call\n" 
-#endif - "\t.long %d\n" - "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" - "\t.byte 0x8e\n\t.uleb128 2\n" - "\t.byte 0xd\n\t.uleb128 0xe\n" - "\t.align 2\n" - ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); -#endif -#if !LJ_NO_UNWIND - fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); - fprintf(ctx->fp, - ".Lframe1:\n" - "\t.long .LECIE1-.LSCIE1\n" - ".LSCIE1:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.string \"zPR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 65\n" - "\t.uleb128 6\n" /* augmentation length */ - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.long lj_err_unwind_dwarf-.\n" - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE1:\n\n"); - fprintf(ctx->fp, - ".LSFDE2:\n" - "\t.long .LEFDE2-.LASFDE2\n" - ".LASFDE2:\n" - "\t.long .LASFDE2-.Lframe1\n" - "\t.long .Lbegin-.\n" - "\t.long %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.uleb128 %d\n" - "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" - "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n", - fcofs, CFRAME_SIZE); - for (i = 14; i <= 31; i++) - fprintf(ctx->fp, - "\t.byte %d\n\t.uleb128 %d\n" - "\t.byte %d\n\t.uleb128 %d\n", - 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)); - fprintf(ctx->fp, - "\t.align 2\n" - ".LEFDE2:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".Lframe2:\n" - "\t.long .LECIE2-.LSCIE2\n" - ".LSCIE2:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.string \"zR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 65\n" - "\t.uleb128 1\n" /* augmentation length */ - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE2:\n\n"); - fprintf(ctx->fp, - ".LSFDE3:\n" - "\t.long .LEFDE3-.LASFDE3\n" - ".LASFDE3:\n" - "\t.long .LASFDE3-.Lframe2\n" - "\t.long lj_vm_ffi_call-.\n" - "\t.long %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" - "\t.byte 0x8e\n\t.uleb128 2\n" - "\t.byte 0xd\n\t.uleb128 0xe\n" - 
"\t.align 2\n" - ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); -#endif -#endif - break; - default: - break; - } -} - diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc deleted file mode 100644 index 211ae7b922..0000000000 --- a/src/vm_x86.dasc +++ /dev/null @@ -1,5780 +0,0 @@ -|// Low-level VM code for x86 CPUs. -|// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -| -|.if P64 -|.arch x64 -|.else -|.arch x86 -|.endif -|.section code_op, code_sub -| -|.actionlist build_actionlist -|.globals GLOB_ -|.globalnames globnames -|.externnames extnames -| -|//----------------------------------------------------------------------- -| -|.if P64 -|.define X64, 1 -|.if WIN -|.define X64WIN, 1 -|.endif -|.endif -| -|// Fixed register assignments for the interpreter. -|// This is very fragile and has many dependencies. Caveat emptor. -|.define BASE, edx // Not C callee-save, refetched anyway. -|.if not X64 -|.define KBASE, edi // Must be C callee-save. -|.define KBASEa, KBASE -|.define PC, esi // Must be C callee-save. -|.define PCa, PC -|.define DISPATCH, ebx // Must be C callee-save. -|.elif X64WIN -|.define KBASE, edi // Must be C callee-save. -|.define KBASEa, rdi -|.define PC, esi // Must be C callee-save. -|.define PCa, rsi -|.define DISPATCH, ebx // Must be C callee-save. -|.else -|.define KBASE, r15d // Must be C callee-save. -|.define KBASEa, r15 -|.define PC, ebx // Must be C callee-save. -|.define PCa, rbx -|.define DISPATCH, r14d // Must be C callee-save. -|.endif -| -|.define RA, ecx -|.define RAH, ch -|.define RAL, cl -|.define RB, ebp // Must be ebp (C callee-save). -|.define RC, eax // Must be eax. 
-|.define RCW, ax -|.define RCH, ah -|.define RCL, al -|.define OP, RB -|.define RD, RC -|.define RDW, RCW -|.define RDL, RCL -|.if X64 -|.define RAa, rcx -|.define RBa, rbp -|.define RCa, rax -|.define RDa, rax -|.else -|.define RAa, RA -|.define RBa, RB -|.define RCa, RC -|.define RDa, RD -|.endif -| -|.if not X64 -|.define FCARG1, ecx // x86 fastcall arguments. -|.define FCARG2, edx -|.elif X64WIN -|.define CARG1, rcx // x64/WIN64 C call arguments. -|.define CARG2, rdx -|.define CARG3, r8 -|.define CARG4, r9 -|.define CARG1d, ecx -|.define CARG2d, edx -|.define CARG3d, r8d -|.define CARG4d, r9d -|.define FCARG1, CARG1d // Upwards compatible to x86 fastcall. -|.define FCARG2, CARG2d -|.else -|.define CARG1, rdi // x64/POSIX C call arguments. -|.define CARG2, rsi -|.define CARG3, rdx -|.define CARG4, rcx -|.define CARG5, r8 -|.define CARG6, r9 -|.define CARG1d, edi -|.define CARG2d, esi -|.define CARG3d, edx -|.define CARG4d, ecx -|.define CARG5d, r8d -|.define CARG6d, r9d -|.define FCARG1, CARG1d // Simulate x86 fastcall. -|.define FCARG2, CARG2d -|.endif -| -|// Type definitions. Some of these are only used for documentation. -|.type L, lua_State -|.type GL, global_State -|.type TVALUE, TValue -|.type GCOBJ, GCobj -|.type STR, GCstr -|.type TAB, GCtab -|.type LFUNC, GCfuncL -|.type CFUNC, GCfuncC -|.type PROTO, GCproto -|.type UPVAL, GCupval -|.type NODE, Node -|.type NARGS, int -|.type TRACE, GCtrace -|.type SBUF, SBuf -| -|// Stack layout while in interpreter. Must match with lj_frame.h. -|//----------------------------------------------------------------------- -|.if not X64 // x86 stack layout. -| -|.if WIN -| -|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--). -|.macro saveregs_ -| push edi; push esi; push ebx -| push extern lj_err_unwind_win -| fs; push dword [0] -| fs; mov [0], esp -| sub esp, CFRAME_SPACE -|.endmacro -|.macro restoreregs -| add esp, CFRAME_SPACE -| fs; pop dword [0] -| pop edi // Short for esp += 4. 
-| pop ebx; pop esi; pop edi; pop ebp -|.endmacro -| -|.else -| -|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). -|.macro saveregs_ -| push edi; push esi; push ebx -| sub esp, CFRAME_SPACE -|.endmacro -|.macro restoreregs -| add esp, CFRAME_SPACE -| pop ebx; pop esi; pop edi; pop ebp -|.endmacro -| -|.endif -| -|.macro saveregs -| push ebp; saveregs_ -|.endmacro -| -|.if WIN -|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only. -|.define SAVE_NRES, aword [esp+aword*18] -|.define SAVE_CFRAME, aword [esp+aword*17] -|.define SAVE_L, aword [esp+aword*16] -|//----- 16 byte aligned, ^^^ arguments from C caller -|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter. -|.define SAVE_R4, aword [esp+aword*14] -|.define SAVE_R3, aword [esp+aword*13] -|.define SAVE_R2, aword [esp+aword*12] -|//----- 16 byte aligned -|.define SAVE_R1, aword [esp+aword*11] -|.define SEH_FUNC, aword [esp+aword*10] -|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves. -|.define UNUSED2, aword [esp+aword*8] -|//----- 16 byte aligned -|.define UNUSED1, aword [esp+aword*7] -|.define SAVE_PC, aword [esp+aword*6] -|.define TMP2, aword [esp+aword*5] -|.define TMP1, aword [esp+aword*4] -|//----- 16 byte aligned -|.define ARG4, aword [esp+aword*3] -|.define ARG3, aword [esp+aword*2] -|.define ARG2, aword [esp+aword*1] -|.define ARG1, aword [esp] //<-- esp while in interpreter. -|//----- 16 byte aligned, ^^^ arguments for C callee -|.else -|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. -|.define SAVE_NRES, aword [esp+aword*14] -|.define SAVE_CFRAME, aword [esp+aword*13] -|.define SAVE_L, aword [esp+aword*12] -|//----- 16 byte aligned, ^^^ arguments from C caller -|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter. 
-|.define SAVE_R4, aword [esp+aword*10] -|.define SAVE_R3, aword [esp+aword*9] -|.define SAVE_R2, aword [esp+aword*8] -|//----- 16 byte aligned -|.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves. -|.define SAVE_PC, aword [esp+aword*6] -|.define TMP2, aword [esp+aword*5] -|.define TMP1, aword [esp+aword*4] -|//----- 16 byte aligned -|.define ARG4, aword [esp+aword*3] -|.define ARG3, aword [esp+aword*2] -|.define ARG2, aword [esp+aword*1] -|.define ARG1, aword [esp] //<-- esp while in interpreter. -|//----- 16 byte aligned, ^^^ arguments for C callee -|.endif -| -|// FPARGx overlaps ARGx and ARG(x+1) on x86. -|.define FPARG3, qword [esp+qword*1] -|.define FPARG1, qword [esp] -|// TMPQ overlaps TMP1/TMP2. ARG5/MULTRES overlap TMP1/TMP2 (and TMPQ). -|.define TMPQ, qword [esp+aword*4] -|.define TMP3, ARG4 -|.define ARG5, TMP1 -|.define TMPa, TMP1 -|.define MULTRES, TMP2 -| -|// Arguments for vm_call and vm_pcall. -|.define INARG_BASE, SAVE_CFRAME // Overwritten by SAVE_CFRAME! -| -|// Arguments for vm_cpcall. -|.define INARG_CP_CALL, SAVE_ERRF -|.define INARG_CP_UD, SAVE_NRES -|.define INARG_CP_FUNC, SAVE_CFRAME -| -|//----------------------------------------------------------------------- -|.elif X64WIN // x64/Windows stack layout -| -|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). 
-|.macro saveregs_ -| push rdi; push rsi; push rbx -| sub rsp, CFRAME_SPACE -|.endmacro -|.macro saveregs -| push rbp; saveregs_ -|.endmacro -|.macro restoreregs -| add rsp, CFRAME_SPACE -| pop rbx; pop rsi; pop rdi; pop rbp -|.endmacro -| -|.define SAVE_CFRAME, aword [rsp+aword*13] -|.define SAVE_PC, dword [rsp+dword*25] -|.define SAVE_L, dword [rsp+dword*24] -|.define SAVE_ERRF, dword [rsp+dword*23] -|.define SAVE_NRES, dword [rsp+dword*22] -|.define TMP2, dword [rsp+dword*21] -|.define TMP1, dword [rsp+dword*20] -|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter -|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. -|.define SAVE_R4, aword [rsp+aword*8] -|.define SAVE_R3, aword [rsp+aword*7] -|.define SAVE_R2, aword [rsp+aword*6] -|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. -|.define ARG5, aword [rsp+aword*4] -|.define CSAVE_4, aword [rsp+aword*3] -|.define CSAVE_3, aword [rsp+aword*2] -|.define CSAVE_2, aword [rsp+aword*1] -|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter. -|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee -| -|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ). -|.define TMPQ, qword [rsp+aword*10] -|.define MULTRES, TMP2 -|.define TMPa, ARG5 -|.define ARG5d, dword [rsp+aword*4] -|.define TMP3, ARG5d -| -|//----------------------------------------------------------------------- -|.else // x64/POSIX stack layout -| -|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). -|.macro saveregs_ -| push rbx; push r15; push r14 -|.if NO_UNWIND -| push r13; push r12 -|.endif -| sub rsp, CFRAME_SPACE -|.endmacro -|.macro saveregs -| push rbp; saveregs_ -|.endmacro -|.macro restoreregs -| add rsp, CFRAME_SPACE -|.if NO_UNWIND -| pop r12; pop r13 -|.endif -| pop r14; pop r15; pop rbx; pop rbp -|.endmacro -| -|//----- 16 byte aligned, -|.if NO_UNWIND -|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter. 
-|.define SAVE_R4, aword [rsp+aword*10] -|.define SAVE_R3, aword [rsp+aword*9] -|.define SAVE_R2, aword [rsp+aword*8] -|.define SAVE_R1, aword [rsp+aword*7] -|.define SAVE_RU2, aword [rsp+aword*6] -|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves. -|.else -|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. -|.define SAVE_R4, aword [rsp+aword*8] -|.define SAVE_R3, aword [rsp+aword*7] -|.define SAVE_R2, aword [rsp+aword*6] -|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. -|.endif -|.define SAVE_CFRAME, aword [rsp+aword*4] -|.define SAVE_PC, dword [rsp+dword*7] -|.define SAVE_L, dword [rsp+dword*6] -|.define SAVE_ERRF, dword [rsp+dword*5] -|.define SAVE_NRES, dword [rsp+dword*4] -|.define TMPa, aword [rsp+aword*1] -|.define TMP2, dword [rsp+dword*1] -|.define TMP1, dword [rsp] //<-- rsp while in interpreter. -|//----- 16 byte aligned -| -|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ). -|.define TMPQ, qword [rsp] -|.define TMP3, dword [rsp+aword*1] -|.define MULTRES, TMP2 -| -|.endif -| -|//----------------------------------------------------------------------- -| -|// Instruction headers. -|.macro ins_A; .endmacro -|.macro ins_AD; .endmacro -|.macro ins_AJ; .endmacro -|.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro -|.macro ins_AB_; movzx RB, RCH; .endmacro -|.macro ins_A_C; movzx RC, RCL; .endmacro -|.macro ins_AND; not RDa; .endmacro -| -|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster). -|.macro ins_NEXT -| mov RC, [PC] -| movzx RA, RCH -| movzx OP, RCL -| add PC, 4 -| shr RC, 16 -|.if X64 -| jmp aword [DISPATCH+OP*8] -|.else -| jmp aword [DISPATCH+OP*4] -|.endif -|.endmacro -| -|// Instruction footer. -|.if 1 -| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. -| .define ins_next, ins_NEXT -| .define ins_next_, ins_NEXT -|.else -| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. 
-| // Affects only certain kinds of benchmarks (and only with -j off). -| // Around 10%-30% slower on Core2, a lot more slower on P4. -| .macro ins_next -| jmp ->ins_next -| .endmacro -| .macro ins_next_ -| ->ins_next: -| ins_NEXT -| .endmacro -|.endif -| -|// Call decode and dispatch. -|.macro ins_callt -| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-4] = PC -| mov PC, LFUNC:RB->pc -| mov RA, [PC] -| movzx OP, RAL -| movzx RA, RAH -| add PC, 4 -|.if X64 -| jmp aword [DISPATCH+OP*8] -|.else -| jmp aword [DISPATCH+OP*4] -|.endif -|.endmacro -| -|.macro ins_call -| // BASE = new base, RB = LFUNC, RD = nargs+1 -| mov [BASE-4], PC -| ins_callt -|.endmacro -| -|//----------------------------------------------------------------------- -| -|// Macros to test operand types. -|.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro -|.macro checknum, reg, target; checktp reg, LJ_TISNUM; jae target; .endmacro -|.macro checkint, reg, target; checktp reg, LJ_TISNUM; jne target; .endmacro -|.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro -|.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro -| -|// These operands must be used with movzx. -|.define PC_OP, byte [PC-4] -|.define PC_RA, byte [PC-3] -|.define PC_RB, byte [PC-1] -|.define PC_RC, byte [PC-2] -|.define PC_RD, word [PC-2] -| -|.macro branchPC, reg -| lea PC, [PC+reg*4-BCBIAS_J*4] -|.endmacro -| -|// Assumes DISPATCH is relative to GL. -#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) -#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) -| -#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) -| -|// Decrement hashed hotcount and trigger trace recorder if zero. 
-|.macro hotloop, reg -| mov reg, PC -| shr reg, 1 -| and reg, HOTCOUNT_PCMASK -| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP -| jb ->vm_hotloop -|.endmacro -| -|.macro hotcall, reg -| mov reg, PC -| shr reg, 1 -| and reg, HOTCOUNT_PCMASK -| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL -| jb ->vm_hotcall -|.endmacro -| -|// Set current VM state. -|.macro set_vmstate, st -| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st -|.endmacro -| -|// x87 compares. -|.macro fcomparepp // Compare and pop st0 >< st1. -| fucomip st1 -| fpop -|.endmacro -| -|.macro fpop1; fstp st1; .endmacro -| -|// Synthesize SSE FP constants. -|.macro sseconst_abs, reg, tmp // Synthesize abs mask. -|.if X64 -| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp -|.else -| pxor reg, reg; pcmpeqd reg, reg; psrlq reg, 1 -|.endif -|.endmacro -| -|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const. -|.if X64 -| mov64 tmp, U64x(val,00000000); movd reg, tmp -|.else -| mov tmp, 0x .. val; movd reg, tmp; pshufd reg, reg, 0x51 -|.endif -|.endmacro -| -|.macro sseconst_sign, reg, tmp // Synthesize sign mask. -| sseconst_hi reg, tmp, 80000000 -|.endmacro -|.macro sseconst_1, reg, tmp // Synthesize 1.0. -| sseconst_hi reg, tmp, 3ff00000 -|.endmacro -|.macro sseconst_m1, reg, tmp // Synthesize -1.0. -| sseconst_hi reg, tmp, bff00000 -|.endmacro -|.macro sseconst_2p52, reg, tmp // Synthesize 2^52. -| sseconst_hi reg, tmp, 43300000 -|.endmacro -|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51. -| sseconst_hi reg, tmp, 43380000 -|.endmacro -| -|// Move table write barrier back. Overwrites reg. -|.macro barrierback, tab, reg -| and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab) -| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] -| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab -| mov tab->gclist, reg -|.endmacro -| -|//----------------------------------------------------------------------- - -/* Generate subroutines used by opcodes and other parts of the VM. 
*/ -/* The .code_sub section should be last to help static branch prediction. */ -static void build_subroutines(BuildCtx *ctx) -{ - |.code_sub - | - |//----------------------------------------------------------------------- - |//-- Return handling ---------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_returnp: - | test PC, FRAME_P - | jz ->cont_dispatch - | - | // Return from pcall or xpcall fast func. - | and PC, -8 - | sub BASE, PC // Restore caller base. - | lea RAa, [RA+PC-8] // Rebase RA and prepend one result. - | mov PC, [BASE-4] // Fetch PC of previous frame. - | // Prepending may overwrite the pcall frame, so do it at the end. - | mov dword [BASE+RA+4], LJ_TTRUE // Prepend true to results. - | - |->vm_returnc: - | add RD, 1 // RD = nresults+1 - | jz ->vm_unwind_yield - | mov MULTRES, RD - | test PC, FRAME_TYPE - | jz ->BC_RET_Z // Handle regular return to Lua. - | - |->vm_return: - | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return - | xor PC, FRAME_C - | test PC, FRAME_TYPE - | jnz ->vm_returnp - | - | // Return to C. - | set_vmstate C - | and PC, -8 - | sub PC, BASE - | neg PC // Previous base = BASE - delta. - | - | sub RD, 1 - | jz >2 - |1: // Move results down. - |.if X64 - | mov RBa, [BASE+RA] - | mov [BASE-8], RBa - |.else - | mov RB, [BASE+RA] - | mov [BASE-8], RB - | mov RB, [BASE+RA+4] - | mov [BASE-4], RB - |.endif - | add BASE, 8 - | sub RD, 1 - | jnz <1 - |2: - | mov L:RB, SAVE_L - | mov L:RB->base, PC - |3: - | mov RD, MULTRES - | mov RA, SAVE_NRES // RA = wanted nresults+1 - |4: - | cmp RA, RD - | jne >6 // More/less results wanted? - |5: - | sub BASE, 8 - | mov L:RB->top, BASE - | - |->vm_leave_cp: - | mov RAa, SAVE_CFRAME // Restore previous C frame. - | mov L:RB->cframe, RAa - | xor eax, eax // Ok return status for vm_pcall. - | - |->vm_leave_unw: - | restoreregs - | ret - | - |6: - | jb >7 // Less results wanted? 
- | // More results wanted. Check stack size and fill up results with nil. - | cmp BASE, L:RB->maxstack - | ja >8 - | mov dword [BASE-4], LJ_TNIL - | add BASE, 8 - | add RD, 1 - | jmp <4 - | - |7: // Less results wanted. - | test RA, RA - | jz <5 // But check for LUA_MULTRET+1. - | sub RA, RD // Negative result! - | lea BASE, [BASE+RA*8] // Correct top. - | jmp <5 - | - |8: // Corner case: need to grow stack for filling up results. - | // This can happen if: - | // - A C function grows the stack (a lot). - | // - The GC shrinks the stack in between. - | // - A return back from a lua_call() with (high) nresults adjustment. - | mov L:RB->top, BASE // Save current top held in BASE (yes). - | mov MULTRES, RD // Need to fill only remainder with nil. - | mov FCARG2, RA - | mov FCARG1, L:RB - | call extern lj_state_growstack@8 // (lua_State *L, int n) - | mov BASE, L:RB->top // Need the (realloced) L->top in BASE. - | jmp <3 - | - |->vm_unwind_yield: - | mov al, LUA_YIELD - | jmp ->vm_unwind_c_eh - | - |->vm_unwind_c@8: // Unwind C stack, return from vm_pcall. - | // (void *cframe, int errcode) - |.if X64 - | mov eax, CARG2d // Error return status for vm_pcall. - | mov rsp, CARG1 - |.else - | mov eax, FCARG2 // Error return status for vm_pcall. - | mov esp, FCARG1 - |.if WIN - | lea FCARG1, SEH_NEXT - | fs; mov [0], FCARG1 - |.endif - |.endif - |->vm_unwind_c_eh: // Landing pad for external unwinder. - | mov L:RB, SAVE_L - | mov GL:RB, L:RB->glref - | mov dword GL:RB->vmstate, ~LJ_VMST_C - | jmp ->vm_leave_unw - | - |->vm_unwind_rethrow: - |.if X64 and not X64WIN - | mov FCARG1, SAVE_L - | mov FCARG2, eax - | restoreregs - | jmp extern lj_err_throw@8 // (lua_State *L, int errcode) - |.endif - | - |->vm_unwind_ff@4: // Unwind C stack, return from ff pcall. 
- | // (void *cframe) - |.if X64 - | and CARG1, CFRAME_RAWMASK - | mov rsp, CARG1 - |.else - | and FCARG1, CFRAME_RAWMASK - | mov esp, FCARG1 - |.if WIN - | lea FCARG1, SEH_NEXT - | fs; mov [0], FCARG1 - |.endif - |.endif - |->vm_unwind_ff_eh: // Landing pad for external unwinder. - | mov L:RB, SAVE_L - | mov RAa, -8 // Results start at BASE+RA = BASE-8. - | mov RD, 1+1 // Really 1+2 results, incr. later. - | mov BASE, L:RB->base - | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. - | add DISPATCH, GG_G2DISP - | mov PC, [BASE-4] // Fetch PC of previous frame. - | mov dword [BASE-4], LJ_TFALSE // Prepend false to error message. - | set_vmstate INTERP - | jmp ->vm_returnc // Increments RD/MULTRES and returns. - | - |.if WIN and not X64 - |->vm_rtlunwind@16: // Thin layer around RtlUnwind. - | // (void *cframe, void *excptrec, void *unwinder, int errcode) - | mov [esp], FCARG1 // Return value for RtlUnwind. - | push FCARG2 // Exception record for RtlUnwind. - | push 0 // Ignored by RtlUnwind. - | push dword [FCARG1+CFRAME_OFS_SEH] - | call extern RtlUnwind@16 // Violates ABI (clobbers too much). - | mov FCARG1, eax - | mov FCARG2, [esp+4] // errcode (for vm_unwind_c). - | ret // Jump to unwinder. - |.endif - | - |//----------------------------------------------------------------------- - |//-- Grow stack for calls ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_growstack_c: // Grow stack for C function. - | mov FCARG2, LUA_MINSTACK - | jmp >2 - | - |->vm_growstack_v: // Grow stack for vararg Lua function. - | sub RD, 8 - | jmp >1 - | - |->vm_growstack_f: // Grow stack for fixarg Lua function. - | // BASE = new base, RD = nargs+1, RB = L, PC = first PC - | lea RD, [BASE+NARGS:RD*8-8] - |1: - | movzx RA, byte [PC-4+PC2PROTO(framesize)] - | add PC, 4 // Must point after first instruction. 
- | mov L:RB->base, BASE - | mov L:RB->top, RD - | mov SAVE_PC, PC - | mov FCARG2, RA - |2: - | // RB = L, L->base = new base, L->top = top - | mov FCARG1, L:RB - | call extern lj_state_growstack@8 // (lua_State *L, int n) - | mov BASE, L:RB->base - | mov RD, L:RB->top - | mov LFUNC:RB, [BASE-8] - | sub RD, BASE - | shr RD, 3 - | add NARGS:RD, 1 - | // BASE = new base, RB = LFUNC, RD = nargs+1 - | ins_callt // Just retry the call. - | - |//----------------------------------------------------------------------- - |//-- Entry points into the assembler VM --------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_resume: // Setup C frame and resume thread. - | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) - | saveregs - |.if X64 - | mov L:RB, CARG1d // Caveat: CARG1d may be RA. - | mov SAVE_L, CARG1d - | mov RA, CARG2d - |.else - | mov L:RB, SAVE_L - | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! - |.endif - | mov PC, FRAME_CP - | xor RD, RD - | lea KBASEa, [esp+CFRAME_RESUME] - | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. - | add DISPATCH, GG_G2DISP - | mov SAVE_PC, RD // Any value outside of bytecode is ok. - | mov SAVE_CFRAME, RDa - |.if X64 - | mov SAVE_NRES, RD - | mov SAVE_ERRF, RD - |.endif - | mov L:RB->cframe, KBASEa - | cmp byte L:RB->status, RDL - | je >2 // Initial resume (like a call). - | - | // Resume after yield (like a return). - | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | set_vmstate INTERP - | mov byte L:RB->status, RDL - | mov BASE, L:RB->base - | mov RD, L:RB->top - | sub RD, RA - | shr RD, 3 - | add RD, 1 // RD = nresults+1 - | sub RA, BASE // RA = resultofs - | mov PC, [BASE-4] - | mov MULTRES, RD - | test PC, FRAME_TYPE - | jz ->BC_RET_Z - | jmp ->vm_return - | - |->vm_pcall: // Setup protected C frame and enter VM. 
- | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) - | saveregs - | mov PC, FRAME_CP - |.if X64 - | mov SAVE_ERRF, CARG4d - |.endif - | jmp >1 - | - |->vm_call: // Setup C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1) - | saveregs - | mov PC, FRAME_C - | - |1: // Entry point for vm_pcall above (PC = ftype). - |.if X64 - | mov SAVE_NRES, CARG3d - | mov L:RB, CARG1d // Caveat: CARG1d may be RA. - | mov SAVE_L, CARG1d - | mov RA, CARG2d - |.else - | mov L:RB, SAVE_L - | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! - |.endif - | - | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. - | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. - | mov SAVE_CFRAME, KBASEa - | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. - | add DISPATCH, GG_G2DISP - |.if X64 - | mov L:RB->cframe, rsp - |.else - | mov L:RB->cframe, esp - |.endif - | - |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype). - | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | set_vmstate INTERP - | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). - | add PC, RA - | sub PC, BASE // PC = frame delta + frame type - | - | mov RD, L:RB->top - | sub RD, RA - | shr NARGS:RD, 3 - | add NARGS:RD, 1 // RD = nargs+1 - | - |->vm_call_dispatch: - | mov LFUNC:RB, [RA-8] - | cmp dword [RA-4], LJ_TFUNC - | jne ->vmeta_call // Ensure KBASE defined and != BASE. - | - |->vm_call_dispatch_f: - | mov BASE, RA - | ins_call - | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC - | - |->vm_cpcall: // Setup protected C frame, call C. - | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) - | saveregs - |.if X64 - | mov L:RB, CARG1d // Caveat: CARG1d may be RA. - | mov SAVE_L, CARG1d - |.else - | mov L:RB, SAVE_L - | // Caveat: INARG_CP_* and SAVE_CFRAME/SAVE_NRES/SAVE_ERRF overlap! - | mov RC, INARG_CP_UD // Get args before they are overwritten. 
- | mov RA, INARG_CP_FUNC - | mov BASE, INARG_CP_CALL - |.endif - | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. - | - | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). - | sub KBASE, L:RB->top - | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. - | mov SAVE_ERRF, 0 // No error function. - | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. - | add DISPATCH, GG_G2DISP - | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). - | - |.if X64 - | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. - | mov SAVE_CFRAME, KBASEa - | mov L:RB->cframe, rsp - | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | - | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) - |.else - | mov ARG3, RC // Have to copy args downwards. - | mov ARG2, RA - | mov ARG1, L:RB - | - | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. - | mov SAVE_CFRAME, KBASE - | mov L:RB->cframe, esp - | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | - | call BASE // (lua_State *L, lua_CFunction func, void *ud) - |.endif - | // TValue * (new base) or NULL returned in eax (RC). - | test RC, RC - | jz ->vm_leave_cp // No base? Just remove C frame. - | mov RA, RC - | mov PC, FRAME_CP - | jmp <2 // Else continue with the call. - | - |//----------------------------------------------------------------------- - |//-- Metamethod handling ------------------------------------------------ - |//----------------------------------------------------------------------- - | - |//-- Continuation dispatch ---------------------------------------------- - | - |->cont_dispatch: - | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES) - | add RA, BASE - | and PC, -8 - | mov RB, BASE - | sub BASE, PC // Restore caller BASE. - | mov dword [RA+RD*8-4], LJ_TNIL // Ensure one valid arg. - | mov RC, RA // ... in [RC] - | mov PC, [RB-12] // Restore PC from [cont|PC]. 
- |.if X64 - | movsxd RAa, dword [RB-16] // May be negative on WIN64 with debug. - |.if FFI - | cmp RA, 1 - | jbe >1 - |.endif - | lea KBASEa, qword [=>0] - | add RAa, KBASEa - |.else - | mov RA, dword [RB-16] - |.if FFI - | cmp RA, 1 - | jbe >1 - |.endif - |.endif - | mov LFUNC:KBASE, [BASE-8] - | mov KBASE, LFUNC:KBASE->pc - | mov KBASE, [KBASE+PC2PROTO(k)] - | // BASE = base, RC = result, RB = meta base - | jmp RAa // Jump to continuation. - | - |.if FFI - |1: - | je ->cont_ffi_callback // cont = 1: return from FFI callback. - | // cont = 0: Tail call from C function. - | sub RB, BASE - | shr RB, 3 - | lea RD, [RB-1] - | jmp ->vm_call_tail - |.endif - | - |->cont_cat: // BASE = base, RC = result, RB = mbase - | movzx RA, PC_RB - | sub RB, 16 - | lea RA, [BASE+RA*8] - | sub RA, RB - | je ->cont_ra - | neg RA - | shr RA, 3 - |.if X64WIN - | mov CARG3d, RA - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE - | mov RCa, [RC] - | mov [RB], RCa - | mov CARG2d, RB - |.elif X64 - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE - | mov CARG3d, RA - | mov RAa, [RC] - | mov [RB], RAa - | mov CARG2d, RB - |.else - | mov ARG3, RA - | mov RA, [RC+4] - | mov RC, [RC] - | mov [RB+4], RA - | mov [RB], RC - | mov ARG2, RB - |.endif - | jmp ->BC_CAT_Z - | - |//-- Table indexing metamethods ----------------------------------------- - | - |->vmeta_tgets: - | mov TMP1, RC // RC = GCstr * - | mov TMP2, LJ_TSTR - | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2. - | cmp PC_OP, BC_GGET - | jne >1 - | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. - | mov [RA], TAB:RB // RB = GCtab * - | mov dword [RA+4], LJ_TTAB - | mov RB, RA - | jmp >2 - | - |->vmeta_tgetb: - | movzx RC, PC_RC - |.if DUALNUM - | mov TMP2, LJ_TISNUM - | mov TMP1, RC - |.else - | cvtsi2sd xmm0, RC - | movsd TMPQ, xmm0 - |.endif - | lea RCa, TMPQ // Store temp. TValue in TMPQ. - | jmp >1 - | - |->vmeta_tgetv: - | movzx RC, PC_RC // Reload TValue *k from RC. 
- | lea RC, [BASE+RC*8] - |1: - | movzx RB, PC_RB // Reload TValue *t from RB. - | lea RB, [BASE+RB*8] - |2: - |.if X64 - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE. - | mov CARG2d, RB - | mov CARG3, RCa // May be 64 bit ptr to stack. - | mov L:RB, L:CARG1d - |.else - | mov ARG2, RB - | mov L:RB, SAVE_L - | mov ARG3, RC - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) - | // TValue * (finished) or NULL (metamethod) returned in eax (RC). - | mov BASE, L:RB->base - | test RC, RC - | jz >3 - |->cont_ra: // BASE = base, RC = result - | movzx RA, PC_RA - |.if X64 - | mov RBa, [RC] - | mov [BASE+RA*8], RBa - |.else - | mov RB, [RC+4] - | mov RC, [RC] - | mov [BASE+RA*8+4], RB - | mov [BASE+RA*8], RC - |.endif - | ins_next - | - |3: // Call __index metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k - | mov RA, L:RB->top - | mov [RA-12], PC // [cont|PC] - | lea PC, [RA+FRAME_CONT] - | sub PC, BASE - | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here. - | mov NARGS:RD, 2+1 // 2 args for func(t, k). - | jmp ->vm_call_dispatch_f - | - |->vmeta_tgetr: - | mov FCARG1, TAB:RB - | mov RB, BASE // Save BASE. - | mov FCARG2, RC // Caveat: FCARG2 == BASE - | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key) - | // cTValue * or NULL returned in eax (RC). - | movzx RA, PC_RA - | mov BASE, RB // Restore BASE. - | test RC, RC - | jnz ->BC_TGETR_Z - | mov dword [BASE+RA*8+4], LJ_TNIL - | jmp ->BC_TGETR2_Z - | - |//----------------------------------------------------------------------- - | - |->vmeta_tsets: - | mov TMP1, RC // RC = GCstr * - | mov TMP2, LJ_TSTR - | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2. - | cmp PC_OP, BC_GSET - | jne >1 - | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. 
- | mov [RA], TAB:RB // RB = GCtab * - | mov dword [RA+4], LJ_TTAB - | mov RB, RA - | jmp >2 - | - |->vmeta_tsetb: - | movzx RC, PC_RC - |.if DUALNUM - | mov TMP2, LJ_TISNUM - | mov TMP1, RC - |.else - | cvtsi2sd xmm0, RC - | movsd TMPQ, xmm0 - |.endif - | lea RCa, TMPQ // Store temp. TValue in TMPQ. - | jmp >1 - | - |->vmeta_tsetv: - | movzx RC, PC_RC // Reload TValue *k from RC. - | lea RC, [BASE+RC*8] - |1: - | movzx RB, PC_RB // Reload TValue *t from RB. - | lea RB, [BASE+RB*8] - |2: - |.if X64 - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE. - | mov CARG2d, RB - | mov CARG3, RCa // May be 64 bit ptr to stack. - | mov L:RB, L:CARG1d - |.else - | mov ARG2, RB - | mov L:RB, SAVE_L - | mov ARG3, RC - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) - | // TValue * (finished) or NULL (metamethod) returned in eax (RC). - | mov BASE, L:RB->base - | test RC, RC - | jz >3 - | // NOBARRIER: lj_meta_tset ensures the table is not black. - | movzx RA, PC_RA - |.if X64 - | mov RBa, [BASE+RA*8] - | mov [RC], RBa - |.else - | mov RB, [BASE+RA*8+4] - | mov RA, [BASE+RA*8] - | mov [RC+4], RB - | mov [RC], RA - |.endif - |->cont_nop: // BASE = base, (RC = result) - | ins_next - | - |3: // Call __newindex metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) - | mov RA, L:RB->top - | mov [RA-12], PC // [cont|PC] - | movzx RC, PC_RA - | // Copy value to third argument. - |.if X64 - | mov RBa, [BASE+RC*8] - | mov [RA+16], RBa - |.else - | mov RB, [BASE+RC*8+4] - | mov RC, [BASE+RC*8] - | mov [RA+20], RB - | mov [RA+16], RC - |.endif - | lea PC, [RA+FRAME_CONT] - | sub PC, BASE - | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here. - | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). 
- | jmp ->vm_call_dispatch_f - | - |->vmeta_tsetr: - |.if X64WIN - | mov L:CARG1d, SAVE_L - | mov CARG3d, RC - | mov L:CARG1d->base, BASE - | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE. - |.elif X64 - | mov L:CARG1d, SAVE_L - | mov CARG2d, TAB:RB - | mov L:CARG1d->base, BASE - | mov RB, BASE // Save BASE. - | mov CARG3d, RC // Caveat: CARG3d == BASE. - |.else - | mov L:RA, SAVE_L - | mov ARG2, TAB:RB - | mov RB, BASE // Save BASE. - | mov ARG3, RC - | mov ARG1, L:RA - | mov L:RA->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) - | // TValue * returned in eax (RC). - | movzx RA, PC_RA - | mov BASE, RB // Restore BASE. - | jmp ->BC_TSETR_Z - | - |//-- Comparison metamethods --------------------------------------------- - | - |->vmeta_comp: - |.if X64 - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d == BASE. - |.if X64WIN - | lea CARG3d, [BASE+RD*8] - | lea CARG2d, [BASE+RA*8] - |.else - | lea CARG2d, [BASE+RA*8] - | lea CARG3d, [BASE+RD*8] - |.endif - | mov CARG1d, L:RB // Caveat: CARG1d/CARG4d == RA. - | movzx CARG4d, PC_OP - |.else - | movzx RB, PC_OP - | lea RD, [BASE+RD*8] - | lea RA, [BASE+RA*8] - | mov ARG4, RB - | mov L:RB, SAVE_L - | mov ARG3, RD - | mov ARG2, RA - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) - | // 0/1 or TValue * (metamethod) returned in eax (RC). - |3: - | mov BASE, L:RB->base - | cmp RC, 1 - | ja ->vmeta_binop - |4: - | lea PC, [PC+4] - | jb >6 - |5: - | movzx RD, PC_RD - | branchPC RD - |6: - | ins_next - | - |->cont_condt: // BASE = base, RC = result - | add PC, 4 - | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is true. - | jb <5 - | jmp <6 - | - |->cont_condf: // BASE = base, RC = result - | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is false. 
- | jmp <4 - | - |->vmeta_equal: - | sub PC, 4 - |.if X64WIN - | mov CARG3d, RD - | mov CARG4d, RB - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG2d == BASE. - | mov CARG2d, RA - | mov CARG1d, L:RB // Caveat: CARG1d == RA. - |.elif X64 - | mov CARG2d, RA - | mov CARG4d, RB // Caveat: CARG4d == RA. - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG3d == BASE. - | mov CARG3d, RD - | mov CARG1d, L:RB - |.else - | mov ARG4, RB - | mov L:RB, SAVE_L - | mov ARG3, RD - | mov ARG2, RA - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) - | // 0/1 or TValue * (metamethod) returned in eax (RC). - | jmp <3 - | - |->vmeta_equal_cd: - |.if FFI - | sub PC, 4 - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov FCARG1, L:RB - | mov FCARG2, dword [PC-4] - | mov SAVE_PC, PC - | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns ins) - | // 0/1 or TValue * (metamethod) returned in eax (RC). - | jmp <3 - |.endif - | - |->vmeta_istype: - |.if X64 - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. 
- | mov CARG2d, RA - | movzx CARG3d, PC_RD - | mov L:CARG1d, L:RB - |.else - | movzx RD, PC_RD - | mov ARG2, RA - | mov L:RB, SAVE_L - | mov ARG3, RD - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) - | mov BASE, L:RB->base - | jmp <6 - | - |//-- Arithmetic metamethods --------------------------------------------- - | - |->vmeta_arith_vno: - |.if DUALNUM - | movzx RB, PC_RB - |.endif - |->vmeta_arith_vn: - | lea RC, [KBASE+RC*8] - | jmp >1 - | - |->vmeta_arith_nvo: - |.if DUALNUM - | movzx RC, PC_RC - |.endif - |->vmeta_arith_nv: - | lea RC, [KBASE+RC*8] - | lea RB, [BASE+RB*8] - | xchg RB, RC - | jmp >2 - | - |->vmeta_unm: - | lea RC, [BASE+RD*8] - | mov RB, RC - | jmp >2 - | - |->vmeta_arith_vvo: - |.if DUALNUM - | movzx RB, PC_RB - |.endif - |->vmeta_arith_vv: - | lea RC, [BASE+RC*8] - |1: - | lea RB, [BASE+RB*8] - |2: - | lea RA, [BASE+RA*8] - |.if X64WIN - | mov CARG3d, RB - | mov CARG4d, RC - | movzx RC, PC_OP - | mov ARG5d, RC - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG2d == BASE. - | mov CARG2d, RA - | mov CARG1d, L:RB // Caveat: CARG1d == RA. - |.elif X64 - | movzx CARG5d, PC_OP - | mov CARG2d, RA - | mov CARG4d, RC // Caveat: CARG4d == RA. - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE // Caveat: CARG3d == BASE. - | mov CARG3d, RB - | mov L:RB, L:CARG1d - |.else - | mov ARG3, RB - | mov L:RB, SAVE_L - | mov ARG4, RC - | movzx RC, PC_OP - | mov ARG2, RA - | mov ARG5, RC - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) - | // NULL (finished) or TValue * (metamethod) returned in eax (RC). - | mov BASE, L:RB->base - | test RC, RC - | jz ->cont_nop - | - | // Call metamethod for binary op. 
- |->vmeta_binop: - | // BASE = base, RC = new base, stack = cont/func/o1/o2 - | mov RA, RC // RA = new base (address). - | sub RC, BASE // RC = new base - BASE (byte offset). - | mov [RA-12], PC // [cont|PC] - | lea PC, [RC+FRAME_CONT] // PC = (new base - BASE) + FRAME_CONT. - | mov NARGS:RD, 2+1 // 2 args for func(o1, o2). - | jmp ->vm_call_dispatch - | - |->vmeta_len: // Calls lj_meta_len(L, &BASE[RD]); operand index is in RD. - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Publish BASE in L->base before the C call. - | lea FCARG2, [BASE+RD*8] // Caveat: FCARG2 == BASE - | mov L:FCARG1, L:RB - | mov SAVE_PC, PC - | call extern lj_meta_len@8 // (lua_State *L, TValue *o) - | // NULL (retry) or TValue * (metamethod) returned in eax (RC). - | mov BASE, L:RB->base // Reload BASE from L->base after the call. -#if LJ_52 - | test RC, RC // Non-NULL result: a metamethod has to be called. - | jne ->vmeta_binop // Binop call for compatibility. - | movzx RD, PC_RD // NULL result: reload operand index RD. - | mov TAB:FCARG1, [BASE+RD*8] // Load the table reference from slot RD. - | jmp ->BC_LEN_Z // Retry via BC_LEN_Z. -#else - | jmp ->vmeta_binop // Binop call for compatibility. -#endif - | - |//-- Call metamethod ---------------------------------------------------- - | - |->vmeta_call_ra: // Entry with RA = slot index; falls through to vmeta_call. - | lea RA, [BASE+RA*8+8] // Convert slot index to address BASE+RA*8+8. - |->vmeta_call: // Resolve and call __call metamethod. - | // BASE = old base, RA = new base, RC = nargs+1, PC = return - | mov TMP2, RA // Save RA, RC for us. - | mov TMP1, NARGS:RD - | sub RA, 8 // RA = new base - 8, passed as the 'func' argument. - |.if X64 - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. - | mov CARG2d, RA - | lea CARG3d, [RA+NARGS:RD*8] // 'top' argument = RA + NARGS*8. - | mov CARG1d, L:RB // Caveat: CARG1d may be RA. - |.else - | lea RC, [RA+NARGS:RD*8] // 'top' argument = RA + NARGS*8. - | mov L:RB, SAVE_L - | mov ARG2, RA - | mov ARG3, RC - | mov ARG1, L:RB - | mov L:RB->base, BASE // This is the caller's base! - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | mov BASE, L:RB->base // Reload BASE from L->base after the call. - | mov RA, TMP2 // Restore RA (new base). - | mov NARGS:RD, TMP1 // Restore nargs+1. - | mov LFUNC:RB, [RA-8] // Load the function reference stored just below the new base. - | add NARGS:RD, 1 // One more argument; NOTE(review): presumably lj_meta_call inserted the called object as first arg -- confirm. - | // This is fragile. L->base must not move, KBASE must always be defined. - | cmp KBASE, BASE // Continue with CALLT if flag set. - | je ->BC_CALLT_Z - | mov BASE, RA // BASE = new base. - | ins_call // Otherwise call resolved metamethod. 
- | - |//-- Argument coercion for 'for' statement ------------------------------ - | - |->vmeta_for: // Calls lj_meta_for(L, RA), then re-dispatches the instruction at PC-4. - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Publish BASE in L->base before the C call. - | mov FCARG2, RA // Caveat: FCARG2 == BASE - | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA - | mov SAVE_PC, PC - | call extern lj_meta_for@8 // (lua_State *L, TValue *base) - | mov BASE, L:RB->base // Reload BASE from L->base after the call. - | mov RC, [PC-4] // Re-fetch the 32-bit instruction word at PC-4. - | movzx RA, RCH // RA = byte 1 of that word. - | movzx OP, RCL // OP = byte 0 of that word; indexes the dispatch table below. - | shr RC, 16 // RC = upper 16 bits of that word. - |.if X64 - | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI. - |.else - | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Retry FORI or JFORI. - |.endif - | - |//----------------------------------------------------------------------- - |//-- Fast functions ----------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro .ffunc, name // Emits only the global label ff_<name>; no argument check. - |->ff_ .. name: - |.endmacro - | - |.macro .ffunc_1, name // Label ff_<name> plus a minimum-argument check. - |->ff_ .. name: - | cmp NARGS:RD, 1+1; jb ->fff_fallback // Fallback if NARGS:RD < 1+1. - |.endmacro - | - |.macro .ffunc_2, name // Label ff_<name> plus a minimum-argument check. - |->ff_ .. name: - | cmp NARGS:RD, 2+1; jb ->fff_fallback // Fallback if NARGS:RD < 2+1. - |.endmacro - | - |.macro .ffunc_nsse, name, op // .ffunc_1 plus tag check; applies op to [BASE] into xmm0. - | .ffunc_1 name - | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback // Fallback unless the tag word at BASE+4 is below LJ_TISNUM. - | op xmm0, qword [BASE] // xmm0 = op applied to the qword at BASE. - |.endmacro - | - |.macro .ffunc_nsse, name // One-argument form: uses movsd as the op. - | .ffunc_nsse name, movsd - |.endmacro - | - |.macro .ffunc_nnsse, name // .ffunc_2 plus tag checks on both slots; loads xmm0/xmm1. - | .ffunc_2 name - | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback // Check tag word of first slot. - | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback // Check tag word of second slot. - | movsd xmm0, qword [BASE] // xmm0 = qword at BASE. - | movsd xmm1, qword [BASE+8] // xmm1 = qword at BASE+8. - |.endmacro - | - |.macro .ffunc_nnr, name // Same checks as .ffunc_nnsse, but loads onto the x87 stack. - | .ffunc_2 name - | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback // Check tag word of first slot. - | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback // Check tag word of second slot. - | fld qword [BASE+8] // Push [BASE+8] first, so it ends up in st1. - | fld qword [BASE] // Push [BASE] last, so it ends up in st0. - |.endmacro - | - |// Inlined GC threshold check. Caveat: uses label 1. 
- |.macro ffgccheck - | mov RB, [DISPATCH+DISPATCH_GL(gc.total)] - | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)] - | jb >1 - | call ->fff_gcstep - |1: - |.endmacro - | - |//-- Base library: checks ----------------------------------------------- - | - |.ffunc_1 assert - | mov RB, [BASE+4] - | cmp RB, LJ_TISTRUECOND; jae ->fff_fallback - | mov PC, [BASE-4] - | mov MULTRES, RD - | mov [BASE-4], RB - | mov RB, [BASE] - | mov [BASE-8], RB - | sub RD, 2 - | jz >2 - | mov RA, BASE - |1: - | add RA, 8 - |.if X64 - | mov RBa, [RA] - | mov [RA-8], RBa - |.else - | mov RB, [RA+4] - | mov [RA-4], RB - | mov RB, [RA] - | mov [RA-8], RB - |.endif - | sub RD, 1 - | jnz <1 - |2: - | mov RD, MULTRES - | jmp ->fff_res_ - | - |.ffunc_1 type - | mov RB, [BASE+4] - |.if X64 - | mov RA, RB - | sar RA, 15 - | cmp RA, -2 - | je >3 - |.endif - | mov RC, ~LJ_TNUMX - | not RB - | cmp RC, RB - | cmova RC, RB - |2: - | mov CFUNC:RB, [BASE-8] - | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] - | mov PC, [BASE-4] - | mov dword [BASE-4], LJ_TSTR - | mov [BASE-8], STR:RC - | jmp ->fff_res1 - |.if X64 - |3: - | mov RC, ~LJ_TLIGHTUD - | jmp <2 - |.endif - | - |//-- Base library: getters and setters --------------------------------- - | - |.ffunc_1 getmetatable - | mov RB, [BASE+4] - | mov PC, [BASE-4] - | cmp RB, LJ_TTAB; jne >6 - |1: // Field metatable must be at same offset for GCtab and GCudata! - | mov TAB:RB, [BASE] - | mov TAB:RB, TAB:RB->metatable - |2: - | test TAB:RB, TAB:RB - | mov dword [BASE-4], LJ_TNIL - | jz ->fff_res1 - | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+4*(GCROOT_MMNAME+MM_metatable)] - | mov dword [BASE-4], LJ_TTAB // Store metatable as default result. - | mov [BASE-8], TAB:RB - | mov RA, TAB:RB->hmask - | and RA, STR:RC->hash - | imul RA, #NODE - | add NODE:RA, TAB:RB->node - |3: // Rearranged logic, because we expect _not_ to find the key. 
- | cmp dword NODE:RA->key.it, LJ_TSTR - | jne >4 - | cmp dword NODE:RA->key.gcr, STR:RC - | je >5 - |4: - | mov NODE:RA, NODE:RA->next - | test NODE:RA, NODE:RA - | jnz <3 - | jmp ->fff_res1 // Not found, keep default result. - |5: - | mov RB, [RA+4] - | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value. - | mov RC, [RA] - | mov [BASE-4], RB // Return value of mt.__metatable. - | mov [BASE-8], RC - | jmp ->fff_res1 - | - |6: - | cmp RB, LJ_TUDATA; je <1 - |.if X64 - | cmp RB, LJ_TNUMX; ja >8 - | cmp RB, LJ_TISNUM; jbe >7 - | mov RB, LJ_TLIGHTUD - | jmp >8 - |7: - |.else - | cmp RB, LJ_TISNUM; ja >8 - |.endif - | mov RB, LJ_TNUMX - |8: - | not RB - | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])] - | jmp <2 - | - |.ffunc_2 setmetatable - | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback - | // Fast path: no mt for table yet and not clearing the mt. - | mov TAB:RB, [BASE] - | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback - | cmp dword [BASE+12], LJ_TTAB; jne ->fff_fallback - | mov TAB:RC, [BASE+8] - | mov TAB:RB->metatable, TAB:RC - | mov PC, [BASE-4] - | mov dword [BASE-4], LJ_TTAB // Return original table. - | mov [BASE-8], TAB:RB - | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) - | jz >1 - | // Possible write barrier. Table is black, but skip iswhite(mt) check. - | barrierback TAB:RB, RC - |1: - | jmp ->fff_res1 - | - |.ffunc_2 rawget - | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback - |.if X64WIN - | mov RB, BASE // Save BASE. - | lea CARG3d, [BASE+8] - | mov CARG2d, [BASE] // Caveat: CARG2d == BASE. - | mov CARG1d, SAVE_L - |.elif X64 - | mov RB, BASE // Save BASE. - | mov CARG2d, [BASE] - | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE. - | mov CARG1d, SAVE_L - |.else - | mov TAB:RD, [BASE] - | mov L:RB, SAVE_L - | mov ARG2, TAB:RD - | mov ARG1, L:RB - | mov RB, BASE // Save BASE. 
- | add BASE, 8 - | mov ARG3, BASE - |.endif - | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) - | // cTValue * returned in eax (RD). - | mov BASE, RB // Restore BASE. - | // Copy table slot. - |.if X64 - | mov RBa, [RD] - | mov PC, [BASE-4] - | mov [BASE-8], RBa - |.else - | mov RB, [RD] - | mov RD, [RD+4] - | mov PC, [BASE-4] - | mov [BASE-8], RB - | mov [BASE-4], RD - |.endif - | jmp ->fff_res1 - | - |//-- Base library: conversions ------------------------------------------ - | - |.ffunc tonumber - | // Only handles the number case inline (without a base argument). - | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. - | cmp dword [BASE+4], LJ_TISNUM - |.if DUALNUM - | jne >1 - | mov RB, dword [BASE]; jmp ->fff_resi - |1: - | ja ->fff_fallback - |.else - | jae ->fff_fallback - |.endif - | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 - | - |.ffunc_1 tostring - | // Only handles the string or number case inline. - | mov PC, [BASE-4] - | cmp dword [BASE+4], LJ_TSTR; jne >3 - | // A __tostring method in the string base metatable is ignored. - | mov STR:RD, [BASE] - |2: - | mov dword [BASE-4], LJ_TSTR - | mov [BASE-8], STR:RD - | jmp ->fff_res1 - |3: // Handle numbers inline, unless a number base metatable is present. - | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback - | cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0 - | jne ->fff_fallback - | ffgccheck // Caveat: uses label 1. - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Add frame since C call can throw. - | mov SAVE_PC, PC // Redundant (but a defined value). - |.if X64 and not X64WIN - | mov FCARG2, BASE // Otherwise: FCARG2 == BASE - |.endif - | mov L:FCARG1, L:RB - |.if DUALNUM - | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o) - |.else - | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np) - |.endif - | // GCstr returned in eax (RD). 
- | mov BASE, L:RB->base - | jmp <2 - | - |//-- Base library: iterators ------------------------------------------- - | - |.ffunc_1 next - | je >2 // Missing 2nd arg? - |1: - | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Add frame since C call can throw. - | mov L:RB->top, BASE // Dummy frame length is ok. - | mov PC, [BASE-4] - |.if X64WIN - | lea CARG3d, [BASE+8] - | mov CARG2d, [BASE] // Caveat: CARG2d == BASE. - | mov CARG1d, L:RB - |.elif X64 - | mov CARG2d, [BASE] - | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE. - | mov CARG1d, L:RB - |.else - | mov TAB:RD, [BASE] - | mov ARG2, TAB:RD - | mov ARG1, L:RB - | add BASE, 8 - | mov ARG3, BASE - |.endif - | mov SAVE_PC, PC // Needed for ITERN fallback. - | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - | // Flag returned in eax (RD). - | mov BASE, L:RB->base - | test RD, RD; jz >3 // End of traversal? - | // Copy key and value to results. - |.if X64 - | mov RBa, [BASE+8] - | mov RDa, [BASE+16] - | mov [BASE-8], RBa - | mov [BASE], RDa - |.else - | mov RB, [BASE+8] - | mov RD, [BASE+12] - | mov [BASE-8], RB - | mov [BASE-4], RD - | mov RB, [BASE+16] - | mov RD, [BASE+20] - | mov [BASE], RB - | mov [BASE+4], RD - |.endif - |->fff_res2: - | mov RD, 1+2 - | jmp ->fff_res - |2: // Set missing 2nd arg to nil. - | mov dword [BASE+12], LJ_TNIL - | jmp <1 - |3: // End of traversal: return nil. 
- | mov dword [BASE-4], LJ_TNIL - | jmp ->fff_res1 - | - |.ffunc_1 pairs - | mov TAB:RB, [BASE] - | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback -#if LJ_52 - | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback -#endif - | mov CFUNC:RB, [BASE-8] - | mov CFUNC:RD, CFUNC:RB->upvalue[0] - | mov PC, [BASE-4] - | mov dword [BASE-4], LJ_TFUNC - | mov [BASE-8], CFUNC:RD - | mov dword [BASE+12], LJ_TNIL - | mov RD, 1+3 - | jmp ->fff_res - | - |.ffunc_2 ipairs_aux - | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback - | cmp dword [BASE+12], LJ_TISNUM - |.if DUALNUM - | jne ->fff_fallback - |.else - | jae ->fff_fallback - |.endif - | mov PC, [BASE-4] - |.if DUALNUM - | mov RD, dword [BASE+8] - | add RD, 1 - | mov dword [BASE-4], LJ_TISNUM - | mov dword [BASE-8], RD - |.else - | movsd xmm0, qword [BASE+8] - | sseconst_1 xmm1, RBa - | addsd xmm0, xmm1 - | cvttsd2si RD, xmm0 - | movsd qword [BASE-8], xmm0 - |.endif - | mov TAB:RB, [BASE] - | cmp RD, TAB:RB->asize; jae >2 // Not in array part? - | shl RD, 3 - | add RD, TAB:RB->array - |1: - | cmp dword [RD+4], LJ_TNIL; je ->fff_res0 - | // Copy array slot. - |.if X64 - | mov RBa, [RD] - | mov [BASE], RBa - |.else - | mov RB, [RD] - | mov RD, [RD+4] - | mov [BASE], RB - | mov [BASE+4], RD - |.endif - | jmp ->fff_res2 - |2: // Check for empty hash part first. Otherwise call C function. - | cmp dword TAB:RB->hmask, 0; je ->fff_res0 - | mov FCARG1, TAB:RB - | mov RB, BASE // Save BASE. - | mov FCARG2, RD // Caveat: FCARG2 == BASE - | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key) - | // cTValue * or NULL returned in eax (RD). 
- | mov BASE, RB - | test RD, RD - | jnz <1 - |->fff_res0: - | mov RD, 1+0 - | jmp ->fff_res - | - |.ffunc_1 ipairs - | mov TAB:RB, [BASE] - | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback -#if LJ_52 - | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback -#endif - | mov CFUNC:RB, [BASE-8] - | mov CFUNC:RD, CFUNC:RB->upvalue[0] - | mov PC, [BASE-4] - | mov dword [BASE-4], LJ_TFUNC - | mov [BASE-8], CFUNC:RD - |.if DUALNUM - | mov dword [BASE+12], LJ_TISNUM - | mov dword [BASE+8], 0 - |.else - | xorps xmm0, xmm0 - | movsd qword [BASE+8], xmm0 - |.endif - | mov RD, 1+3 - | jmp ->fff_res - | - |//-- Base library: catch errors ---------------------------------------- - | - |.ffunc_1 pcall - | lea RA, [BASE+8] - | sub NARGS:RD, 1 - | mov PC, 8+FRAME_PCALL - |1: - | movzx RB, byte [DISPATCH+DISPATCH_GL(hookmask)] - | shr RB, HOOK_ACTIVE_SHIFT - | and RB, 1 - | add PC, RB // Remember active hook before pcall. - | jmp ->vm_call_dispatch - | - |.ffunc_2 xpcall - | cmp dword [BASE+12], LJ_TFUNC; jne ->fff_fallback - | mov RB, [BASE+4] // Swap function and traceback. - | mov [BASE+12], RB - | mov dword [BASE+4], LJ_TFUNC - | mov LFUNC:RB, [BASE] - | mov PC, [BASE+8] - | mov [BASE+8], LFUNC:RB - | mov [BASE], PC - | lea RA, [BASE+16] - | sub NARGS:RD, 2 - | mov PC, 16+FRAME_PCALL - | jmp <1 - | - |//-- Coroutine library -------------------------------------------------- - | - |.macro coroutine_resume_wrap, resume - |.if resume - |.ffunc_1 coroutine_resume - | mov L:RB, [BASE] - |.else - |.ffunc coroutine_wrap_aux - | mov CFUNC:RB, [BASE-8] - | mov L:RB, CFUNC:RB->upvalue[0].gcr - |.endif - | mov PC, [BASE-4] - | mov SAVE_PC, PC - |.if X64 - | mov TMP1, L:RB - |.else - | mov ARG1, L:RB - |.endif - |.if resume - | cmp dword [BASE+4], LJ_TTHREAD; jne ->fff_fallback - |.endif - | cmp aword L:RB->cframe, 0; jne ->fff_fallback - | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback - | mov RA, L:RB->top - | je >1 // Status != LUA_YIELD (i.e. 0)? 
- | cmp RA, L:RB->base // Check for presence of initial func. - | je ->fff_fallback - |1: - |.if resume - | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread). - |.else - | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1). - |.endif - | cmp PC, L:RB->maxstack; ja ->fff_fallback - | mov L:RB->top, PC - | - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - |.if resume - | add BASE, 8 // Keep resumed thread in stack for GC. - |.endif - | mov L:RB->top, BASE - |.if resume - | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move. - |.else - | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move. - |.endif - | sub RBa, PCa // Relative to PC. - | - | cmp PC, RA - | je >3 - |2: // Move args to coroutine. - |.if X64 - | mov RCa, [PC+RB] - | mov [PC-8], RCa - |.else - | mov RC, [PC+RB+4] - | mov [PC-4], RC - | mov RC, [PC+RB] - | mov [PC-8], RC - |.endif - | sub PC, 8 - | cmp PC, RA - | jne <2 - |3: - |.if X64 - | mov CARG2d, RA - | mov CARG1d, TMP1 - |.else - | mov ARG2, RA - | xor RA, RA - | mov ARG4, RA - | mov ARG3, RA - |.endif - | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) - | - | mov L:RB, SAVE_L - |.if X64 - | mov L:PC, TMP1 - |.else - | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. - |.endif - | mov BASE, L:RB->base - | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | set_vmstate INTERP - | - | cmp eax, LUA_YIELD - | ja >8 - |4: - | mov RA, L:PC->base - | mov KBASE, L:PC->top - | mov L:PC->top, RA // Clear coroutine stack. - | mov PC, KBASE - | sub PC, RA - | je >6 // No results? - | lea RD, [BASE+PC] - | shr PC, 3 - | cmp RD, L:RB->maxstack - | ja >9 // Need to grow stack? - | - | mov RB, BASE - | sub RBa, RAa - |5: // Move results from coroutine. - |.if X64 - | mov RDa, [RA] - | mov [RA+RB], RDa - |.else - | mov RD, [RA] - | mov [RA+RB], RD - | mov RD, [RA+4] - | mov [RA+RB+4], RD - |.endif - | add RA, 8 - | cmp RA, KBASE - | jne <5 - |6: - |.if resume - | lea RD, [PC+2] // nresults+1 = 1 + true + results. 
- | mov dword [BASE-4], LJ_TTRUE // Prepend true to results. - |.else - | lea RD, [PC+1] // nresults+1 = 1 + results. - |.endif - |7: - | mov PC, SAVE_PC - | mov MULTRES, RD - |.if resume - | mov RAa, -8 - |.else - | xor RA, RA - |.endif - | test PC, FRAME_TYPE - | jz ->BC_RET_Z - | jmp ->vm_return - | - |8: // Coroutine returned with error (at co->top-1). - |.if resume - | mov dword [BASE-4], LJ_TFALSE // Prepend false to results. - | mov RA, L:PC->top - | sub RA, 8 - | mov L:PC->top, RA // Clear error from coroutine stack. - | // Copy error message. - |.if X64 - | mov RDa, [RA] - | mov [BASE], RDa - |.else - | mov RD, [RA] - | mov [BASE], RD - | mov RD, [RA+4] - | mov [BASE+4], RD - |.endif - | mov RD, 1+2 // nresults+1 = 1 + false + error. - | jmp <7 - |.else - | mov FCARG2, L:PC - | mov FCARG1, L:RB - | call extern lj_ffh_coroutine_wrap_err@8 // (lua_State *L, lua_State *co) - | // Error function does not return. - |.endif - | - |9: // Handle stack expansion on return from yield. - |.if X64 - | mov L:RA, TMP1 - |.else - | mov L:RA, ARG1 // The callee doesn't modify SAVE_L. - |.endif - | mov L:RA->top, KBASE // Undo coroutine stack clearing. - | mov FCARG2, PC - | mov FCARG1, L:RB - | call extern lj_state_growstack@8 // (lua_State *L, int n) - |.if X64 - | mov L:PC, TMP1 - |.else - | mov L:PC, ARG1 - |.endif - | mov BASE, L:RB->base - | jmp <4 // Retry the stack move. - |.endmacro - | - | coroutine_resume_wrap 1 // coroutine.resume - | coroutine_resume_wrap 0 // coroutine.wrap - | - |.ffunc coroutine_yield - | mov L:RB, SAVE_L - | test aword L:RB->cframe, CFRAME_RESUME - | jz ->fff_fallback - | mov L:RB->base, BASE - | lea RD, [BASE+NARGS:RD*8-8] - | mov L:RB->top, RD - | xor RD, RD - | mov aword L:RB->cframe, RDa - | mov al, LUA_YIELD - | mov byte L:RB->status, al - | jmp ->vm_leave_unw - | - |//-- Math library ------------------------------------------------------- - | - |.if not DUALNUM - |->fff_resi: // Dummy. 
- |.endif - | - |->fff_resn: - | mov PC, [BASE-4] - | fstp qword [BASE-8] - | jmp ->fff_res1 - | - | .ffunc_1 math_abs - |.if DUALNUM - | cmp dword [BASE+4], LJ_TISNUM; jne >2 - | mov RB, dword [BASE] - | cmp RB, 0; jns ->fff_resi - | neg RB; js >1 - |->fff_resbit: - |->fff_resi: - | mov PC, [BASE-4] - | mov dword [BASE-4], LJ_TISNUM - | mov dword [BASE-8], RB - | jmp ->fff_res1 - |1: - | mov PC, [BASE-4] - | mov dword [BASE-4], 0x41e00000 // 2^31. - | mov dword [BASE-8], 0 - | jmp ->fff_res1 - |2: - | ja ->fff_fallback - |.else - | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback - |.endif - | movsd xmm0, qword [BASE] - | sseconst_abs xmm1, RDa - | andps xmm0, xmm1 - |->fff_resxmm0: - | mov PC, [BASE-4] - | movsd qword [BASE-8], xmm0 - | // fallthrough - | - |->fff_res1: - | mov RD, 1+1 - |->fff_res: - | mov MULTRES, RD - |->fff_res_: - | test PC, FRAME_TYPE - | jnz >7 - |5: - | cmp PC_RB, RDL // More results expected? - | ja >6 - | // Adjust BASE. KBASE is assumed to be set for the calling frame. - | movzx RA, PC_RA - | not RAa // Note: ~RA = -(RA+1) - | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8 - | ins_next - | - |6: // Fill up results with nil. - | mov dword [BASE+RD*8-12], LJ_TNIL - | add RD, 1 - | jmp <5 - | - |7: // Non-standard return case. - | mov RAa, -8 // Results start at BASE+RA = BASE-8. - | jmp ->vm_return - | - |.if X64 - |.define fff_resfp, fff_resxmm0 - |.else - |.define fff_resfp, fff_resn - |.endif - | - |.macro math_round, func - | .ffunc math_ .. func - |.if DUALNUM - | cmp dword [BASE+4], LJ_TISNUM; jne >1 - | mov RB, dword [BASE]; jmp ->fff_resi - |1: - | ja ->fff_fallback - |.else - | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback - |.endif - | movsd xmm0, qword [BASE] - | call ->vm_ .. func .. 
_sse - |.if DUALNUM - | cvttsd2si RB, xmm0 - | cmp RB, 0x80000000 - | jne ->fff_resi - | cvtsi2sd xmm1, RB - | ucomisd xmm0, xmm1 - | jp ->fff_resxmm0 - | je ->fff_resi - |.endif - | jmp ->fff_resxmm0 - |.endmacro - | - | math_round floor - | math_round ceil - | - |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 - | - |.ffunc math_log - | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. - | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback - | movsd xmm0, qword [BASE] - |.if not X64 - | movsd FPARG1, xmm0 - |.endif - | mov RB, BASE - | call extern log - | mov BASE, RB - | jmp ->fff_resfp - | - |.macro math_extern, func - | .ffunc_nsse math_ .. func - |.if not X64 - | movsd FPARG1, xmm0 - |.endif - | mov RB, BASE - | call extern func - | mov BASE, RB - | jmp ->fff_resfp - |.endmacro - | - |.macro math_extern2, func - | .ffunc_nnsse math_ .. func - |.if not X64 - | movsd FPARG1, xmm0 - | movsd FPARG3, xmm1 - |.endif - | mov RB, BASE - | call extern func - | mov BASE, RB - | jmp ->fff_resfp - |.endmacro - | - | math_extern log10 - | math_extern exp - | math_extern sin - | math_extern cos - | math_extern tan - | math_extern asin - | math_extern acos - | math_extern atan - | math_extern sinh - | math_extern cosh - | math_extern tanh - | math_extern2 pow - | math_extern2 atan2 - | math_extern2 fmod - | - |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn - | - |.ffunc_1 math_frexp - | mov RB, [BASE+4] - | cmp RB, LJ_TISNUM; jae ->fff_fallback - | mov PC, [BASE-4] - | mov RC, [BASE] - | mov [BASE-4], RB; mov [BASE-8], RC - | shl RB, 1; cmp RB, 0xffe00000; jae >3 - | or RC, RB; jz >3 - | mov RC, 1022 - | cmp RB, 0x00200000; jb >4 - |1: - | shr RB, 21; sub RB, RC // Extract and unbias exponent. - | cvtsi2sd xmm0, RB - | mov RB, [BASE-4] - | and RB, 0x800fffff // Mask off exponent. - | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. 
- | mov [BASE-4], RB - |2: - | movsd qword [BASE], xmm0 - | mov RD, 1+2 - | jmp ->fff_res - |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. - | xorps xmm0, xmm0; jmp <2 - |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. - | movsd xmm0, qword [BASE] - | sseconst_hi xmm1, RBa, 43500000 // 2^54. - | mulsd xmm0, xmm1 - | movsd qword [BASE-8], xmm0 - | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 - | - |.ffunc_nsse math_modf - | mov RB, [BASE+4] - | mov PC, [BASE-4] - | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? - | movaps xmm4, xmm0 - | call ->vm_trunc_sse - | subsd xmm4, xmm0 - |1: - | movsd qword [BASE-8], xmm0 - | movsd qword [BASE], xmm4 - | mov RC, [BASE-4]; mov RB, [BASE+4] - | xor RC, RB; js >3 // Need to adjust sign? - |2: - | mov RD, 1+2 - | jmp ->fff_res - |3: - | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. - | jmp <2 - |4: - | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. - | - |.macro math_minmax, name, cmovop, sseop - | .ffunc name - | mov RA, 2 - | cmp dword [BASE+4], LJ_TISNUM - |.if DUALNUM - | jne >4 - | mov RB, dword [BASE] - |1: // Handle integers. - | cmp RA, RD; jae ->fff_resi - | cmp dword [BASE+RA*8-4], LJ_TISNUM; jne >3 - | cmp RB, dword [BASE+RA*8-8] - | cmovop RB, dword [BASE+RA*8-8] - | add RA, 1 - | jmp <1 - |3: - | ja ->fff_fallback - | // Convert intermediate result to number and continue below. - | cvtsi2sd xmm0, RB - | jmp >6 - |4: - | ja ->fff_fallback - |.else - | jae ->fff_fallback - |.endif - | - | movsd xmm0, qword [BASE] - |5: // Handle numbers or integers. 
- | cmp RA, RD; jae ->fff_resxmm0 - | cmp dword [BASE+RA*8-4], LJ_TISNUM - |.if DUALNUM - | jb >6 - | ja ->fff_fallback - | cvtsi2sd xmm1, dword [BASE+RA*8-8] - | jmp >7 - |.else - | jae ->fff_fallback - |.endif - |6: - | movsd xmm1, qword [BASE+RA*8-8] - |7: - | sseop xmm0, xmm1 - | add RA, 1 - | jmp <5 - |.endmacro - | - | math_minmax math_min, cmovg, minsd - | math_minmax math_max, cmovl, maxsd - | - |//-- String library ----------------------------------------------------- - | - |.ffunc string_byte // Only handle the 1-arg case here. - | cmp NARGS:RD, 1+1; jne ->fff_fallback - | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback - | mov STR:RB, [BASE] - | mov PC, [BASE-4] - | cmp dword STR:RB->len, 1 - | jb ->fff_res0 // Return no results for empty string. - | movzx RB, byte STR:RB[1] - |.if DUALNUM - | jmp ->fff_resi - |.else - | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 - |.endif - | - |.ffunc string_char // Only handle the 1-arg case here. - | ffgccheck - | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg. - | cmp dword [BASE+4], LJ_TISNUM - |.if DUALNUM - | jne ->fff_fallback - | mov RB, dword [BASE] - | cmp RB, 255; ja ->fff_fallback - | mov TMP2, RB - |.else - | jae ->fff_fallback - | cvttsd2si RB, qword [BASE] - | cmp RB, 255; ja ->fff_fallback - | mov TMP2, RB - |.endif - |.if X64 - | mov TMP3, 1 - |.else - | mov ARG3, 1 - |.endif - | lea RDa, TMP2 // Points to stack. Little-endian. - |->fff_newstr: - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - |.if X64 - | mov CARG3d, TMP3 // Zero-extended to size_t. - | mov CARG2, RDa // May be 64 bit ptr to stack. - | mov CARG1d, L:RB - |.else - | mov ARG2, RD - | mov ARG1, L:RB - |.endif - | mov SAVE_PC, PC - | call extern lj_str_new // (lua_State *L, char *str, size_t l) - |->fff_resstr: - | // GCstr * returned in eax (RD). 
- | mov BASE, L:RB->base - | mov PC, [BASE-4] - | mov dword [BASE-4], LJ_TSTR - | mov [BASE-8], STR:RD - | jmp ->fff_res1 - | - |.ffunc string_sub - | ffgccheck - | mov TMP2, -1 - | cmp NARGS:RD, 1+2; jb ->fff_fallback - | jna >1 - | cmp dword [BASE+20], LJ_TISNUM - |.if DUALNUM - | jne ->fff_fallback - | mov RB, dword [BASE+16] - | mov TMP2, RB - |.else - | jae ->fff_fallback - | cvttsd2si RB, qword [BASE+16] - | mov TMP2, RB - |.endif - |1: - | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback - | cmp dword [BASE+12], LJ_TISNUM - |.if DUALNUM - | jne ->fff_fallback - |.else - | jae ->fff_fallback - |.endif - | mov STR:RB, [BASE] - | mov TMP3, STR:RB - | mov RB, STR:RB->len - |.if DUALNUM - | mov RA, dword [BASE+8] - |.else - | cvttsd2si RA, qword [BASE+8] - |.endif - | mov RC, TMP2 - | cmp RB, RC // len < end? (unsigned compare) - | jb >5 - |2: - | test RA, RA // start <= 0? - | jle >7 - |3: - | mov STR:RB, TMP3 - | sub RC, RA // start > end? - | jl ->fff_emptystr - | lea RB, [STR:RB+RA+#STR-1] - | add RC, 1 - |4: - |.if X64 - | mov TMP3, RC - |.else - | mov ARG3, RC - |.endif - | mov RD, RB - | jmp ->fff_newstr - | - |5: // Negative end or overflow. - | jl >6 - | lea RC, [RC+RB+1] // end = end+(len+1) - | jmp <2 - |6: // Overflow. - | mov RC, RB // end = len - | jmp <2 - | - |7: // Negative start or underflow. - | je >8 - | add RA, RB // start = start+(len+1) - | add RA, 1 - | jg <3 // start > 0? - |8: // Underflow. - | mov RA, 1 // start = 1 - | jmp <3 - | - |->fff_emptystr: // Range underflow. - | xor RC, RC // Zero length. Any ptr in RB is ok. - | jmp <4 - | - |.macro ffstring_op, name - | .ffunc_1 string_ .. name - | ffgccheck - | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback - | mov L:RB, SAVE_L - | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] - | mov L:RB->base, BASE - | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE - | mov RC, SBUF:FCARG1->b - | mov SBUF:FCARG1->L, L:RB - | mov SBUF:FCARG1->p, RC - | mov SAVE_PC, PC - | call extern lj_buf_putstr_ .. 
name .. @8 - | mov FCARG1, eax - | call extern lj_buf_tostr@4 - | jmp ->fff_resstr - |.endmacro - | - |ffstring_op reverse - |ffstring_op lower - |ffstring_op upper - | - |//-- Bit library -------------------------------------------------------- - | - |.macro .ffunc_bit, name, kind, fdef - | fdef name - |.if kind == 2 - | sseconst_tobit xmm1, RBa - |.endif - | cmp dword [BASE+4], LJ_TISNUM - |.if DUALNUM - | jne >1 - | mov RB, dword [BASE] - |.if kind > 0 - | jmp >2 - |.else - | jmp ->fff_resbit - |.endif - |1: - | ja ->fff_fallback - |.else - | jae ->fff_fallback - |.endif - | movsd xmm0, qword [BASE] - |.if kind < 2 - | sseconst_tobit xmm1, RBa - |.endif - | addsd xmm0, xmm1 - | movd RB, xmm0 - |2: - |.endmacro - | - |.macro .ffunc_bit, name, kind - | .ffunc_bit name, kind, .ffunc_1 - |.endmacro - | - |.ffunc_bit bit_tobit, 0 - | jmp ->fff_resbit - | - |.macro .ffunc_bit_op, name, ins - | .ffunc_bit name, 2 - | mov TMP2, NARGS:RD // Save for fallback. - | lea RD, [BASE+NARGS:RD*8-16] - |1: - | cmp RD, BASE - | jbe ->fff_resbit - | cmp dword [RD+4], LJ_TISNUM - |.if DUALNUM - | jne >2 - | ins RB, dword [RD] - | sub RD, 8 - | jmp <1 - |2: - | ja ->fff_fallback_bit_op - |.else - | jae ->fff_fallback_bit_op - |.endif - | movsd xmm0, qword [RD] - | addsd xmm0, xmm1 - | movd RA, xmm0 - | ins RB, RA - | sub RD, 8 - | jmp <1 - |.endmacro - | - |.ffunc_bit_op bit_band, and - |.ffunc_bit_op bit_bor, or - |.ffunc_bit_op bit_bxor, xor - | - |.ffunc_bit bit_bswap, 1 - | bswap RB - | jmp ->fff_resbit - | - |.ffunc_bit bit_bnot, 1 - | not RB - |.if DUALNUM - | jmp ->fff_resbit - |.else - |->fff_resbit: - | cvtsi2sd xmm0, RB - | jmp ->fff_resxmm0 - |.endif - | - |->fff_fallback_bit_op: - | mov NARGS:RD, TMP2 // Restore for fallback - | jmp ->fff_fallback - | - |.macro .ffunc_bit_sh, name, ins - |.if DUALNUM - | .ffunc_bit name, 1, .ffunc_2 - | // Note: no inline conversion from number for 2nd argument! 
- | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback - | mov RA, dword [BASE+8] - |.else - | .ffunc_nnsse name - | sseconst_tobit xmm2, RBa - | addsd xmm0, xmm2 - | addsd xmm1, xmm2 - | movd RB, xmm0 - | movd RA, xmm1 - |.endif - | ins RB, cl // Assumes RA is ecx. - | jmp ->fff_resbit - |.endmacro - | - |.ffunc_bit_sh bit_lshift, shl - |.ffunc_bit_sh bit_rshift, shr - |.ffunc_bit_sh bit_arshift, sar - |.ffunc_bit_sh bit_rol, rol - |.ffunc_bit_sh bit_ror, ror - | - |//----------------------------------------------------------------------- - | - |->fff_fallback_2: - | mov NARGS:RD, 1+2 // Other args are ignored, anyway. - | jmp ->fff_fallback - |->fff_fallback_1: - | mov NARGS:RD, 1+1 // Other args are ignored, anyway. - |->fff_fallback: // Call fast function fallback handler. - | // BASE = new base, RD = nargs+1 - | mov L:RB, SAVE_L - | mov PC, [BASE-4] // Fallback may overwrite PC. - | mov SAVE_PC, PC // Redundant (but a defined value). - | mov L:RB->base, BASE - | lea RD, [BASE+NARGS:RD*8-8] - | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler. - | mov L:RB->top, RD - | mov CFUNC:RD, [BASE-8] - | cmp RA, L:RB->maxstack - | ja >5 // Need to grow stack. - |.if X64 - | mov CARG1d, L:RB - |.else - | mov ARG1, L:RB - |.endif - | call aword CFUNC:RD->f // (lua_State *L) - | mov BASE, L:RB->base - | // Either throws an error, or recovers and returns -1, 0 or nresults+1. - | test RD, RD; jg ->fff_res // Returned nresults+1? - |1: - | mov RA, L:RB->top - | sub RA, BASE - | shr RA, 3 - | test RD, RD - | lea NARGS:RD, [RA+1] - | mov LFUNC:RB, [BASE-8] - | jne ->vm_call_tail // Returned -1? - | ins_callt // Returned 0: retry fast path. - | - |// Reconstruct previous base for vmeta_call during tailcall. - |->vm_call_tail: - | mov RA, BASE - | test PC, FRAME_TYPE - | jnz >3 - | movzx RB, PC_RA - | not RBa // Note: ~RB = -(RB+1) - | lea BASE, [BASE+RB*8] // base = base - (RB+1)*8 - | jmp ->vm_call_dispatch // Resolve again for tailcall. 
- |3: - | mov RB, PC - | and RB, -8 - | sub BASE, RB - | jmp ->vm_call_dispatch // Resolve again for tailcall. - | - |5: // Grow stack for fallback handler. - | mov FCARG2, LUA_MINSTACK - | mov FCARG1, L:RB - | call extern lj_state_growstack@8 // (lua_State *L, int n) - | mov BASE, L:RB->base - | xor RD, RD // Simulate a return 0. - | jmp <1 // Dumb retry (goes through ff first). - | - |->fff_gcstep: // Call GC step function. - | // BASE = new base, RD = nargs+1 - | pop RBa // Must keep stack at same level. - | mov TMPa, RBa // Save return address - | mov L:RB, SAVE_L - | mov SAVE_PC, PC // Redundant (but a defined value). - | mov L:RB->base, BASE - | lea RD, [BASE+NARGS:RD*8-8] - | mov FCARG1, L:RB - | mov L:RB->top, RD - | call extern lj_gc_step@4 // (lua_State *L) - | mov BASE, L:RB->base - | mov RD, L:RB->top - | sub RD, BASE - | shr RD, 3 - | add NARGS:RD, 1 - | mov RBa, TMPa - | push RBa // Restore return address. - | ret - | - |//----------------------------------------------------------------------- - |//-- Special dispatch targets ------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_record: // Dispatch target for recording phase. - |.if JIT - | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] - | test RDL, HOOK_VMEVENT // No recording while in vmevent. - | jnz >5 - | // Decrement the hookcount for consistency, but always do the call. - | test RDL, HOOK_ACTIVE - | jnz >1 - | test RDL, LUA_MASKLINE|LUA_MASKCOUNT - | jz >1 - | dec dword [DISPATCH+DISPATCH_GL(hookcount)] - | jmp >1 - |.endif - | - |->vm_rethook: // Dispatch target for return hooks. - | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] - | test RDL, HOOK_ACTIVE // Hook already active? - | jnz >5 - | jmp >1 - | - |->vm_inshook: // Dispatch target for instr/line hooks. - | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] - | test RDL, HOOK_ACTIVE // Hook already active? 
- | jnz >5 - | - | test RDL, LUA_MASKLINE|LUA_MASKCOUNT - | jz >5 - | dec dword [DISPATCH+DISPATCH_GL(hookcount)] - | jz >1 - | test RDL, LUA_MASKLINE - | jz >5 - |1: - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov FCARG2, PC // Caveat: FCARG2 == BASE - | mov FCARG1, L:RB - | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. - | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc) - |3: - | mov BASE, L:RB->base - |4: - | movzx RA, PC_RA - |5: - | movzx OP, PC_OP - | movzx RD, PC_RD - |.if X64 - | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins. - |.else - | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Re-dispatch to static ins. - |.endif - | - |->cont_hook: // Continue from hook yield. - | add PC, 4 - | mov RA, [RB-24] - | mov MULTRES, RA // Restore MULTRES for *M ins. - | jmp <4 - | - |->vm_hotloop: // Hot loop counter underflow. - |.if JIT - | mov LFUNC:RB, [BASE-8] // Same as curr_topL(L). - | mov RB, LFUNC:RB->pc - | movzx RD, byte [RB+PC2PROTO(framesize)] - | lea RD, [BASE+RD*8] - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov L:RB->top, RD - | mov FCARG2, PC - | lea FCARG1, [DISPATCH+GG_DISP2J] - | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa - | mov SAVE_PC, PC - | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc) - | jmp <3 - |.endif - | - |->vm_callhook: // Dispatch target for call hooks. - | mov SAVE_PC, PC - |.if JIT - | jmp >1 - |.endif - | - |->vm_hotcall: // Hot call counter underflow. - |.if JIT - | mov SAVE_PC, PC - | or PC, 1 // Marker for hot call. - |1: - |.endif - | lea RD, [BASE+NARGS:RD*8-8] - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov L:RB->top, RD - | mov FCARG2, PC - | mov FCARG1, L:RB - | call extern lj_dispatch_call@8 // (lua_State *L, const BCIns *pc) - | // ASMFunction returned in eax/rax (RDa). - | mov SAVE_PC, 0 // Invalidate for subsequent line hook. 
- |.if JIT - | and PC, -2 - |.endif - | mov BASE, L:RB->base - | mov RAa, RDa - | mov RD, L:RB->top - | sub RD, BASE - | mov RBa, RAa - | movzx RA, PC_RA - | shr RD, 3 - | add NARGS:RD, 1 - | jmp RBa - | - |->cont_stitch: // Trace stitching. - |.if JIT - | // BASE = base, RC = result, RB = mbase - | mov TRACE:RA, [RB-24] // Save previous trace. - | mov TMP1, TRACE:RA - | mov TMP3, DISPATCH // Need one more register. - | mov DISPATCH, MULTRES - | movzx RA, PC_RA - | lea RA, [BASE+RA*8] // Call base. - | sub DISPATCH, 1 - | jz >2 - |1: // Move results down. - |.if X64 - | mov RBa, [RC] - | mov [RA], RBa - |.else - | mov RB, [RC] - | mov [RA], RB - | mov RB, [RC+4] - | mov [RA+4], RB - |.endif - | add RC, 8 - | add RA, 8 - | sub DISPATCH, 1 - | jnz <1 - |2: - | movzx RC, PC_RA - | movzx RB, PC_RB - | add RC, RB - | lea RC, [BASE+RC*8-8] - |3: - | cmp RC, RA - | ja >9 // More results wanted? - | - | mov DISPATCH, TMP3 - | mov TRACE:RD, TMP1 // Get previous trace. - | movzx RB, word TRACE:RD->traceno - | movzx RD, word TRACE:RD->link - | cmp RD, RB - | je ->cont_nop // Blacklisted. - | test RD, RD - | jne =>BC_JLOOP // Jump to stitched trace. - | - | // Stitch a new trace to the previous trace. - | mov [DISPATCH+DISPATCH_J(exitno)], RB - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov FCARG2, PC - | lea FCARG1, [DISPATCH+GG_DISP2J] - | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa - | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc) - | mov BASE, L:RB->base - | jmp ->cont_nop - | - |9: // Fill up results with nil. - | mov dword [RA+4], LJ_TNIL - | add RA, 8 - | jmp <3 - |.endif - | - |->vm_profhook: // Dispatch target for profiler hook. -#if LJ_HASPROFILE - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov FCARG2, PC // Caveat: FCARG2 == BASE - | mov FCARG1, L:RB - | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc) - | mov BASE, L:RB->base - | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. 
- | sub PC, 4 - | jmp ->cont_nop -#endif - | - |//----------------------------------------------------------------------- - |//-- Trace exit handler ------------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Called from an exit stub with the exit number on the stack. - |// The 16 bit exit number is stored with two (sign-extended) push imm8. - |->vm_exit_handler: - |.if JIT - |.if X64 - | push r13; push r12 - | push r11; push r10; push r9; push r8 - | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp - | push rbx; push rdx; push rcx; push rax - | movzx RC, byte [rbp-8] // Reconstruct exit number. - | mov RCH, byte [rbp-16] - | mov [rbp-8], r15; mov [rbp-16], r14 - |.else - | push ebp; lea ebp, [esp+12]; push ebp - | push ebx; push edx; push ecx; push eax - | movzx RC, byte [ebp-4] // Reconstruct exit number. - | mov RCH, byte [ebp-8] - | mov [ebp-4], edi; mov [ebp-8], esi - |.endif - | // Caveat: DISPATCH is ebx. - | mov DISPATCH, [ebp] - | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number. - | set_vmstate EXIT - | mov [DISPATCH+DISPATCH_J(exitno)], RC - | mov [DISPATCH+DISPATCH_J(parent)], RA - |.if X64 - |.if X64WIN - | sub rsp, 16*8+4*8 // Room for SSE regs + save area. - |.else - | sub rsp, 16*8 // Room for SSE regs. - |.endif - | add rbp, -128 - | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14 - | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12 - | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10 - | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8 - | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6 - | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4 - | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2 - | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0 - |.else - | sub esp, 8*8+16 // Room for SSE regs + args. 
- | movsd qword [ebp-40], xmm7; movsd qword [ebp-48], xmm6 - | movsd qword [ebp-56], xmm5; movsd qword [ebp-64], xmm4 - | movsd qword [ebp-72], xmm3; movsd qword [ebp-80], xmm2 - | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 - |.endif - | // Caveat: RB is ebp. - | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)] - | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] - | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa - | mov L:RB->base, BASE - |.if X64WIN - | lea CARG2, [rsp+4*8] - |.elif X64 - | mov CARG2, rsp - |.else - | lea FCARG2, [esp+16] - |.endif - | lea FCARG1, [DISPATCH+GG_DISP2J] - | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 - | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) - | // MULTRES or negated error code returned in eax (RD). - | mov RAa, L:RB->cframe - | and RAa, CFRAME_RAWMASK - |.if X64WIN - | // Reposition stack later. - |.elif X64 - | mov rsp, RAa // Reposition stack to C frame. - |.else - | mov esp, RAa // Reposition stack to C frame. - |.endif - | mov [RAa+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield). - | mov BASE, L:RB->base - | mov PC, [RAa+CFRAME_OFS_PC] // Get SAVE_PC. - |.if X64 - | jmp >1 - |.endif - |.endif - |->vm_exit_interp: - | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. - |.if JIT - |.if X64 - | // Restore additional callee-save registers only used in compiled code. - |.if X64WIN - | lea RAa, [rsp+9*16+4*8] - |1: - | movdqa xmm15, [RAa-9*16] - | movdqa xmm14, [RAa-8*16] - | movdqa xmm13, [RAa-7*16] - | movdqa xmm12, [RAa-6*16] - | movdqa xmm11, [RAa-5*16] - | movdqa xmm10, [RAa-4*16] - | movdqa xmm9, [RAa-3*16] - | movdqa xmm8, [RAa-2*16] - | movdqa xmm7, [RAa-1*16] - | mov rsp, RAa // Reposition stack to C frame. - | movdqa xmm6, [RAa] - | mov r15, CSAVE_3 - | mov r14, CSAVE_4 - |.else - | add rsp, 16 // Reposition stack to C frame. - |1: - |.endif - | mov r13, TMPa - | mov r12, TMPQ - |.endif - | test RD, RD; js >9 // Check for error from exit. 
- | mov L:RB, SAVE_L - | mov MULTRES, RD - | mov LFUNC:KBASE, [BASE-8] - | mov KBASE, LFUNC:KBASE->pc - | mov KBASE, [KBASE+PC2PROTO(k)] - | mov L:RB->base, BASE - | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 - | set_vmstate INTERP - | // Modified copy of ins_next which handles function header dispatch, too. - | mov RC, [PC] - | movzx RA, RCH - | movzx OP, RCL - | add PC, 4 - | shr RC, 16 - | cmp OP, BC_FUNCF // Function header? - | jb >3 - | cmp OP, BC_FUNCC+2 // Fast function? - | jae >4 - |2: - | mov RC, MULTRES // RC/RD holds nres+1. - |3: - |.if X64 - | jmp aword [DISPATCH+OP*8] - |.else - | jmp aword [DISPATCH+OP*4] - |.endif - | - |4: // Check frame below fast function. - | mov RC, [BASE-4] - | test RC, FRAME_TYPE - | jnz <2 // Trace stitching continuation? - | // Otherwise set KBASE for Lua function below fast function. - | movzx RC, byte [RC-3] - | not RCa - | mov LFUNC:KBASE, [BASE+RC*8-8] - | mov KBASE, LFUNC:KBASE->pc - | mov KBASE, [KBASE+PC2PROTO(k)] - | jmp <2 - | - |9: // Rethrow error from the right C frame. - | neg RD - | mov FCARG1, L:RB - | mov FCARG2, RD - | call extern lj_err_throw@8 // (lua_State *L, int errcode) - |.endif - | - |//----------------------------------------------------------------------- - |//-- Math helper functions ---------------------------------------------- - |//----------------------------------------------------------------------- - | - |// FP value rounding. Called by math.floor/math.ceil fast functions - |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. - |.macro vm_round, name, mode, cond - |->name: - |.if not X64 and cond - | movsd xmm0, qword [esp+4] - | call ->name .. _sse - | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg. - | fld qword [esp+4] - | ret - |.endif - | - |->name .. _sse: - | sseconst_abs xmm2, RDa - | sseconst_2p52 xmm3, RDa - | movaps xmm1, xmm0 - | andpd xmm1, xmm2 // |x| - | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|. 
- | jbe >1 - | andnpd xmm2, xmm0 // Isolate sign bit. - |.if mode == 2 // trunc(x)? - | movaps xmm0, xmm1 - | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 - | subsd xmm1, xmm3 - | sseconst_1 xmm3, RDa - | cmpsd xmm0, xmm1, 1 // |x| < result? - | andpd xmm0, xmm3 - | subsd xmm1, xmm0 // If yes, subtract -1. - | orpd xmm1, xmm2 // Merge sign bit back in. - |.else - | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 - | subsd xmm1, xmm3 - | orpd xmm1, xmm2 // Merge sign bit back in. - | .if mode == 1 // ceil(x)? - | sseconst_m1 xmm2, RDa // Must subtract -1 to preserve -0. - | cmpsd xmm0, xmm1, 6 // x > result? - | .else // floor(x)? - | sseconst_1 xmm2, RDa - | cmpsd xmm0, xmm1, 1 // x < result? - | .endif - | andpd xmm0, xmm2 - | subsd xmm1, xmm0 // If yes, subtract +-1. - |.endif - | movaps xmm0, xmm1 - |1: - | ret - |.endmacro - | - | vm_round vm_floor, 0, 1 - | vm_round vm_ceil, 1, JIT - | vm_round vm_trunc, 2, JIT - | - |// FP modulo x%y. Called by BC_MOD* and vm_arith. - |->vm_mod: - |// Args in xmm0/xmm1, return value in xmm0. - |// Caveat: xmm0-xmm5 and RC (eax) modified! - | movaps xmm5, xmm0 - | divsd xmm0, xmm1 - | sseconst_abs xmm2, RDa - | sseconst_2p52 xmm3, RDa - | movaps xmm4, xmm0 - | andpd xmm4, xmm2 // |x/y| - | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|. - | jbe >1 - | andnpd xmm2, xmm0 // Isolate sign bit. - | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52 - | subsd xmm4, xmm3 - | orpd xmm4, xmm2 // Merge sign bit back in. - | sseconst_1 xmm2, RDa - | cmpsd xmm0, xmm4, 1 // x/y < result? - | andpd xmm0, xmm2 - | subsd xmm4, xmm0 // If yes, subtract 1.0. - | movaps xmm0, xmm5 - | mulsd xmm1, xmm4 - | subsd xmm0, xmm1 - | ret - |1: - | mulsd xmm1, xmm0 - | movaps xmm0, xmm5 - | subsd xmm0, xmm1 - | ret - | - |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. - |->vm_powi_sse: - | cmp eax, 1; jle >6 // i<=1? - | // Now 1 < (unsigned)i <= 0x80000000. - |1: // Handle leading zeros. 
- | test eax, 1; jnz >2 - | mulsd xmm0, xmm0 - | shr eax, 1 - | jmp <1 - |2: - | shr eax, 1; jz >5 - | movaps xmm1, xmm0 - |3: // Handle trailing bits. - | mulsd xmm0, xmm0 - | shr eax, 1; jz >4 - | jnc <3 - | mulsd xmm1, xmm0 - | jmp <3 - |4: - | mulsd xmm0, xmm1 - |5: - | ret - |6: - | je <5 // x^1 ==> x - | jb >7 // x^0 ==> 1 - | neg eax - | call <1 - | sseconst_1 xmm1, RDa - | divsd xmm1, xmm0 - | movaps xmm0, xmm1 - | ret - |7: - | sseconst_1 xmm0, RDa - | ret - | - |//----------------------------------------------------------------------- - |//-- Miscellaneous functions -------------------------------------------- - |//----------------------------------------------------------------------- - | - |// int lj_vm_cpuid(uint32_t f, uint32_t res[4]) - |->vm_cpuid: - |.if X64 - | mov eax, CARG1d - | .if X64WIN; push rsi; mov rsi, CARG2; .endif - | push rbx - | xor ecx, ecx - | cpuid - | mov [rsi], eax - | mov [rsi+4], ebx - | mov [rsi+8], ecx - | mov [rsi+12], edx - | pop rbx - | .if X64WIN; pop rsi; .endif - | ret - |.else - | pushfd - | pop edx - | mov ecx, edx - | xor edx, 0x00200000 // Toggle ID bit in flags. - | push edx - | popfd - | pushfd - | pop edx - | xor eax, eax // Zero means no features supported. - | cmp ecx, edx - | jz >1 // No ID toggle means no CPUID support. - | mov eax, [esp+4] // Argument 1 is function number. - | push edi - | push ebx - | xor ecx, ecx - | cpuid - | mov edi, [esp+16] // Argument 2 is result area. 
- | mov [edi], eax - | mov [edi+4], ebx - | mov [edi+8], ecx - | mov [edi+12], edx - | pop ebx - | pop edi - |1: - | ret - |.endif - | - |//----------------------------------------------------------------------- - |//-- Assertions --------------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->assert_bad_for_arg_type: -#ifdef LUA_USE_ASSERT - | int3 -#endif - | int3 - | - |//----------------------------------------------------------------------- - |//-- FFI helper functions ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Handler for callback functions. Callback slot number in ah/al. - |->vm_ffi_callback: - |.if FFI - |.type CTSTATE, CTState, PC - |.if not X64 - | sub esp, 16 // Leave room for SAVE_ERRF etc. - |.endif - | saveregs_ // ebp/rbp already saved. ebp now holds global_State *. - | lea DISPATCH, [ebp+GG_G2DISP] - | mov CTSTATE, GL:ebp->ctype_state - | movzx eax, ax - | mov CTSTATE->cb.slot, eax - |.if X64 - | mov CTSTATE->cb.gpr[0], CARG1 - | mov CTSTATE->cb.gpr[1], CARG2 - | mov CTSTATE->cb.gpr[2], CARG3 - | mov CTSTATE->cb.gpr[3], CARG4 - | movsd qword CTSTATE->cb.fpr[0], xmm0 - | movsd qword CTSTATE->cb.fpr[1], xmm1 - | movsd qword CTSTATE->cb.fpr[2], xmm2 - | movsd qword CTSTATE->cb.fpr[3], xmm3 - |.if X64WIN - | lea rax, [rsp+CFRAME_SIZE+4*8] - |.else - | lea rax, [rsp+CFRAME_SIZE] - | mov CTSTATE->cb.gpr[4], CARG5 - | mov CTSTATE->cb.gpr[5], CARG6 - | movsd qword CTSTATE->cb.fpr[4], xmm4 - | movsd qword CTSTATE->cb.fpr[5], xmm5 - | movsd qword CTSTATE->cb.fpr[6], xmm6 - | movsd qword CTSTATE->cb.fpr[7], xmm7 - |.endif - | mov CTSTATE->cb.stack, rax - | mov CARG2, rsp - |.else - | lea eax, [esp+CFRAME_SIZE+16] - | mov CTSTATE->cb.gpr[0], FCARG1 - | mov CTSTATE->cb.gpr[1], FCARG2 - | mov CTSTATE->cb.stack, eax - | mov FCARG1, [esp+CFRAME_SIZE+12] // Move around misplaced retaddr/ebp. 
- | mov FCARG2, [esp+CFRAME_SIZE+8] - | mov SAVE_RET, FCARG1 - | mov SAVE_R4, FCARG2 - | mov FCARG2, esp - |.endif - | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok. - | mov FCARG1, CTSTATE - | call extern lj_ccallback_enter@8 // (CTState *cts, void *cf) - | // lua_State * returned in eax (RD). - | set_vmstate INTERP - | mov BASE, L:RD->base - | mov RD, L:RD->top - | sub RD, BASE - | mov LFUNC:RB, [BASE-8] - | shr RD, 3 - | add RD, 1 - | ins_callt - |.endif - | - |->cont_ffi_callback: // Return from FFI callback. - |.if FFI - | mov L:RA, SAVE_L - | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)] - | mov aword CTSTATE->L, L:RAa - | mov L:RA->base, BASE - | mov L:RA->top, RB - | mov FCARG1, CTSTATE - | mov FCARG2, RC - | call extern lj_ccallback_leave@8 // (CTState *cts, TValue *o) - |.if X64 - | mov rax, CTSTATE->cb.gpr[0] - | movsd xmm0, qword CTSTATE->cb.fpr[0] - | jmp ->vm_leave_unw - |.else - | mov L:RB, SAVE_L - | mov eax, CTSTATE->cb.gpr[0] - | mov edx, CTSTATE->cb.gpr[1] - | cmp dword CTSTATE->cb.gpr[2], 1 - | jb >7 - | je >6 - | fld qword CTSTATE->cb.fpr[0].d - | jmp >7 - |6: - | fld dword CTSTATE->cb.fpr[0].f - |7: - | mov ecx, L:RB->top - | movzx ecx, word [ecx+6] // Get stack adjustment and copy up. - | mov SAVE_L, ecx // Must be one slot above SAVE_RET - | restoreregs - | pop ecx // Move return addr from SAVE_RET. - | add esp, [esp] // Adjust stack. - | add esp, 16 - | push ecx - | ret - |.endif - |.endif - | - |->vm_ffi_call@4: // Call C function via FFI. - | // Caveat: needs special frame unwinding, see below. - |.if FFI - |.if X64 - | .type CCSTATE, CCallState, rbx - | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 - |.else - | .type CCSTATE, CCallState, ebx - | push ebp; mov ebp, esp; push ebx; mov CCSTATE, FCARG1 - |.endif - | - | // Readjust stack. - |.if X64 - | mov eax, CCSTATE->spadj - | sub rsp, rax - |.else - | sub esp, CCSTATE->spadj - |.if WIN - | mov CCSTATE->spadj, esp - |.endif - |.endif - | - | // Copy stack slots. 
- | movzx ecx, byte CCSTATE->nsp - | sub ecx, 1 - | js >2 - |1: - |.if X64 - | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)] - | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax - |.else - | mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)] - | mov [esp+ecx*4], eax - |.endif - | sub ecx, 1 - | jns <1 - |2: - | - |.if X64 - | movzx eax, byte CCSTATE->nfpr - | mov CARG1, CCSTATE->gpr[0] - | mov CARG2, CCSTATE->gpr[1] - | mov CARG3, CCSTATE->gpr[2] - | mov CARG4, CCSTATE->gpr[3] - |.if not X64WIN - | mov CARG5, CCSTATE->gpr[4] - | mov CARG6, CCSTATE->gpr[5] - |.endif - | test eax, eax; jz >5 - | movaps xmm0, CCSTATE->fpr[0] - | movaps xmm1, CCSTATE->fpr[1] - | movaps xmm2, CCSTATE->fpr[2] - | movaps xmm3, CCSTATE->fpr[3] - |.if not X64WIN - | cmp eax, 4; jbe >5 - | movaps xmm4, CCSTATE->fpr[4] - | movaps xmm5, CCSTATE->fpr[5] - | movaps xmm6, CCSTATE->fpr[6] - | movaps xmm7, CCSTATE->fpr[7] - |.endif - |5: - |.else - | mov FCARG1, CCSTATE->gpr[0] - | mov FCARG2, CCSTATE->gpr[1] - |.endif - | - | call aword CCSTATE->func - | - |.if X64 - | mov CCSTATE->gpr[0], rax - | movaps CCSTATE->fpr[0], xmm0 - |.if not X64WIN - | mov CCSTATE->gpr[1], rdx - | movaps CCSTATE->fpr[1], xmm1 - |.endif - |.else - | mov CCSTATE->gpr[0], eax - | mov CCSTATE->gpr[1], edx - | cmp byte CCSTATE->resx87, 1 - | jb >7 - | je >6 - | fstp qword CCSTATE->fpr[0].d[0] - | jmp >7 - |6: - | fstp dword CCSTATE->fpr[0].f[0] - |7: - |.if WIN - | sub CCSTATE->spadj, esp - |.endif - |.endif - | - |.if X64 - | mov rbx, [rbp-8]; leave; ret - |.else - | mov ebx, [ebp-4]; leave; ret - |.endif - |.endif - |// Note: vm_ffi_call must be the last function in this object file! - | - |//----------------------------------------------------------------------- -} - -/* Generate the code for a single instruction. */ -static void build_ins(BuildCtx *ctx, BCOp op, int defop) -{ - int vk = 0; - |// Note: aligning all instructions does not pay off. 
- |=>defop: - - switch (op) { - - /* -- Comparison ops ---------------------------------------------------- */ - - /* Remember: all ops branch for a true comparison, fall through otherwise. */ - - |.macro jmp_comp, lt, ge, le, gt, target - ||switch (op) { - ||case BC_ISLT: - | lt target - ||break; - ||case BC_ISGE: - | ge target - ||break; - ||case BC_ISLE: - | le target - ||break; - ||case BC_ISGT: - | gt target - ||break; - ||default: break; /* Shut up GCC. */ - ||} - |.endmacro - - case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: - | // RA = src1, RD = src2, JMP with RD = target - | ins_AD - |.if DUALNUM - | checkint RA, >7 - | checkint RD, >8 - | mov RB, dword [BASE+RA*8] - | add PC, 4 - | cmp RB, dword [BASE+RD*8] - | jmp_comp jge, jl, jg, jle, >9 - |6: - | movzx RD, PC_RD - | branchPC RD - |9: - | ins_next - | - |7: // RA is not an integer. - | ja ->vmeta_comp - | // RA is a number. - | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp - | // RA is a number, RD is an integer. - | cvtsi2sd xmm0, dword [BASE+RD*8] - | jmp >2 - | - |8: // RA is an integer, RD is not an integer. - | ja ->vmeta_comp - | // RA is an integer, RD is a number. - | cvtsi2sd xmm1, dword [BASE+RA*8] - | movsd xmm0, qword [BASE+RD*8] - | add PC, 4 - | ucomisd xmm0, xmm1 - | jmp_comp jbe, ja, jb, jae, <9 - | jmp <6 - |.else - | checknum RA, ->vmeta_comp - | checknum RD, ->vmeta_comp - |.endif - |1: - | movsd xmm0, qword [BASE+RD*8] - |2: - | add PC, 4 - | ucomisd xmm0, qword [BASE+RA*8] - |3: - | // Unordered: all of ZF CF PF set, ordered: PF clear. - | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. 
- |.if DUALNUM - | jmp_comp jbe, ja, jb, jae, <9 - | jmp <6 - |.else - | jmp_comp jbe, ja, jb, jae, >1 - | movzx RD, PC_RD - | branchPC RD - |1: - | ins_next - |.endif - break; - - case BC_ISEQV: case BC_ISNEV: - vk = op == BC_ISEQV; - | ins_AD // RA = src1, RD = src2, JMP with RD = target - | mov RB, [BASE+RD*8+4] - | add PC, 4 - |.if DUALNUM - | cmp RB, LJ_TISNUM; jne >7 - | checkint RA, >8 - | mov RB, dword [BASE+RD*8] - | cmp RB, dword [BASE+RA*8] - if (vk) { - | jne >9 - } else { - | je >9 - } - | movzx RD, PC_RD - | branchPC RD - |9: - | ins_next - | - |7: // RD is not an integer. - | ja >5 - | // RD is a number. - | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 - | // RD is a number, RA is an integer. - | cvtsi2sd xmm0, dword [BASE+RA*8] - | jmp >2 - | - |8: // RD is an integer, RA is not an integer. - | ja >5 - | // RD is an integer, RA is a number. - | cvtsi2sd xmm0, dword [BASE+RD*8] - | ucomisd xmm0, qword [BASE+RA*8] - | jmp >4 - | - |.else - | cmp RB, LJ_TISNUM; jae >5 - | checknum RA, >5 - |.endif - |1: - | movsd xmm0, qword [BASE+RA*8] - |2: - | ucomisd xmm0, qword [BASE+RD*8] - |4: - iseqne_fp: - if (vk) { - | jp >2 // Unordered means not equal. - | jne >2 - } else { - | jp >2 // Unordered means not equal. - | je >1 - } - iseqne_end: - if (vk) { - |1: // EQ: Branch to the target. - | movzx RD, PC_RD - | branchPC RD - |2: // NE: Fallthrough to next instruction. - |.if not FFI - |3: - |.endif - } else { - |.if not FFI - |3: - |.endif - |2: // NE: Branch to the target. - | movzx RD, PC_RD - | branchPC RD - |1: // EQ: Fallthrough to next instruction. - } - if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV || - op == BC_ISEQN || op == BC_ISNEN)) { - | jmp <9 - } else { - | ins_next - } - | - if (op == BC_ISEQV || op == BC_ISNEV) { - |5: // Either or both types are not numbers. - |.if FFI - | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd - | checktp RA, LJ_TCDATA; je ->vmeta_equal_cd - |.endif - | checktp RA, RB // Compare types. 
- | jne <2 // Not the same type? - | cmp RB, LJ_TISPRI - | jae <1 // Same type and primitive type? - | - | // Same types and not a primitive type. Compare GCobj or pvalue. - | mov RA, [BASE+RA*8] - | mov RD, [BASE+RD*8] - | cmp RA, RD - | je <1 // Same GCobjs or pvalues? - | cmp RB, LJ_TISTABUD - | ja <2 // Different objects and not table/ud? - |.if X64 - | cmp RB, LJ_TUDATA // And not 64 bit lightuserdata. - | jb <2 - |.endif - | - | // Different tables or userdatas. Need to check __eq metamethod. - | // Field metatable must be at same offset for GCtab and GCudata! - | mov TAB:RB, TAB:RA->metatable - | test TAB:RB, TAB:RB - | jz <2 // No metatable? - | test byte TAB:RB->nomm, 1<vmeta_equal // Handle __eq metamethod. - } else { - |.if FFI - |3: - | cmp RB, LJ_TCDATA - if (LJ_DUALNUM && vk) { - | jne <9 - } else { - | jne <2 - } - | jmp ->vmeta_equal_cd - |.endif - } - break; - case BC_ISEQS: case BC_ISNES: - vk = op == BC_ISEQS; - | ins_AND // RA = src, RD = str const, JMP with RD = target - | mov RB, [BASE+RA*8+4] - | add PC, 4 - | cmp RB, LJ_TSTR; jne >3 - | mov RA, [BASE+RA*8] - | cmp RA, [KBASE+RD*4] - iseqne_test: - if (vk) { - | jne >2 - } else { - | je >1 - } - goto iseqne_end; - case BC_ISEQN: case BC_ISNEN: - vk = op == BC_ISEQN; - | ins_AD // RA = src, RD = num const, JMP with RD = target - | mov RB, [BASE+RA*8+4] - | add PC, 4 - |.if DUALNUM - | cmp RB, LJ_TISNUM; jne >7 - | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8 - | mov RB, dword [KBASE+RD*8] - | cmp RB, dword [BASE+RA*8] - if (vk) { - | jne >9 - } else { - | je >9 - } - | movzx RD, PC_RD - | branchPC RD - |9: - | ins_next - | - |7: // RA is not an integer. - | ja >3 - | // RA is a number. - | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 - | // RA is a number, RD is an integer. - | cvtsi2sd xmm0, dword [KBASE+RD*8] - | jmp >2 - | - |8: // RA is an integer, RD is a number. 
- | cvtsi2sd xmm0, dword [BASE+RA*8] - | ucomisd xmm0, qword [KBASE+RD*8] - | jmp >4 - |.else - | cmp RB, LJ_TISNUM; jae >3 - |.endif - |1: - | movsd xmm0, qword [KBASE+RD*8] - |2: - | ucomisd xmm0, qword [BASE+RA*8] - |4: - goto iseqne_fp; - case BC_ISEQP: case BC_ISNEP: - vk = op == BC_ISEQP; - | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target - | mov RB, [BASE+RA*8+4] - | add PC, 4 - | cmp RB, RD - if (!LJ_HASFFI) goto iseqne_test; - if (vk) { - | jne >3 - | movzx RD, PC_RD - | branchPC RD - |2: - | ins_next - |3: - | cmp RB, LJ_TCDATA; jne <2 - | jmp ->vmeta_equal_cd - } else { - | je >2 - | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd - | movzx RD, PC_RD - | branchPC RD - |2: - | ins_next - } - break; - - /* -- Unary test and copy ops ------------------------------------------- */ - - case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: - | ins_AD // RA = dst or unused, RD = src, JMP with RD = target - | mov RB, [BASE+RD*8+4] - | add PC, 4 - | cmp RB, LJ_TISTRUECOND - if (op == BC_IST || op == BC_ISTC) { - | jae >1 - } else { - | jb >1 - } - if (op == BC_ISTC || op == BC_ISFC) { - | mov [BASE+RA*8+4], RB - | mov RB, [BASE+RD*8] - | mov [BASE+RA*8], RB - } - | movzx RD, PC_RD - | branchPC RD - |1: // Fallthrough to the next instruction. 
- | ins_next - break; - - case BC_ISTYPE: - | ins_AD // RA = src, RD = -type - | add RD, [BASE+RA*8+4] - | jne ->vmeta_istype - | ins_next - break; - case BC_ISNUM: - | ins_AD // RA = src, RD = -(TISNUM-1) - | checknum RA, ->vmeta_istype - | ins_next - break; - - /* -- Unary ops --------------------------------------------------------- */ - - case BC_MOV: - | ins_AD // RA = dst, RD = src - |.if X64 - | mov RBa, [BASE+RD*8] - | mov [BASE+RA*8], RBa - |.else - | mov RB, [BASE+RD*8+4] - | mov RD, [BASE+RD*8] - | mov [BASE+RA*8+4], RB - | mov [BASE+RA*8], RD - |.endif - | ins_next_ - break; - case BC_NOT: - | ins_AD // RA = dst, RD = src - | xor RB, RB - | checktp RD, LJ_TISTRUECOND - | adc RB, LJ_TTRUE - | mov [BASE+RA*8+4], RB - | ins_next - break; - case BC_UNM: - | ins_AD // RA = dst, RD = src - |.if DUALNUM - | checkint RD, >5 - | mov RB, [BASE+RD*8] - | neg RB - | jo >4 - | mov dword [BASE+RA*8+4], LJ_TISNUM - | mov dword [BASE+RA*8], RB - |9: - | ins_next - |4: - | mov dword [BASE+RA*8+4], 0x41e00000 // 2^31. - | mov dword [BASE+RA*8], 0 - | jmp <9 - |5: - | ja ->vmeta_unm - |.else - | checknum RD, ->vmeta_unm - |.endif - | movsd xmm0, qword [BASE+RD*8] - | sseconst_sign xmm1, RDa - | xorps xmm0, xmm1 - | movsd qword [BASE+RA*8], xmm0 - |.if DUALNUM - | jmp <9 - |.else - | ins_next - |.endif - break; - case BC_LEN: - | ins_AD // RA = dst, RD = src - | checkstr RD, >2 - | mov STR:RD, [BASE+RD*8] - |.if DUALNUM - | mov RD, dword STR:RD->len - |1: - | mov dword [BASE+RA*8+4], LJ_TISNUM - | mov dword [BASE+RA*8], RD - |.else - | xorps xmm0, xmm0 - | cvtsi2sd xmm0, dword STR:RD->len - |1: - | movsd qword [BASE+RA*8], xmm0 - |.endif - | ins_next - |2: - | checktab RD, ->vmeta_len - | mov TAB:FCARG1, [BASE+RD*8] -#if LJ_52 - | mov TAB:RB, TAB:FCARG1->metatable - | cmp TAB:RB, 0 - | jnz >9 - |3: -#endif - |->BC_LEN_Z: - | mov RB, BASE // Save BASE. - | call extern lj_tab_len@4 // (GCtab *t) - | // Length of table returned in eax (RD). 
- |.if DUALNUM - | // Nothing to do. - |.else - | cvtsi2sd xmm0, RD - |.endif - | mov BASE, RB // Restore BASE. - | movzx RA, PC_RA - | jmp <1 -#if LJ_52 - |9: // Check for __len. - | test byte TAB:RB->nomm, 1<vmeta_len // 'no __len' flag NOT set: check. -#endif - break; - - /* -- Binary ops -------------------------------------------------------- */ - - |.macro ins_arithpre, sseins, ssereg - | ins_ABC - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||switch (vk) { - ||case 0: - | checknum RB, ->vmeta_arith_vn - | .if DUALNUM - | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn - | .endif - | movsd xmm0, qword [BASE+RB*8] - | sseins ssereg, qword [KBASE+RC*8] - || break; - ||case 1: - | checknum RB, ->vmeta_arith_nv - | .if DUALNUM - | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv - | .endif - | movsd xmm0, qword [KBASE+RC*8] - | sseins ssereg, qword [BASE+RB*8] - || break; - ||default: - | checknum RB, ->vmeta_arith_vv - | checknum RC, ->vmeta_arith_vv - | movsd xmm0, qword [BASE+RB*8] - | sseins ssereg, qword [BASE+RC*8] - || break; - ||} - |.endmacro - | - |.macro ins_arithdn, intins - | ins_ABC - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||switch (vk) { - ||case 0: - | checkint RB, ->vmeta_arith_vn - | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_vn - | mov RB, [BASE+RB*8] - | intins RB, [KBASE+RC*8]; jo ->vmeta_arith_vno - || break; - ||case 1: - | checkint RB, ->vmeta_arith_nv - | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_nv - | mov RC, [KBASE+RC*8] - | intins RC, [BASE+RB*8]; jo ->vmeta_arith_nvo - || break; - ||default: - | checkint RB, ->vmeta_arith_vv - | checkint RC, ->vmeta_arith_vv - | mov RB, [BASE+RB*8] - | intins RB, [BASE+RC*8]; jo ->vmeta_arith_vvo - || break; - ||} - | mov dword [BASE+RA*8+4], LJ_TISNUM - ||if (vk == 1) { - | mov dword [BASE+RA*8], RC - ||} else { - | mov dword [BASE+RA*8], RB - ||} - | ins_next - |.endmacro - | - |.macro ins_arithpost - | movsd qword [BASE+RA*8], xmm0 
- |.endmacro - | - |.macro ins_arith, sseins - | ins_arithpre sseins, xmm0 - | ins_arithpost - | ins_next - |.endmacro - | - |.macro ins_arith, intins, sseins - |.if DUALNUM - | ins_arithdn intins - |.else - | ins_arith, sseins - |.endif - |.endmacro - - | // RA = dst, RB = src1 or num const, RC = src2 or num const - case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - | ins_arith add, addsd - break; - case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - | ins_arith sub, subsd - break; - case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arith imul, mulsd - break; - case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: - | ins_arith divsd - break; - case BC_MODVN: - | ins_arithpre movsd, xmm1 - |->BC_MODVN_Z: - | call ->vm_mod - | ins_arithpost - | ins_next - break; - case BC_MODNV: case BC_MODVV: - | ins_arithpre movsd, xmm1 - | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. - break; - case BC_POW: - | ins_arithpre movsd, xmm1 - | mov RB, BASE - |.if not X64 - | movsd FPARG1, xmm0 - | movsd FPARG3, xmm1 - |.endif - | call extern pow - | movzx RA, PC_RA - | mov BASE, RB - |.if X64 - | ins_arithpost - |.else - | fstp qword [BASE+RA*8] - |.endif - | ins_next - break; - - case BC_CAT: - | ins_ABC // RA = dst, RB = src_start, RC = src_end - |.if X64 - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE - | lea CARG2d, [BASE+RC*8] - | mov CARG3d, RC - | sub CARG3d, RB - |->BC_CAT_Z: - | mov L:RB, L:CARG1d - |.else - | lea RA, [BASE+RC*8] - | sub RC, RB - | mov ARG2, RA - | mov ARG3, RC - |->BC_CAT_Z: - | mov L:RB, SAVE_L - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_cat // (lua_State *L, TValue *top, int left) - | // NULL (finished) or TValue * (metamethod) returned in eax (RC). - | mov BASE, L:RB->base - | test RC, RC - | jnz ->vmeta_binop - | movzx RB, PC_RB // Copy result to Stk[RA] from Stk[RB]. 
- | movzx RA, PC_RA - |.if X64 - | mov RCa, [BASE+RB*8] - | mov [BASE+RA*8], RCa - |.else - | mov RC, [BASE+RB*8+4] - | mov RB, [BASE+RB*8] - | mov [BASE+RA*8+4], RC - | mov [BASE+RA*8], RB - |.endif - | ins_next - break; - - /* -- Constant ops ------------------------------------------------------ */ - - case BC_KSTR: - | ins_AND // RA = dst, RD = str const (~) - | mov RD, [KBASE+RD*4] - | mov dword [BASE+RA*8+4], LJ_TSTR - | mov [BASE+RA*8], RD - | ins_next - break; - case BC_KCDATA: - |.if FFI - | ins_AND // RA = dst, RD = cdata const (~) - | mov RD, [KBASE+RD*4] - | mov dword [BASE+RA*8+4], LJ_TCDATA - | mov [BASE+RA*8], RD - | ins_next - |.endif - break; - case BC_KSHORT: - | ins_AD // RA = dst, RD = signed int16 literal - |.if DUALNUM - | movsx RD, RDW - | mov dword [BASE+RA*8+4], LJ_TISNUM - | mov dword [BASE+RA*8], RD - |.else - | movsx RD, RDW // Sign-extend literal. - | cvtsi2sd xmm0, RD - | movsd qword [BASE+RA*8], xmm0 - |.endif - | ins_next - break; - case BC_KNUM: - | ins_AD // RA = dst, RD = num const - | movsd xmm0, qword [KBASE+RD*8] - | movsd qword [BASE+RA*8], xmm0 - | ins_next - break; - case BC_KPRI: - | ins_AND // RA = dst, RD = primitive type (~) - | mov [BASE+RA*8+4], RD - | ins_next - break; - case BC_KNIL: - | ins_AD // RA = dst_start, RD = dst_end - | lea RA, [BASE+RA*8+12] - | lea RD, [BASE+RD*8+4] - | mov RB, LJ_TNIL - | mov [RA-8], RB // Sets minimum 2 slots. 
- |1: - | mov [RA], RB - | add RA, 8 - | cmp RA, RD - | jbe <1 - | ins_next - break; - - /* -- Upvalue and function ops ------------------------------------------ */ - - case BC_UGET: - | ins_AD // RA = dst, RD = upvalue # - | mov LFUNC:RB, [BASE-8] - | mov UPVAL:RB, [LFUNC:RB+RD*4+offsetof(GCfuncL, uvptr)] - | mov RB, UPVAL:RB->v - |.if X64 - | mov RDa, [RB] - | mov [BASE+RA*8], RDa - |.else - | mov RD, [RB+4] - | mov RB, [RB] - | mov [BASE+RA*8+4], RD - | mov [BASE+RA*8], RB - |.endif - | ins_next - break; - case BC_USETV: -#define TV2MARKOFS \ - ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)) - | ins_AD // RA = upvalue #, RD = src - | mov LFUNC:RB, [BASE-8] - | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] - | cmp byte UPVAL:RB->closed, 0 - | mov RB, UPVAL:RB->v - | mov RA, [BASE+RD*8] - | mov RD, [BASE+RD*8+4] - | mov [RB], RA - | mov [RB+4], RD - | jz >1 - | // Check barrier for closed upvalue. - | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv) - | jnz >2 - |1: - | ins_next - | - |2: // Upvalue is black. Check if new value is collectable and white. - | sub RD, LJ_TISGCV - | cmp RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v) - | jbe <1 - | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) - | jz <1 - | // Crossed a write barrier. Move the barrier forward. - |.if X64 and not X64WIN - | mov FCARG2, RB - | mov RB, BASE // Save BASE. - |.else - | xchg FCARG2, RB // Save BASE (FCARG2 == BASE). - |.endif - | lea GL:FCARG1, [DISPATCH+GG_DISP2G] - | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv) - | mov BASE, RB // Restore BASE. 
- | jmp <1 - break; -#undef TV2MARKOFS - case BC_USETS: - | ins_AND // RA = upvalue #, RD = str const (~) - | mov LFUNC:RB, [BASE-8] - | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] - | mov GCOBJ:RA, [KBASE+RD*4] - | mov RD, UPVAL:RB->v - | mov [RD], GCOBJ:RA - | mov dword [RD+4], LJ_TSTR - | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) - | jnz >2 - |1: - | ins_next - | - |2: // Check if string is white and ensure upvalue is closed. - | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str) - | jz <1 - | cmp byte UPVAL:RB->closed, 0 - | jz <1 - | // Crossed a write barrier. Move the barrier forward. - | mov RB, BASE // Save BASE (FCARG2 == BASE). - | mov FCARG2, RD - | lea GL:FCARG1, [DISPATCH+GG_DISP2G] - | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv) - | mov BASE, RB // Restore BASE. - | jmp <1 - break; - case BC_USETN: - | ins_AD // RA = upvalue #, RD = num const - | mov LFUNC:RB, [BASE-8] - | movsd xmm0, qword [KBASE+RD*8] - | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] - | mov RA, UPVAL:RB->v - | movsd qword [RA], xmm0 - | ins_next - break; - case BC_USETP: - | ins_AND // RA = upvalue #, RD = primitive type (~) - | mov LFUNC:RB, [BASE-8] - | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] - | mov RA, UPVAL:RB->v - | mov [RA+4], RD - | ins_next - break; - case BC_UCLO: - | ins_AD // RA = level, RD = target - | branchPC RD // Do this first to free RD. - | mov L:RB, SAVE_L - | cmp dword L:RB->openupval, 0 - | je >1 - | mov L:RB->base, BASE - | lea FCARG2, [BASE+RA*8] // Caveat: FCARG2 == BASE - | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA - | call extern lj_func_closeuv@8 // (lua_State *L, TValue *level) - | mov BASE, L:RB->base - |1: - | ins_next - break; - - case BC_FNEW: - | ins_AND // RA = dst, RD = proto const (~) (holding function prototype) - |.if X64 - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. 
- | mov CARG3d, [BASE-8] - | mov CARG2d, [KBASE+RD*4] // Fetch GCproto *. - | mov CARG1d, L:RB - |.else - | mov LFUNC:RA, [BASE-8] - | mov PROTO:RD, [KBASE+RD*4] // Fetch GCproto *. - | mov L:RB, SAVE_L - | mov ARG3, LFUNC:RA - | mov ARG2, PROTO:RD - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | // (lua_State *L, GCproto *pt, GCfuncL *parent) - | call extern lj_func_newL_gc - | // GCfuncL * returned in eax (RC). - | mov BASE, L:RB->base - | movzx RA, PC_RA - | mov [BASE+RA*8], LFUNC:RC - | mov dword [BASE+RA*8+4], LJ_TFUNC - | ins_next - break; - - /* -- Table ops --------------------------------------------------------- */ - - case BC_TNEW: - | ins_AD // RA = dst, RD = hbits|asize - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] - | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] - | mov SAVE_PC, PC - | jae >5 - |1: - |.if X64 - | mov CARG3d, RD - | and RD, 0x7ff - | shr CARG3d, 11 - |.else - | mov RA, RD - | and RD, 0x7ff - | shr RA, 11 - | mov ARG3, RA - |.endif - | cmp RD, 0x7ff - | je >3 - |2: - |.if X64 - | mov L:CARG1d, L:RB - | mov CARG2d, RD - |.else - | mov ARG1, L:RB - | mov ARG2, RD - |.endif - | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) - | // Table * returned in eax (RC). - | mov BASE, L:RB->base - | movzx RA, PC_RA - | mov [BASE+RA*8], TAB:RC - | mov dword [BASE+RA*8+4], LJ_TTAB - | ins_next - |3: // Turn 0x7ff into 0x801. 
- | mov RD, 0x801 - | jmp <2 - |5: - | mov L:FCARG1, L:RB - | call extern lj_gc_step_fixtop@4 // (lua_State *L) - | movzx RD, PC_RD - | jmp <1 - break; - case BC_TDUP: - | ins_AND // RA = dst, RD = table const (~) (holding template table) - | mov L:RB, SAVE_L - | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] - | mov SAVE_PC, PC - | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] - | mov L:RB->base, BASE - | jae >3 - |2: - | mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE - | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA - | call extern lj_tab_dup@8 // (lua_State *L, Table *kt) - | // Table * returned in eax (RC). - | mov BASE, L:RB->base - | movzx RA, PC_RA - | mov [BASE+RA*8], TAB:RC - | mov dword [BASE+RA*8+4], LJ_TTAB - | ins_next - |3: - | mov L:FCARG1, L:RB - | call extern lj_gc_step_fixtop@4 // (lua_State *L) - | movzx RD, PC_RD // Need to reload RD. - | not RDa - | jmp <2 - break; - - case BC_GGET: - | ins_AND // RA = dst, RD = str const (~) - | mov LFUNC:RB, [BASE-8] - | mov TAB:RB, LFUNC:RB->env - | mov STR:RC, [KBASE+RD*4] - | jmp ->BC_TGETS_Z - break; - case BC_GSET: - | ins_AND // RA = src, RD = str const (~) - | mov LFUNC:RB, [BASE-8] - | mov TAB:RB, LFUNC:RB->env - | mov STR:RC, [KBASE+RD*4] - | jmp ->BC_TSETS_Z - break; - - case BC_TGETV: - | ins_ABC // RA = dst, RB = table, RC = key - | checktab RB, ->vmeta_tgetv - | mov TAB:RB, [BASE+RB*8] - | - | // Integer key? - |.if DUALNUM - | checkint RC, >5 - | mov RC, dword [BASE+RC*8] - |.else - | // Convert number to int and back and compare. - | checknum RC, >5 - | movsd xmm0, qword [BASE+RC*8] - | cvttsd2si RC, xmm0 - | cvtsi2sd xmm1, RC - | ucomisd xmm0, xmm1 - | jne ->vmeta_tgetv // Generic numeric key? Use fallback. - |.endif - | cmp RC, TAB:RB->asize // Takes care of unordered, too. - | jae ->vmeta_tgetv // Not in array part? Use fallback. - | shl RC, 3 - | add RC, TAB:RB->array - | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath. - | je >2 - | // Get array slot. 
- |.if X64 - | mov RBa, [RC] - | mov [BASE+RA*8], RBa - |.else - | mov RB, [RC] - | mov RC, [RC+4] - | mov [BASE+RA*8], RB - | mov [BASE+RA*8+4], RC - |.endif - |1: - | ins_next - | - |2: // Check for __index if table value is nil. - | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. - | jz >3 - | mov TAB:RA, TAB:RB->metatable - | test byte TAB:RA->nomm, 1<vmeta_tgetv // 'no __index' flag NOT set: check. - | movzx RA, PC_RA // Restore RA. - |3: - | mov dword [BASE+RA*8+4], LJ_TNIL - | jmp <1 - | - |5: // String key? - | checkstr RC, ->vmeta_tgetv - | mov STR:RC, [BASE+RC*8] - | jmp ->BC_TGETS_Z - break; - case BC_TGETS: - | ins_ABC // RA = dst, RB = table, RC = str const (~) - | not RCa - | mov STR:RC, [KBASE+RC*4] - | checktab RB, ->vmeta_tgets - | mov TAB:RB, [BASE+RB*8] - |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. - | mov RA, TAB:RB->hmask - | and RA, STR:RC->hash - | imul RA, #NODE - | add NODE:RA, TAB:RB->node - |1: - | cmp dword NODE:RA->key.it, LJ_TSTR - | jne >4 - | cmp dword NODE:RA->key.gcr, STR:RC - | jne >4 - | // Ok, key found. Assumes: offsetof(Node, val) == 0 - | cmp dword [RA+4], LJ_TNIL // Avoid overwriting RB in fastpath. - | je >5 // Key found, but nil value? - | movzx RC, PC_RA - | // Get node value. - |.if X64 - | mov RBa, [RA] - | mov [BASE+RC*8], RBa - |.else - | mov RB, [RA] - | mov RA, [RA+4] - | mov [BASE+RC*8], RB - | mov [BASE+RC*8+4], RA - |.endif - |2: - | ins_next - | - |3: - | movzx RC, PC_RA - | mov dword [BASE+RC*8+4], LJ_TNIL - | jmp <2 - | - |4: // Follow hash chain. - | mov NODE:RA, NODE:RA->next - | test NODE:RA, NODE:RA - | jnz <1 - | // End of hash chain: key not found, nil result. - | - |5: // Check for __index if table value is nil. - | mov TAB:RA, TAB:RB->metatable - | test TAB:RA, TAB:RA - | jz <3 // No metatable: done. - | test byte TAB:RA->nomm, 1<vmeta_tgets // Caveat: preserve STR:RC. 
- break; - case BC_TGETB: - | ins_ABC // RA = dst, RB = table, RC = byte literal - | checktab RB, ->vmeta_tgetb - | mov TAB:RB, [BASE+RB*8] - | cmp RC, TAB:RB->asize - | jae ->vmeta_tgetb - | shl RC, 3 - | add RC, TAB:RB->array - | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath. - | je >2 - | // Get array slot. - |.if X64 - | mov RBa, [RC] - | mov [BASE+RA*8], RBa - |.else - | mov RB, [RC] - | mov RC, [RC+4] - | mov [BASE+RA*8], RB - | mov [BASE+RA*8+4], RC - |.endif - |1: - | ins_next - | - |2: // Check for __index if table value is nil. - | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. - | jz >3 - | mov TAB:RA, TAB:RB->metatable - | test byte TAB:RA->nomm, 1<vmeta_tgetb // 'no __index' flag NOT set: check. - | movzx RA, PC_RA // Restore RA. - |3: - | mov dword [BASE+RA*8+4], LJ_TNIL - | jmp <1 - break; - case BC_TGETR: - | ins_ABC // RA = dst, RB = table, RC = key - | mov TAB:RB, [BASE+RB*8] - |.if DUALNUM - | mov RC, dword [BASE+RC*8] - |.else - | cvttsd2si RC, qword [BASE+RC*8] - |.endif - | cmp RC, TAB:RB->asize - | jae ->vmeta_tgetr // Not in array part? Use fallback. - | shl RC, 3 - | add RC, TAB:RB->array - | // Get array slot. - |->BC_TGETR_Z: - |.if X64 - | mov RBa, [RC] - | mov [BASE+RA*8], RBa - |.else - | mov RB, [RC] - | mov RC, [RC+4] - | mov [BASE+RA*8], RB - | mov [BASE+RA*8+4], RC - |.endif - |->BC_TGETR2_Z: - | ins_next - break; - - case BC_TSETV: - | ins_ABC // RA = src, RB = table, RC = key - | checktab RB, ->vmeta_tsetv - | mov TAB:RB, [BASE+RB*8] - | - | // Integer key? - |.if DUALNUM - | checkint RC, >5 - | mov RC, dword [BASE+RC*8] - |.else - | // Convert number to int and back and compare. - | checknum RC, >5 - | movsd xmm0, qword [BASE+RC*8] - | cvttsd2si RC, xmm0 - | cvtsi2sd xmm1, RC - | ucomisd xmm0, xmm1 - | jne ->vmeta_tsetv // Generic numeric key? Use fallback. - |.endif - | cmp RC, TAB:RB->asize // Takes care of unordered, too. 
- | jae ->vmeta_tsetv - | shl RC, 3 - | add RC, TAB:RB->array - | cmp dword [RC+4], LJ_TNIL - | je >3 // Previous value is nil? - |1: - | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) - | jnz >7 - |2: // Set array slot. - |.if X64 - | mov RBa, [BASE+RA*8] - | mov [RC], RBa - |.else - | mov RB, [BASE+RA*8+4] - | mov RA, [BASE+RA*8] - | mov [RC+4], RB - | mov [RC], RA - |.endif - | ins_next - | - |3: // Check for __newindex if previous value is nil. - | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. - | jz <1 - | mov TAB:RA, TAB:RB->metatable - | test byte TAB:RA->nomm, 1<vmeta_tsetv // 'no __newindex' flag NOT set: check. - | movzx RA, PC_RA // Restore RA. - | jmp <1 - | - |5: // String key? - | checkstr RC, ->vmeta_tsetv - | mov STR:RC, [BASE+RC*8] - | jmp ->BC_TSETS_Z - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, RA - | movzx RA, PC_RA // Restore RA. - | jmp <2 - break; - case BC_TSETS: - | ins_ABC // RA = src, RB = table, RC = str const (~) - | not RCa - | mov STR:RC, [KBASE+RC*4] - | checktab RB, ->vmeta_tsets - | mov TAB:RB, [BASE+RB*8] - |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. - | mov RA, TAB:RB->hmask - | and RA, STR:RC->hash - | imul RA, #NODE - | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. - | add NODE:RA, TAB:RB->node - |1: - | cmp dword NODE:RA->key.it, LJ_TSTR - | jne >5 - | cmp dword NODE:RA->key.gcr, STR:RC - | jne >5 - | // Ok, key found. Assumes: offsetof(Node, val) == 0 - | cmp dword [RA+4], LJ_TNIL - | je >4 // Previous value is nil? - |2: - | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) - | jnz >7 - |3: // Set node value. - | movzx RC, PC_RA - |.if X64 - | mov RBa, [BASE+RC*8] - | mov [RA], RBa - |.else - | mov RB, [BASE+RC*8+4] - | mov RC, [BASE+RC*8] - | mov [RA+4], RB - | mov [RA], RC - |.endif - | ins_next - | - |4: // Check for __newindex if previous value is nil. 
- | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. - | jz <2 - | mov TMP1, RA // Save RA. - | mov TAB:RA, TAB:RB->metatable - | test byte TAB:RA->nomm, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. - | mov RA, TMP1 // Restore RA. - | jmp <2 - | - |5: // Follow hash chain. - | mov NODE:RA, NODE:RA->next - | test NODE:RA, NODE:RA - | jnz <1 - | // End of hash chain: key not found, add a new one. - | - | // But check for __newindex first. - | mov TAB:RA, TAB:RB->metatable - | test TAB:RA, TAB:RA - | jz >6 // No metatable: continue. - | test byte TAB:RA->nomm, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. - |6: - | mov TMP1, STR:RC - | mov TMP2, LJ_TSTR - | mov TMP3, TAB:RB // Save TAB:RB for us. - |.if X64 - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE - | lea CARG3, TMP1 - | mov CARG2d, TAB:RB - | mov L:RB, L:CARG1d - |.else - | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. - | mov ARG2, TAB:RB - | mov L:RB, SAVE_L - | mov ARG3, RC - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) - | // Handles write barrier for the new key. TValue * returned in eax (RC). - | mov BASE, L:RB->base - | mov TAB:RB, TMP3 // Need TAB:RB for barrier. - | mov RA, eax - | jmp <2 // Must check write barrier for value. - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, RC // Destroys STR:RC. - | jmp <3 - break; - case BC_TSETB: - | ins_ABC // RA = src, RB = table, RC = byte literal - | checktab RB, ->vmeta_tsetb - | mov TAB:RB, [BASE+RB*8] - | cmp RC, TAB:RB->asize - | jae ->vmeta_tsetb - | shl RC, 3 - | add RC, TAB:RB->array - | cmp dword [RC+4], LJ_TNIL - | je >3 // Previous value is nil? - |1: - | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) - | jnz >7 - |2: // Set array slot. 
- |.if X64 - | mov RAa, [BASE+RA*8] - | mov [RC], RAa - |.else - | mov RB, [BASE+RA*8+4] - | mov RA, [BASE+RA*8] - | mov [RC+4], RB - | mov [RC], RA - |.endif - | ins_next - | - |3: // Check for __newindex if previous value is nil. - | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. - | jz <1 - | mov TAB:RA, TAB:RB->metatable - | test byte TAB:RA->nomm, 1<vmeta_tsetb // 'no __newindex' flag NOT set: check. - | movzx RA, PC_RA // Restore RA. - | jmp <1 - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, RA - | movzx RA, PC_RA // Restore RA. - | jmp <2 - break; - case BC_TSETR: - | ins_ABC // RA = src, RB = table, RC = key - | mov TAB:RB, [BASE+RB*8] - |.if DUALNUM - | mov RC, dword [BASE+RC*8] - |.else - | cvttsd2si RC, qword [BASE+RC*8] - |.endif - | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) - | jnz >7 - |2: - | cmp RC, TAB:RB->asize - | jae ->vmeta_tsetr - | shl RC, 3 - | add RC, TAB:RB->array - | // Set array slot. - |->BC_TSETR_Z: - |.if X64 - | mov RBa, [BASE+RA*8] - | mov [RC], RBa - |.else - | mov RB, [BASE+RA*8+4] - | mov RA, [BASE+RA*8] - | mov [RC+4], RB - | mov [RC], RA - |.endif - | ins_next - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, RA - | movzx RA, PC_RA // Restore RA. - | jmp <2 - break; - - case BC_TSETM: - | ins_AD // RA = base (table at base-1), RD = num const (start index) - | mov TMP1, KBASE // Need one more free register. - | mov KBASE, dword [KBASE+RD*8] // Integer constant is in lo-word. - |1: - | lea RA, [BASE+RA*8] - | mov TAB:RB, [RA-8] // Guaranteed to be a table. - | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) - | jnz >7 - |2: - | mov RD, MULTRES - | sub RD, 1 - | jz >4 // Nothing to copy? - | add RD, KBASE // Compute needed size. - | cmp RD, TAB:RB->asize - | ja >5 // Doesn't fit into array part? 
- | sub RD, KBASE - | shl KBASE, 3 - | add KBASE, TAB:RB->array - |3: // Copy result slots to table. - |.if X64 - | mov RBa, [RA] - | add RA, 8 - | mov [KBASE], RBa - |.else - | mov RB, [RA] - | mov [KBASE], RB - | mov RB, [RA+4] - | add RA, 8 - | mov [KBASE+4], RB - |.endif - | add KBASE, 8 - | sub RD, 1 - | jnz <3 - |4: - | mov KBASE, TMP1 - | ins_next - | - |5: // Need to resize array part. - |.if X64 - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE. - | mov CARG2d, TAB:RB - | mov CARG3d, RD - | mov L:RB, L:CARG1d - |.else - | mov ARG2, TAB:RB - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov ARG3, RD - | mov ARG1, L:RB - |.endif - | mov SAVE_PC, PC - | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) - | mov BASE, L:RB->base - | movzx RA, PC_RA // Restore RA. - | jmp <1 // Retry. - | - |7: // Possible table write barrier for any value. Skip valiswhite check. - | barrierback TAB:RB, RD - | jmp <2 - break; - - /* -- Calls and vararg handling ----------------------------------------- */ - - case BC_CALL: case BC_CALLM: - | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs - if (op == BC_CALLM) { - | add NARGS:RD, MULTRES - } - | cmp dword [BASE+RA*8+4], LJ_TFUNC - | mov LFUNC:RB, [BASE+RA*8] - | jne ->vmeta_call_ra - | lea BASE, [BASE+RA*8+8] - | ins_call - break; - - case BC_CALLMT: - | ins_AD // RA = base, RD = extra_nargs - | add NARGS:RD, MULTRES - | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op. - break; - case BC_CALLT: - | ins_AD // RA = base, RD = nargs+1 - | lea RA, [BASE+RA*8+8] - | mov KBASE, BASE // Use KBASE for move + vmeta_call hint. - | mov LFUNC:RB, [RA-8] - | cmp dword [RA-4], LJ_TFUNC - | jne ->vmeta_call - |->BC_CALLT_Z: - | mov PC, [BASE-4] - | test PC, FRAME_TYPE - | jnz >7 - |1: - | mov [BASE-8], LFUNC:RB // Copy function down, reloaded below. - | mov MULTRES, NARGS:RD - | sub NARGS:RD, 1 - | jz >3 - |2: // Move args down. 
- |.if X64 - | mov RBa, [RA] - | add RA, 8 - | mov [KBASE], RBa - |.else - | mov RB, [RA] - | mov [KBASE], RB - | mov RB, [RA+4] - | add RA, 8 - | mov [KBASE+4], RB - |.endif - | add KBASE, 8 - | sub NARGS:RD, 1 - | jnz <2 - | - | mov LFUNC:RB, [BASE-8] - |3: - | mov NARGS:RD, MULTRES - | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function? - | ja >5 - |4: - | ins_callt - | - |5: // Tailcall to a fast function. - | test PC, FRAME_TYPE // Lua frame below? - | jnz <4 - | movzx RA, PC_RA - | not RAa - | mov LFUNC:KBASE, [BASE+RA*8-8] // Need to prepare KBASE. - | mov KBASE, LFUNC:KBASE->pc - | mov KBASE, [KBASE+PC2PROTO(k)] - | jmp <4 - | - |7: // Tailcall from a vararg function. - | sub PC, FRAME_VARG - | test PC, FRAME_TYPEP - | jnz >8 // Vararg frame below? - | sub BASE, PC // Need to relocate BASE/KBASE down. - | mov KBASE, BASE - | mov PC, [BASE-4] - | jmp <1 - |8: - | add PC, FRAME_VARG - | jmp <1 - break; - - case BC_ITERC: - | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1) - | lea RA, [BASE+RA*8+8] // fb = base+1 - |.if X64 - | mov RBa, [RA-24] // Copy state. fb[0] = fb[-3]. - | mov RCa, [RA-16] // Copy control var. fb[1] = fb[-2]. - | mov [RA], RBa - | mov [RA+8], RCa - |.else - | mov RB, [RA-24] // Copy state. fb[0] = fb[-3]. - | mov RC, [RA-20] - | mov [RA], RB - | mov [RA+4], RC - | mov RB, [RA-16] // Copy control var. fb[1] = fb[-2]. - | mov RC, [RA-12] - | mov [RA+8], RB - | mov [RA+12], RC - |.endif - | mov LFUNC:RB, [RA-32] // Copy callable. fb[-1] = fb[-4] - | mov RC, [RA-28] - | mov [RA-8], LFUNC:RB - | mov [RA-4], RC - | cmp RC, LJ_TFUNC // Handle like a regular 2-arg call. - | mov NARGS:RD, 2+1 - | jne ->vmeta_call - | mov BASE, RA - | ins_call - break; - - case BC_ITERN: - | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. - |.endif - | mov TMP1, KBASE // Need two more free registers. 
- | mov TMP2, DISPATCH - | mov TAB:RB, [BASE+RA*8-16] - | mov RC, [BASE+RA*8-8] // Get index from control var. - | mov DISPATCH, TAB:RB->asize - | add PC, 4 - | mov KBASE, TAB:RB->array - |1: // Traverse array part. - | cmp RC, DISPATCH; jae >5 // Index points after array part? - | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4 - |.if DUALNUM - | mov dword [BASE+RA*8+4], LJ_TISNUM - | mov dword [BASE+RA*8], RC - |.else - | cvtsi2sd xmm0, RC - |.endif - | // Copy array slot to returned value. - |.if X64 - | mov RBa, [KBASE+RC*8] - | mov [BASE+RA*8+8], RBa - |.else - | mov RB, [KBASE+RC*8+4] - | mov [BASE+RA*8+12], RB - | mov RB, [KBASE+RC*8] - | mov [BASE+RA*8+8], RB - |.endif - | add RC, 1 - | // Return array index as a numeric key. - |.if DUALNUM - | // See above. - |.else - | movsd qword [BASE+RA*8], xmm0 - |.endif - | mov [BASE+RA*8-8], RC // Update control var. - |2: - | movzx RD, PC_RD // Get target from ITERL. - | branchPC RD - |3: - | mov DISPATCH, TMP2 - | mov KBASE, TMP1 - | ins_next - | - |4: // Skip holes in array part. - | add RC, 1 - | jmp <1 - | - |5: // Traverse hash part. - | sub RC, DISPATCH - |6: - | cmp RC, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1. - | imul KBASE, RC, #NODE - | add NODE:KBASE, TAB:RB->node - | cmp dword NODE:KBASE->val.it, LJ_TNIL; je >7 - | lea DISPATCH, [RC+DISPATCH+1] - | // Copy key and value from hash slot. - |.if X64 - | mov RBa, NODE:KBASE->key - | mov RCa, NODE:KBASE->val - | mov [BASE+RA*8], RBa - | mov [BASE+RA*8+8], RCa - |.else - | mov RB, NODE:KBASE->key.gcr - | mov RC, NODE:KBASE->key.it - | mov [BASE+RA*8], RB - | mov [BASE+RA*8+4], RC - | mov RB, NODE:KBASE->val.gcr - | mov RC, NODE:KBASE->val.it - | mov [BASE+RA*8+8], RB - | mov [BASE+RA*8+12], RC - |.endif - | mov [BASE+RA*8-8], DISPATCH - | jmp <2 - | - |7: // Skip holes in hash part. 
- | add RC, 1 - | jmp <6 - break; - - case BC_ISNEXT: - | ins_AD // RA = base, RD = target (points to ITERN) - | cmp dword [BASE+RA*8-20], LJ_TFUNC; jne >5 - | mov CFUNC:RB, [BASE+RA*8-24] - | cmp dword [BASE+RA*8-12], LJ_TTAB; jne >5 - | cmp dword [BASE+RA*8-4], LJ_TNIL; jne >5 - | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 - | branchPC RD - | mov dword [BASE+RA*8-8], 0 // Initialize control var. - | mov dword [BASE+RA*8-4], 0xfffe7fff - |1: - | ins_next - |5: // Despecialize bytecode if any of the checks fail. - | mov PC_OP, BC_JMP - | branchPC RD - | mov byte [PC], BC_ITERC - | jmp <1 - break; - - case BC_VARG: - | ins_ABC // RA = base, RB = nresults+1, RC = numparams - | mov TMP1, KBASE // Need one more free register. - | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)] - | lea RA, [BASE+RA*8] - | sub KBASE, [BASE-4] - | // Note: KBASE may now be even _above_ BASE if nargs was < numparams. - | test RB, RB - | jz >5 // Copy all varargs? - | lea RB, [RA+RB*8-8] - | cmp KBASE, BASE // No vararg slots? - | jnb >2 - |1: // Copy vararg slots to destination slots. - |.if X64 - | mov RCa, [KBASE-8] - | add KBASE, 8 - | mov [RA], RCa - |.else - | mov RC, [KBASE-8] - | mov [RA], RC - | mov RC, [KBASE-4] - | add KBASE, 8 - | mov [RA+4], RC - |.endif - | add RA, 8 - | cmp RA, RB // All destination slots filled? - | jnb >3 - | cmp KBASE, BASE // No more vararg slots? - | jb <1 - |2: // Fill up remainder with nil. - | mov dword [RA+4], LJ_TNIL - | add RA, 8 - | cmp RA, RB - | jb <2 - |3: - | mov KBASE, TMP1 - | ins_next - | - |5: // Copy all varargs. - | mov MULTRES, 1 // MULTRES = 0+1 - | mov RC, BASE - | sub RC, KBASE - | jbe <3 // No vararg slots? - | mov RB, RC - | shr RB, 3 - | add RB, 1 - | mov MULTRES, RB // MULTRES = #varargs+1 - | mov L:RB, SAVE_L - | add RC, RA - | cmp RC, L:RB->maxstack - | ja >7 // Need to grow stack? - |6: // Copy all vararg slots. 
- |.if X64 - | mov RCa, [KBASE-8] - | add KBASE, 8 - | mov [RA], RCa - |.else - | mov RC, [KBASE-8] - | mov [RA], RC - | mov RC, [KBASE-4] - | add KBASE, 8 - | mov [RA+4], RC - |.endif - | add RA, 8 - | cmp KBASE, BASE // No more vararg slots? - | jb <6 - | jmp <3 - | - |7: // Grow stack for varargs. - | mov L:RB->base, BASE - | mov L:RB->top, RA - | mov SAVE_PC, PC - | sub KBASE, BASE // Need delta, because BASE may change. - | mov FCARG2, MULTRES - | sub FCARG2, 1 - | mov FCARG1, L:RB - | call extern lj_state_growstack@8 // (lua_State *L, int n) - | mov BASE, L:RB->base - | mov RA, L:RB->top - | add KBASE, BASE - | jmp <6 - break; - - /* -- Returns ----------------------------------------------------------- */ - - case BC_RETM: - | ins_AD // RA = results, RD = extra_nresults - | add RD, MULTRES // MULTRES >=1, so RD >=1. - | // Fall through. Assumes BC_RET follows and ins_AD is a no-op. - break; - - case BC_RET: case BC_RET0: case BC_RET1: - | ins_AD // RA = results, RD = nresults+1 - if (op != BC_RET0) { - | shl RA, 3 - } - |1: - | mov PC, [BASE-4] - | mov MULTRES, RD // Save nresults+1. - | test PC, FRAME_TYPE // Check frame type marker. - | jnz >7 // Not returning to a fixarg Lua func? - switch (op) { - case BC_RET: - |->BC_RET_Z: - | mov KBASE, BASE // Use KBASE for result move. - | sub RD, 1 - | jz >3 - |2: // Move results down. - |.if X64 - | mov RBa, [KBASE+RA] - | mov [KBASE-8], RBa - |.else - | mov RB, [KBASE+RA] - | mov [KBASE-8], RB - | mov RB, [KBASE+RA+4] - | mov [KBASE-4], RB - |.endif - | add KBASE, 8 - | sub RD, 1 - | jnz <2 - |3: - | mov RD, MULTRES // Note: MULTRES may be >255. - | movzx RB, PC_RB // So cannot compare with RDL! - |5: - | cmp RB, RD // More results expected? 
- | ja >6 - break; - case BC_RET1: - |.if X64 - | mov RBa, [BASE+RA] - | mov [BASE-8], RBa - |.else - | mov RB, [BASE+RA+4] - | mov [BASE-4], RB - | mov RB, [BASE+RA] - | mov [BASE-8], RB - |.endif - /* fallthrough */ - case BC_RET0: - |5: - | cmp PC_RB, RDL // More results expected? - | ja >6 - default: - break; - } - | movzx RA, PC_RA - | not RAa // Note: ~RA = -(RA+1) - | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8 - | mov LFUNC:KBASE, [BASE-8] - | mov KBASE, LFUNC:KBASE->pc - | mov KBASE, [KBASE+PC2PROTO(k)] - | ins_next - | - |6: // Fill up results with nil. - if (op == BC_RET) { - | mov dword [KBASE-4], LJ_TNIL // Note: relies on shifted base. - | add KBASE, 8 - } else { - | mov dword [BASE+RD*8-12], LJ_TNIL - } - | add RD, 1 - | jmp <5 - | - |7: // Non-standard return case. - | lea RB, [PC-FRAME_VARG] - | test RB, FRAME_TYPEP - | jnz ->vm_return - | // Return from vararg function: relocate BASE down and RA up. - | sub BASE, RB - if (op != BC_RET0) { - | add RA, RB - } - | jmp <1 - break; - - /* -- Loops and branches ------------------------------------------------ */ - - |.define FOR_IDX, [RA]; .define FOR_TIDX, dword [RA+4] - |.define FOR_STOP, [RA+8]; .define FOR_TSTOP, dword [RA+12] - |.define FOR_STEP, [RA+16]; .define FOR_TSTEP, dword [RA+20] - |.define FOR_EXT, [RA+24]; .define FOR_TEXT, dword [RA+28] - - case BC_FORL: - |.if JIT - | hotloop RB - |.endif - | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. 
- break; - - case BC_JFORI: - case BC_JFORL: -#if !LJ_HASJIT - break; -#endif - case BC_FORI: - case BC_IFORL: - vk = (op == BC_IFORL || op == BC_JFORL); - | ins_AJ // RA = base, RD = target (after end of loop or start of loop) - | lea RA, [BASE+RA*8] - if (LJ_DUALNUM) { - | cmp FOR_TIDX, LJ_TISNUM; jne >9 - if (!vk) { - | cmp FOR_TSTOP, LJ_TISNUM; jne ->vmeta_for - | cmp FOR_TSTEP, LJ_TISNUM; jne ->vmeta_for - | mov RB, dword FOR_IDX - | cmp dword FOR_STEP, 0; jl >5 - } else { -#ifdef LUA_USE_ASSERT - | cmp FOR_TSTOP, LJ_TISNUM; jne ->assert_bad_for_arg_type - | cmp FOR_TSTEP, LJ_TISNUM; jne ->assert_bad_for_arg_type -#endif - | mov RB, dword FOR_STEP - | test RB, RB; js >5 - | add RB, dword FOR_IDX; jo >1 - | mov dword FOR_IDX, RB - } - | cmp RB, dword FOR_STOP - | mov FOR_TEXT, LJ_TISNUM - | mov dword FOR_EXT, RB - if (op == BC_FORI) { - | jle >7 - |1: - |6: - | branchPC RD - } else if (op == BC_JFORI) { - | branchPC RD - | movzx RD, PC_RD - | jle =>BC_JLOOP - |1: - |6: - } else if (op == BC_IFORL) { - | jg >7 - |6: - | branchPC RD - |1: - } else { - | jle =>BC_JLOOP - |1: - |6: - } - |7: - | ins_next - | - |5: // Invert check for negative step. - if (vk) { - | add RB, dword FOR_IDX; jo <1 - | mov dword FOR_IDX, RB - } - | cmp RB, dword FOR_STOP - | mov FOR_TEXT, LJ_TISNUM - | mov dword FOR_EXT, RB - if (op == BC_FORI) { - | jge <7 - } else if (op == BC_JFORI) { - | branchPC RD - | movzx RD, PC_RD - | jge =>BC_JLOOP - } else if (op == BC_IFORL) { - | jl <7 - } else { - | jge =>BC_JLOOP - } - | jmp <6 - |9: // Fallback to FP variant. - } else if (!vk) { - | cmp FOR_TIDX, LJ_TISNUM - } - if (!vk) { - | jae ->vmeta_for - | cmp FOR_TSTOP, LJ_TISNUM; jae ->vmeta_for - } else { -#ifdef LUA_USE_ASSERT - | cmp FOR_TSTOP, LJ_TISNUM; jae ->assert_bad_for_arg_type - | cmp FOR_TSTEP, LJ_TISNUM; jae ->assert_bad_for_arg_type -#endif - } - | mov RB, FOR_TSTEP // Load type/hiword of for step. 
- if (!vk) { - | cmp RB, LJ_TISNUM; jae ->vmeta_for - } - | movsd xmm0, qword FOR_IDX - | movsd xmm1, qword FOR_STOP - if (vk) { - | addsd xmm0, qword FOR_STEP - | movsd qword FOR_IDX, xmm0 - | test RB, RB; js >3 - } else { - | jl >3 - } - | ucomisd xmm1, xmm0 - |1: - | movsd qword FOR_EXT, xmm0 - if (op == BC_FORI) { - |.if DUALNUM - | jnb <7 - |.else - | jnb >2 - | branchPC RD - |.endif - } else if (op == BC_JFORI) { - | branchPC RD - | movzx RD, PC_RD - | jnb =>BC_JLOOP - } else if (op == BC_IFORL) { - |.if DUALNUM - | jb <7 - |.else - | jb >2 - | branchPC RD - |.endif - } else { - | jnb =>BC_JLOOP - } - |.if DUALNUM - | jmp <6 - |.else - |2: - | ins_next - |.endif - | - |3: // Invert comparison if step is negative. - | ucomisd xmm0, xmm1 - | jmp <1 - break; - - case BC_ITERL: - |.if JIT - | hotloop RB - |.endif - | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. - break; - - case BC_JITERL: -#if !LJ_HASJIT - break; -#endif - case BC_IITERL: - | ins_AJ // RA = base, RD = target - | lea RA, [BASE+RA*8] - | mov RB, [RA+4] - | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil. - if (op == BC_JITERL) { - | mov [RA-4], RB - | mov RB, [RA] - | mov [RA-8], RB - | jmp =>BC_JLOOP - } else { - | branchPC RD // Otherwise save control var + branch. - | mov RD, [RA] - | mov [RA-4], RB - | mov [RA-8], RD - } - |1: - | ins_next - break; - - case BC_LOOP: - | ins_A // RA = base, RD = target (loop extent) - | // Note: RA/RD is only used by trace recorder to determine scope/extent - | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT - | hotloop RB - |.endif - | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. 
- break; - - case BC_ILOOP: - | ins_A // RA = base, RD = target (loop extent) - | ins_next - break; - - case BC_JLOOP: - |.if JIT - | ins_AD // RA = base (ignored), RD = traceno - | mov RA, [DISPATCH+DISPATCH_J(trace)] - | mov TRACE:RD, [RA+RD*4] - | mov RDa, TRACE:RD->mcode - | mov L:RB, SAVE_L - | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE - | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB - | // Save additional callee-save registers only used in compiled code. - |.if X64WIN - | mov TMPQ, r12 - | mov TMPa, r13 - | mov CSAVE_4, r14 - | mov CSAVE_3, r15 - | mov RAa, rsp - | sub rsp, 9*16+4*8 - | movdqa [RAa], xmm6 - | movdqa [RAa-1*16], xmm7 - | movdqa [RAa-2*16], xmm8 - | movdqa [RAa-3*16], xmm9 - | movdqa [RAa-4*16], xmm10 - | movdqa [RAa-5*16], xmm11 - | movdqa [RAa-6*16], xmm12 - | movdqa [RAa-7*16], xmm13 - | movdqa [RAa-8*16], xmm14 - | movdqa [RAa-9*16], xmm15 - |.elif X64 - | mov TMPQ, r12 - | mov TMPa, r13 - | sub rsp, 16 - |.endif - | jmp RDa - |.endif - break; - - case BC_JMP: - | ins_AJ // RA = unused, RD = target - | branchPC RD - | ins_next - break; - - /* -- Function headers -------------------------------------------------- */ - - /* - ** Reminder: A function may be called with func/args above L->maxstack, - ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot, - ** too. This means all FUNC* ops (including fast functions) must check - ** for stack overflow _before_ adding more slots! - */ - - case BC_FUNCF: - |.if JIT - | hotcall RB - |.endif - case BC_FUNCV: /* NYI: compiled vararg functions. */ - | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op. - break; - - case BC_JFUNCF: -#if !LJ_HASJIT - break; -#endif - case BC_IFUNCF: - | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 - | mov KBASE, [PC-4+PC2PROTO(k)] - | mov L:RB, SAVE_L - | lea RA, [BASE+RA*8] // Top of frame. 
- | cmp RA, L:RB->maxstack - | ja ->vm_growstack_f - | movzx RA, byte [PC-4+PC2PROTO(numparams)] - | cmp NARGS:RD, RA // Check for missing parameters. - | jbe >3 - |2: - if (op == BC_JFUNCF) { - | movzx RD, PC_RD - | jmp =>BC_JLOOP - } else { - | ins_next - } - | - |3: // Clear missing parameters. - | mov dword [BASE+NARGS:RD*8-4], LJ_TNIL - | add NARGS:RD, 1 - | cmp NARGS:RD, RA - | jbe <3 - | jmp <2 - break; - - case BC_JFUNCV: -#if !LJ_HASJIT - break; -#endif - | int3 // NYI: compiled vararg functions - break; /* NYI: compiled vararg functions. */ - - case BC_IFUNCV: - | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 - | lea RB, [NARGS:RD*8+FRAME_VARG] - | lea RD, [BASE+NARGS:RD*8] - | mov LFUNC:KBASE, [BASE-8] - | mov [RD-4], RB // Store delta + FRAME_VARG. - | mov [RD-8], LFUNC:KBASE // Store copy of LFUNC. - | mov L:RB, SAVE_L - | lea RA, [RD+RA*8] - | cmp RA, L:RB->maxstack - | ja ->vm_growstack_v // Need to grow stack. - | mov RA, BASE - | mov BASE, RD - | movzx RB, byte [PC-4+PC2PROTO(numparams)] - | test RB, RB - | jz >2 - |1: // Copy fixarg slots up to new frame. - | add RA, 8 - | cmp RA, BASE - | jnb >3 // Less args than parameters? - | mov KBASE, [RA-8] - | mov [RD], KBASE - | mov KBASE, [RA-4] - | mov [RD+4], KBASE - | add RD, 8 - | mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC). - | sub RB, 1 - | jnz <1 - |2: - if (op == BC_JFUNCV) { - | movzx RD, PC_RD - | jmp =>BC_JLOOP - } else { - | mov KBASE, [PC-4+PC2PROTO(k)] - | ins_next - } - | - |3: // Clear missing parameters. 
-    |  mov dword [RD+4], LJ_TNIL
-    |  add RD, 8
-    |  sub RB, 1
-    |  jnz <3
-    |  jmp <2
-    break;
-
-  case BC_FUNCC:
-  case BC_FUNCCW:
-    |  ins_AD  // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
-    |  mov CFUNC:RB, [BASE-8]
-    |  mov KBASEa, CFUNC:RB->f
-    |  mov L:RB, SAVE_L
-    |  lea RD, [BASE+NARGS:RD*8-8]
-    |  mov L:RB->base, BASE
-    |  lea RA, [RD+8*LUA_MINSTACK]
-    |  cmp RA, L:RB->maxstack
-    |  mov L:RB->top, RD
-    if (op == BC_FUNCC) {
-      |.if X64
-      |  mov CARG1d, L:RB  // Caveat: CARG1d may be RA.
-      |.else
-      |  mov ARG1, L:RB
-      |.endif
-    } else {
-      |.if X64
-      |  mov CARG2, KBASEa
-      |  mov CARG1d, L:RB  // Caveat: CARG1d may be RA.
-      |.else
-      |  mov ARG2, KBASEa
-      |  mov ARG1, L:RB
-      |.endif
-    }
-    |  ja ->vm_growstack_c  // Need to grow stack.
-    |  set_vmstate C
-    if (op == BC_FUNCC) {
-      |  call KBASEa  // (lua_State *L)
-    } else {
-      |  // (lua_State *L, lua_CFunction f)
-      |  call aword [DISPATCH+DISPATCH_GL(wrapf)]
-    }
-    |  // nresults returned in eax (RD).
-    |  mov BASE, L:RB->base
-    |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-    |  set_vmstate INTERP
-    |  lea RA, [BASE+RD*8]
-    |  neg RA
-    |  add RA, L:RB->top  // RA = (L->top-(L->base+nresults))*8
-    |  mov PC, [BASE-4]  // Fetch PC of caller.
-    |  jmp ->vm_returnc
-    break;
-
-  /* ---------------------------------------------------------------------- */
-
-  default:
-    fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
-    exit(2);
-    break;
-  }
-}
-/*
-** Build the complete backend for the given build context.
-**
-** Order of work: dasm_growpc() is called with BC__MAX, then the shared
-** subroutines are emitted via build_subroutines(), then the DynASM
-** directive .code_op is issued and build_ins() is called once for every
-** opcode number from 0 up to BC__MAX-1, in ascending order.
-**
-** Returns BC__MAX.
-**
-** NOTE(review): dasm_growpc(Dst, BC__MAX) presumably reserves one DynASM
-** pc label per opcode (build_ins refers to such labels as =>BC_JLOOP) and
-** .code_op presumably selects the output section for the opcode handlers
-** -- confirm both against the DynASM documentation.
-*/
-static int build_backend(BuildCtx *ctx)
-{
-  int op;
-  dasm_growpc(Dst, BC__MAX);
-  build_subroutines(ctx);  /* Must come before the per-opcode loop below. */
-  |.code_op
-  for (op = 0; op < BC__MAX; op++)
-    build_ins(ctx, (BCOp)op, op);  /* The opcode is passed twice: as BCOp and as plain int. */
-  return BC__MAX;
-}
-
-/* Emit pseudo frame-info for all assembler functions.
*/ /*
-** Details: writes assembler directives to ctx->fp. What is written depends
-** on ctx->mode:
-**
-**   BUILD_elfasm:  a .debug_frame section and, unless LJ_NO_UNWIND is set,
-**                  an .eh_frame section. Each describes the code range
-**                  that starts at .Lbegin and, with LJ_HASFFI, describes
-**                  lj_vm_ffi_call as a separate range.
-**   BUILD_machasm: (only compiled if !LJ_NO_UNWIND) a __TEXT,__eh_frame
-**                  section with one entry per non-empty symbol in ctx->sym;
-**                  on 32 bit additionally a non-lazy symbol pointer and a
-**                  jump table of symbol stubs.
-**   other modes:   nothing is written.
-**
-** The terms CIE and FDE in the comments below follow the LSCIE/LSFDE label
-** names used in the emitted text. Return values of fprintf() are ignored.
-*/
-static void emit_asm_debug(BuildCtx *ctx)
-{
-  int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);  /* Byte offset of the vm_ffi_call global within ctx->code. */
-#if LJ_64  /* String fragments that get pasted into the assembler text below. */
-#define SZPTR	"8"  /* Pointer size in bytes. */
-#define BSZPTR	"3"  /* Operand of .align in the Mach-O output (log2 of SZPTR). */
-#define REG_SP	"0x7"  /* Presumably the DWARF register number of the stack pointer -- confirm. */
-#define REG_RA	"0x10"  /* Presumably the DWARF return address column -- confirm. */
-#else
-#define SZPTR	"4"
-#define BSZPTR	"2"
-#define REG_SP	"0x4"
-#define REG_RA	"0x8"
-#endif
-  switch (ctx->mode) {
-  case BUILD_elfasm:
-    fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
-    fprintf(ctx->fp,  /* CIE .Lframe0, referenced by both .debug_frame FDEs below. */
-	".Lframe0:\n"
-	"\t.long .LECIE0-.LSCIE0\n"
-	".LSCIE0:\n"
-	"\t.long 0xffffffff\n"
-	"\t.byte 0x1\n"
-	"\t.string \"\"\n"
-	"\t.uleb128 0x1\n"
-	"\t.sleb128 -" SZPTR "\n"
-	"\t.byte " REG_RA "\n"
-	"\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
-	"\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
-	"\t.align " SZPTR "\n"
-	".LECIE0:\n\n");
-    fprintf(ctx->fp,  /* FDE for the fcofs bytes starting at .Lbegin. */
-	".LSFDE0:\n"
-	"\t.long .LEFDE0-.LASFDE0\n"
-	".LASFDE0:\n"
-	"\t.long .Lframe0\n"
-#if LJ_64
-	"\t.quad .Lbegin\n"
-	"\t.quad %d\n"
-	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
-	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
-	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
-	"\t.byte 0x8f\n\t.uleb128 0x4\n"	/* offset r15 */
-	"\t.byte 0x8e\n\t.uleb128 0x5\n"	/* offset r14 */
-#if LJ_NO_UNWIND
-	"\t.byte 0x8d\n\t.uleb128 0x6\n"	/* offset r13 */
-	"\t.byte 0x8c\n\t.uleb128 0x7\n"	/* offset r12 */
-#endif
-#else
-	"\t.long .Lbegin\n"
-	"\t.long %d\n"
-	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
-	"\t.byte 0x85\n\t.uleb128 0x2\n"	/* offset ebp */
-	"\t.byte 0x87\n\t.uleb128 0x3\n"	/* offset edi */
-	"\t.byte 0x86\n\t.uleb128 0x4\n"	/* offset esi */
-	"\t.byte 0x83\n\t.uleb128 0x5\n"	/* offset ebx */
-#endif
-	"\t.align " SZPTR "\n"
-	".LEFDE0:\n\n", fcofs, CFRAME_SIZE);  /* %d order: range length, then def_cfa_offset. */
-#if LJ_HASFFI
-    fprintf(ctx->fp,  /* FDE for lj_vm_ffi_call; its length is ctx->codesz - fcofs. */
-	".LSFDE1:\n"
-	"\t.long .LEFDE1-.LASFDE1\n"
-	".LASFDE1:\n"
-	"\t.long .Lframe0\n"
-#if LJ_64
-	"\t.quad lj_vm_ffi_call\n"
-	"\t.quad %d\n"
-	"\t.byte 0xe\n\t.uleb128 16\n"		/* def_cfa_offset */
-	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
-	"\t.byte 0xd\n\t.uleb128 0x6\n"		/* def_cfa_register rbp */
-	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
-#else
-	"\t.long lj_vm_ffi_call\n"
-	"\t.long %d\n"
-	"\t.byte 0xe\n\t.uleb128 8\n"		/* def_cfa_offset */
-	"\t.byte 0x85\n\t.uleb128 0x2\n"	/* offset ebp */
-	"\t.byte 0xd\n\t.uleb128 0x5\n"		/* def_cfa_register ebp */
-	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset ebx */
-#endif
-	"\t.align " SZPTR "\n"
-	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
-#endif
-#if !LJ_NO_UNWIND  /* The rest of this case emits the .eh_frame section. */
-#if (defined(__sun__) && defined(__svr4__))  /* Solaris uses different section flags/type. */
-#if LJ_64
-    fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
-#else
-    fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n");
-#endif
-#else
-    fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
-#endif
-    fprintf(ctx->fp,  /* CIE .Lframe1: augmentation "zPR", refers to lj_err_unwind_dwarf PC-relatively. */
-	".Lframe1:\n"
-	"\t.long .LECIE1-.LSCIE1\n"
-	".LSCIE1:\n"
-	"\t.long 0\n"
-	"\t.byte 0x1\n"
-	"\t.string \"zPR\"\n"
-	"\t.uleb128 0x1\n"
-	"\t.sleb128 -" SZPTR "\n"
-	"\t.byte " REG_RA "\n"
-	"\t.uleb128 6\n"			/* augmentation length */
-	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
-	"\t.long lj_err_unwind_dwarf-.\n"
-	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
-	"\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
-	"\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
-	"\t.align " SZPTR "\n"
-	".LECIE1:\n\n");
-    fprintf(ctx->fp,  /* FDE for the fcofs bytes starting at .Lbegin (PC-relative start address). */
-	".LSFDE2:\n"
-	"\t.long .LEFDE2-.LASFDE2\n"
-	".LASFDE2:\n"
-	"\t.long .LASFDE2-.Lframe1\n"
-	"\t.long .Lbegin-.\n"
-	"\t.long %d\n"
-	"\t.uleb128 0\n"			/* augmentation length */
-	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
-#if LJ_64
-	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
-	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
-	"\t.byte 0x8f\n\t.uleb128 0x4\n"	/* offset r15 */
-	"\t.byte 0x8e\n\t.uleb128 0x5\n"	/* offset r14 */
-#else
-	"\t.byte 0x85\n\t.uleb128 0x2\n"	/* offset ebp */
-	"\t.byte 0x87\n\t.uleb128 0x3\n"	/* offset edi */
-	"\t.byte 0x86\n\t.uleb128 0x4\n"	/* offset esi */
-	"\t.byte 0x83\n\t.uleb128 0x5\n"	/* offset ebx */
-#endif
-	"\t.align " SZPTR "\n"
-	".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
-#if LJ_HASFFI
-    fprintf(ctx->fp,  /* Separate CIE .Lframe2 (augmentation "zR") used only by the lj_vm_ffi_call FDE. */
-	".Lframe2:\n"
-	"\t.long .LECIE2-.LSCIE2\n"
-	".LSCIE2:\n"
-	"\t.long 0\n"
-	"\t.byte 0x1\n"
-	"\t.string \"zR\"\n"
-	"\t.uleb128 0x1\n"
-	"\t.sleb128 -" SZPTR "\n"
-	"\t.byte " REG_RA "\n"
-	"\t.uleb128 1\n"			/* augmentation length */
-	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
-	"\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
-	"\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
-	"\t.align " SZPTR "\n"
-	".LECIE2:\n\n");
-    fprintf(ctx->fp,  /* FDE for lj_vm_ffi_call; its length is ctx->codesz - fcofs. */
-	".LSFDE3:\n"
-	"\t.long .LEFDE3-.LASFDE3\n"
-	".LASFDE3:\n"
-	"\t.long .LASFDE3-.Lframe2\n"
-	"\t.long lj_vm_ffi_call-.\n"
-	"\t.long %d\n"
-	"\t.uleb128 0\n"			/* augmentation length */
-#if LJ_64
-	"\t.byte 0xe\n\t.uleb128 16\n"		/* def_cfa_offset */
-	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
-	"\t.byte 0xd\n\t.uleb128 0x6\n"		/* def_cfa_register rbp */
-	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
-#else
-	"\t.byte 0xe\n\t.uleb128 8\n"		/* def_cfa_offset */
-	"\t.byte 0x85\n\t.uleb128 0x2\n"	/* offset ebp */
-	"\t.byte 0xd\n\t.uleb128 0x5\n"		/* def_cfa_register ebp */
-	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset ebx */
-#endif
-	"\t.align " SZPTR "\n"
-	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
-#endif
-#endif
-    break;
-#if !LJ_NO_UNWIND
-  /* Mental note: never let Apple design an assembler.
-  ** Or a linker. Or a plastic case. But I digress.
-  */
-  case BUILD_machasm: {
-#if LJ_HASFFI
-    int fcsize = 0;  /* Size of _lj_vm_ffi_call; stays 0 if the loop below never sees that symbol. */
-#endif
-    int i;
-    fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
-    fprintf(ctx->fp,  /* CIE EH_frame1 (augmentation "zPR"), shared by all per-symbol FDEs of the loop below. */
-      "EH_frame1:\n"
-      "\t.set L$set$x,LECIEX-LSCIEX\n"
-      "\t.long L$set$x\n"
-      "LSCIEX:\n"
-      "\t.long 0\n"
-      "\t.byte 0x1\n"
-      "\t.ascii \"zPR\\0\"\n"
-      "\t.byte 0x1\n"
-      "\t.byte 128-" SZPTR "\n"  /* Single-byte counterpart of the .sleb128 -SZPTR in the ELF branch. */
-      "\t.byte " REG_RA "\n"
-      "\t.byte 6\n"				/* augmentation length */
-      "\t.byte 0x9b\n"				/* indirect|pcrel|sdata4 */
-#if LJ_64
-      "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
-      "\t.byte 0x1b\n"				/* pcrel|sdata4 */
-      "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
-#else
-      "\t.long L_lj_err_unwind_dwarf$non_lazy_ptr-.\n"  /* This label is defined by the !LJ_64 block further down. */
-      "\t.byte 0x1b\n"				/* pcrel|sdata4 */
-      "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n"  /* esp=5 on 32 bit MACH-O. */
-#endif
-      "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
-      "\t.align " BSZPTR "\n"
-      "LECIEX:\n\n");
-    for (i = 0; i < ctx->nsym; i++) {  /* One FDE per symbol, with labels numbered by the symbol index i. */
-      const char *name = ctx->sym[i].name;
-      int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;  /* NOTE(review): reads sym[nsym] on the last iteration -- assumes a trailing sentinel entry; confirm where ctx->sym is filled. */
-      if (size == 0) continue;  /* No FDE for symbols that cover no bytes. */
-#if LJ_HASFFI
-      if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }  /* Gets its own CIE and FDE after the loop. */
-#endif
-      fprintf(ctx->fp,
-	"%s.eh:\n"
-	"LSFDE%d:\n"
-	"\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
-	"\t.long L$set$%d\n"
-	"LASFDE%d:\n"
-	"\t.long LASFDE%d-EH_frame1\n"
-	"\t.long %s-.\n"
-	"\t.long %d\n"
-	"\t.byte 0\n"				/* augmentation length */
-	"\t.byte 0xe\n\t.byte %d\n"		/* def_cfa_offset */
-#if LJ_64
-	"\t.byte 0x86\n\t.byte 0x2\n"		/* offset rbp */
-	"\t.byte 0x83\n\t.byte 0x3\n"		/* offset rbx */
-	"\t.byte 0x8f\n\t.byte 0x4\n"		/* offset r15 */
-	"\t.byte 0x8e\n\t.byte 0x5\n"		/* offset r14 */
-#else
-	"\t.byte 0x84\n\t.byte 0x2\n"		/* offset ebp (4 for MACH-O)*/
-	"\t.byte 0x87\n\t.byte 0x3\n"		/* offset edi */
-	"\t.byte 0x86\n\t.byte 0x4\n"		/* offset esi */
-	"\t.byte 0x83\n\t.byte 0x5\n"		/* offset ebx */
-#endif
-	"\t.align " BSZPTR "\n"
-	"LEFDE%d:\n\n",
-	name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);  /* NOTE(review): CFRAME_SIZE is written with .byte here but with .uleb128 in the ELF branch; presumably this needs CFRAME_SIZE < 128 -- confirm. */
-    }
-#if LJ_HASFFI
-    if (fcsize) {  /* _lj_vm_ffi_call was seen with a non-zero size. */
-      fprintf(ctx->fp,  /* CIE EH_frame2 (augmentation "zR"), used only by the FDE that follows. */
-	"EH_frame2:\n"
-	"\t.set L$set$y,LECIEY-LSCIEY\n"
-	"\t.long L$set$y\n"
-	"LSCIEY:\n"
-	"\t.long 0\n"
-	"\t.byte 0x1\n"
-	"\t.ascii \"zR\\0\"\n"
-	"\t.byte 0x1\n"
-	"\t.byte 128-" SZPTR "\n"
-	"\t.byte " REG_RA "\n"
-	"\t.byte 1\n"				/* augmentation length */
-#if LJ_64
-	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
-	"\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
-#else
-	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
-	"\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n"  /* esp=5 on 32 bit MACH. */
-#endif
-	"\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
-	"\t.align " BSZPTR "\n"
-	"LECIEY:\n\n");
-      fprintf(ctx->fp,  /* FDE for _lj_vm_ffi_call, covering fcsize bytes. */
-	"_lj_vm_ffi_call.eh:\n"
-	"LSFDEY:\n"
-	"\t.set L$set$yy,LEFDEY-LASFDEY\n"
-	"\t.long L$set$yy\n"
-	"LASFDEY:\n"
-	"\t.long LASFDEY-EH_frame2\n"
-	"\t.long _lj_vm_ffi_call-.\n"
-	"\t.long %d\n"
-	"\t.byte 0\n"				/* augmentation length */
-#if LJ_64
-	"\t.byte 0xe\n\t.byte 16\n"		/* def_cfa_offset */
-	"\t.byte 0x86\n\t.byte 0x2\n"		/* offset rbp */
-	"\t.byte 0xd\n\t.byte 0x6\n"		/* def_cfa_register rbp */
-	"\t.byte 0x83\n\t.byte 0x3\n"		/* offset rbx */
-#else
-	"\t.byte 0xe\n\t.byte 8\n"		/* def_cfa_offset */
-	"\t.byte 0x84\n\t.byte 0x2\n"		/* offset ebp (4 for MACH-O)*/
-	"\t.byte 0xd\n\t.byte 0x4\n"		/* def_cfa_register ebp */
-	"\t.byte 0x83\n\t.byte 0x3\n"		/* offset ebx */
-#endif
-	"\t.align " BSZPTR "\n"
-	"LEFDEY:\n\n", fcsize);
-    }
-#endif
-#if !LJ_64  /* 32 bit only: define the non-lazy pointer used by EH_frame1 and emit the symbol stubs. */
-      fprintf(ctx->fp,
-	"\t.non_lazy_symbol_pointer\n"
-	"L_lj_err_unwind_dwarf$non_lazy_ptr:\n"
-	".indirect_symbol _lj_err_unwind_dwarf\n"
-	".long 0\n\n");
-      fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n");
-      {
-	const char *const *xn;
-	for (xn = ctx->extnames; *xn; xn++)  /* ctx->extnames is walked up to its NULL terminator. */
-	  if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1))  /* True if the name does not start with LABEL_PREFIX. */
-	    fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn);  /* Each stub is five bytes of octal 364 (0xf4). */
-      }
-#endif
-      fprintf(ctx->fp, ".subsections_via_symbols\n");
-    }
-    break;
-#endif
-  default:  /* Difficult for other modes. */
-    break;
-  }
-}
-