From 4c9f71be5d4449b717870092839c47a1d5db0dca Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 24 Feb 2010 07:09:34 +0100 Subject: [PATCH] Major 32/64 bit cleanups in assembler and exit handling. Add 64 bit lightuserdata handling. Keep the tagged 64 bit value. Allocate/save/restore 64 bit spill slots for 64 bit lightuserdata. Fix code generation for 64 bit loads/stores/moves/compares. Fix code generation for stack pointer adjustments. Add fixed spill slot definitions for x64. Reduce reserved spill slots. Disable STRREF + ADD fusion in 64 bit mode (avoid negative 32 bit ofs). --- src/lj_asm.c | 218 ++++++++++++++++++++++++++++++-------------- src/lj_ir.h | 18 +++- src/lj_snap.c | 10 ++ src/lj_target_x86.h | 26 ++++-- 4 files changed, 193 insertions(+), 79 deletions(-) diff --git a/src/lj_asm.c b/src/lj_asm.c index 116c6e1f..24467dbc 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -261,18 +261,16 @@ static void emit_rmrxo(ASMState *as, x86Op xo, Reg rr, Reg rb, Reg rx, static void emit_gri(ASMState *as, x86Group xg, Reg rb, int32_t i) { MCode *p = as->mcp; + x86Op xo; if (checki8(i)) { - p -= 3; - p[2] = (MCode)i; - p[0] = (MCode)(xg >> 16); + *--p = (MCode)i; + xo = XG_TOXOi8(xg); } else { - p -= 6; - *(int32_t *)(p+2) = i; - p[0] = (MCode)(xg >> 8); + p -= 4; + *(int32_t *)p = i; + xo = XG_TOXOi(xg); } - p[1] = MODRM(XM_REG, xg, rb); - REXRB(p, 0, rb); - as->mcp = p; + as->mcp = emit_opm(xo, XM_REG, (Reg)(xg & 7) | (rb & REX_64), rb, p, 0); } /* op [base+ofs], i */ @@ -282,12 +280,12 @@ static void emit_gmroi(ASMState *as, x86Group xg, Reg rb, int32_t ofs, x86Op xo; if (checki8(i)) { emit_i8(as, i); - xo = (x86Op)(((xg >> 16) << 24)+0xfe); + xo = XG_TOXOi8(xg); } else { emit_i32(as, i); - xo = (x86Op)(((xg >> 8) << 24)+0xfe); + xo = XG_TOXOi(xg); } - emit_rmro(as, xo, (Reg)xg, rb, ofs); + emit_rmro(as, xo, (Reg)(xg & 7), rb, ofs); } #define emit_shifti(as, xg, r, i) \ @@ -346,18 +344,6 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs) /* -- Emit moves ---------------------------------------------------------- */ -/* Generic move between two regs. */ -static void emit_movrr(ASMState *as, Reg r1, Reg r2) -{ - emit_rr(as, r1 < RID_MAX_GPR ? XO_MOV : XMM_MOVRR(as), r1, r2); -} - -/* Generic move from [base+ofs]. */ -static void emit_movrmro(ASMState *as, Reg rr, Reg rb, int32_t ofs) -{ - emit_rmro(as, rr < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), rr, rb, ofs); -} - /* mov [base+ofs], i */ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) { @@ -623,7 +609,7 @@ static int32_t ra_spill(ASMState *as, IRIns *ir) { int32_t slot = ir->s; if (!ra_hasspill(slot)) { - if (irt_isnum(ir->t)) { + if (irt_isnum(ir->t) || (LJ_64 && irt_islightud(ir->t))) { slot = as->evenspill; as->evenspill += 2; } else if (as->oddspill) { @@ -653,6 +639,16 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref) return r; } +/* Use 64 bit operations to handle 64 bit lightuserdata. */ +#define REX_64LU(ir, r) \ + ((r) | ((LJ_64 && irt_islightud((ir)->t)) ? REX_64 : 0)) + +/* Generic move between two regs. */ +static void ra_movrr(ASMState *as, IRIns *ir, Reg r1, Reg r2) +{ + emit_rr(as, r1 < RID_MAX_GPR ? XO_MOV : XMM_MOVRR(as), REX_64LU(ir, r1), r2); +} + /* Restore a register (marked as free). Rematerialize or force a spill. */ static Reg ra_restore(ASMState *as, IRRef ref) { @@ -668,7 +664,8 @@ static Reg ra_restore(ASMState *as, IRRef ref) if (!rset_test(as->weakset, r)) { /* Only restore non-weak references. 
*/ ra_modified(as, r); RA_DBGX((as, "restore $i $r", ir, r)); - emit_movrmro(as, r, RID_ESP, ofs); + emit_rmro(as, r < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), + REX_64LU(ir, r), RID_ESP, ofs); } return r; } @@ -679,7 +676,7 @@ static void ra_save(ASMState *as, IRIns *ir, Reg r) { RA_DBGX((as, "save $i $r", ir, r)); emit_rmro(as, r < RID_MAX_GPR ? XO_MOVto : XO_MOVSDto, - r, RID_ESP, sps_scale(ir->s)); + REX_64LU(ir, r), RID_ESP, sps_scale(ir->s)); } #define MINCOST(r) \ @@ -822,7 +819,8 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow) static void ra_rename(ASMState *as, Reg down, Reg up) { IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]); - IR(ref)->r = (uint8_t)up; + IRIns *ir = IR(ref); + ir->r = (uint8_t)up; as->cost[down] = 0; lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR)); lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up)); @@ -831,7 +829,7 @@ static void ra_rename(ASMState *as, Reg down, Reg up) rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */ ra_noweak(as, up); RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up)); - emit_movrr(as, down, up); /* Backwards code generation needs inverse move. */ + ra_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */ if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */ lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno); ren = tref_ref(lj_ir_emit(as->J)); @@ -864,7 +862,7 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r) Reg dest = ra_dest(as, ir, RID2RSET(r)); if (dest != r) { ra_scratch(as, RID2RSET(r)); - emit_movrr(as, dest, r); + ra_movrr(as, ir, dest, r); } } @@ -903,7 +901,7 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref) ra_modified(as, left); ra_rename(as, left, dest); } else { - emit_movrr(as, dest, left); + ra_movrr(as, ir, dest, left); } } } @@ -1201,7 +1199,8 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow) } else { Reg r; /* Fuse a constant add into the offset, e.g. string.sub(s, i+10). */ - if (mayfuse(as, ir->op2) && irr->o == IR_ADD && irref_isk(irr->op2)) { + if (!LJ_64 && /* NYI: has bad effects with negative index on x64. */ + mayfuse(as, ir->op2) && irr->o == IR_ADD && irref_isk(irr->op2)) { as->mrm.ofs += IR(irr->op2)->i; r = ra_alloc1(as, irr->op1, allow); } else { @@ -1325,7 +1324,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) allow &= ~RID2RSET(r); if (ra_hasreg(ir->r)) { ra_noweak(as, ir->r); - emit_movrr(as, r, ir->r); + ra_movrr(as, ir, r, ir->r); } else { ra_allocref(as, args[n], RID2RSET(r)); } @@ -1358,7 +1357,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) ra_evictset(as, drop); /* Evictions must be performed first. */ if (ra_used(ir)) { if (irt_isnum(ir->t)) { - int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */ + int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ #if LJ_64 if ((ci->flags & CCI_CASTU64)) { Reg dest = ir->r; @@ -1367,7 +1366,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) ra_modified(as, dest); emit_rr(as, XO_MOVD, dest|REX_64, RID_RET); /* Really MOVQ. */ } else { - emit_movrmro(as, RID_RET, RID_ESP, ofs); + emit_movtomro(as, RID_RET|REX_64, RID_ESP, ofs); } } else { ra_destreg(as, ir, RID_FPRET); @@ -1493,8 +1492,8 @@ static void asm_strto(ASMState *as, IRIns *ir) args[0] = ir->op1; args[1] = ASMREF_TMP1; asm_gencall(as, ci, args); - /* Store the result to the spill slot or slots SPS_TEMP1/2. 
*/ - emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1), + /* Store the result to the spill slot or temp slots. */ + emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64, RID_ESP, sps_scale(ir->s)); } @@ -1509,7 +1508,7 @@ static void asm_tostr(ASMState *as, IRIns *ir) args[1] = ASMREF_TMP1; asm_setupresult(as, ir, ci); asm_gencall(as, ci, args); - emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1), + emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64, RID_ESP, ra_spill(as, irl)); } else { const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; @@ -1627,6 +1626,10 @@ static void asm_href(ASMState *as, IRIns *ir) emit_i8(as, ~IRT_NUM); emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); } +#if LJ_64 + } else if (irt_islightud(kt)) { + emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64)); +#endif } else { if (!irt_ispri(kt)) { lua_assert(irt_isaddr(kt)); @@ -1747,16 +1750,17 @@ static void asm_newref(ASMState *as, IRIns *ir) if (irref_isk(ir->op2)) emit_loada(as, tmp, ir_knum(irkey)); else - emit_rmro(as, XO_LEA, tmp, RID_ESP, ra_spill(as, irkey)); + emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey)); } else { /* Otherwise use g->tmptv to hold the TValue. */ if (!irref_isk(ir->op2)) { Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp)); - emit_movtomro(as, src, tmp, 0); + emit_movtomro(as, REX_64LU(irkey, src), tmp, 0); } else if (!irt_ispri(irkey->t)) { emit_movmroi(as, tmp, 0, irkey->i); } - emit_movmroi(as, tmp, 4, irt_toitype(irkey->t)); + if (!(LJ_64 && irt_islightud(irkey->t))) + emit_movmroi(as, tmp, 4, irt_toitype(irkey->t)); emit_loada(as, tmp, &J2G(as->J)->tmptv); } } @@ -1822,6 +1826,11 @@ static void asm_fxload(ASMState *as, IRIns *ir) case IRT_U8: xo = XO_MOVZXb; break; case IRT_I16: xo = XO_MOVSXw; break; case IRT_U16: xo = XO_MOVZXw; break; +#if LJ_64 + case IRT_LIGHTUD: + dest |= REX_64; + /* fallthrough */ +#endif default: lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t)); xo = XO_MOV; @@ -1848,6 +1857,9 @@ static void asm_fstore(ASMState *as, IRIns *ir) switch (irt_type(ir->t)) { case IRT_I8: case IRT_U8: xo = XO_MOVtob; src |= FORCE_REX; break; case IRT_I16: case IRT_U16: xo = XO_MOVtow; break; +#if LJ_64 + case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */ +#endif default: lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t)); xo = XO_MOVto; @@ -1866,11 +1878,41 @@ static void asm_fstore(ASMState *as, IRIns *ir) } } +#if LJ_64 +static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) +{ + if (ra_used(ir) || typecheck) { + Reg dest = ra_dest(as, ir, RSET_GPR); + if (typecheck) { + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, dest)); + asm_guardcc(as, CC_NE); + emit_i8(as, -2); + emit_rr(as, XO_ARITHi8, XOg_CMP, tmp); + emit_shifti(as, XOg_SAR|REX_64, tmp, 47); + emit_rr(as, XO_MOV, tmp|REX_64, dest); + } + return dest; + } else { + return RID_NONE; + } +} +#endif + static void asm_ahuload(ASMState *as, IRIns *ir) { - RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t)); +#if LJ_64 + if (irt_islightud(ir->t)) { + Reg dest = asm_load_lightud64(as, ir, 1); + if (ra_hasreg(dest)) { + asm_fuseahuref(as, ir->op1, RSET_GPR); + emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM); + } + return; + } else +#endif if (ra_used(ir)) { + RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; Reg dest = ra_dest(as, ir, allow); asm_fuseahuref(as, ir->op1, RSET_GPR); emit_mrm(as, dest < RID_MAX_GPR ? 
XO_MOV : XMM_MOVRM(as), dest, RID_MRM); @@ -1890,6 +1932,12 @@ static void asm_ahustore(ASMState *as, IRIns *ir) Reg src = ra_alloc1(as, ir->op2, RSET_FPR); asm_fuseahuref(as, ir->op1, RSET_GPR); emit_mrm(as, XO_MOVSDto, src, RID_MRM); +#if LJ_64 + } else if (irt_islightud(ir->t)) { + Reg src = ra_alloc1(as, ir->op2, RSET_GPR); + asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src)); + emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); +#endif } else { IRIns *irr = IR(ir->op2); RegSet allow = RSET_GPR; @@ -1925,6 +1973,15 @@ static void asm_sload(ASMState *as, IRIns *ir) base = ra_alloc1(as, REF_BASE, RSET_GPR); emit_rmro(as, XMM_MOVRM(as), left, base, ofs); t.irt = IRT_NUM; /* Continue with a regular number type check. */ +#if LJ_64 + } else if (irt_islightud(t)) { + Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK)); + if (ra_hasreg(dest)) { + base = ra_alloc1(as, REF_BASE, RSET_GPR); + emit_rmro(as, XO_MOV, dest|REX_64, base, ofs); + } + return; +#endif } else if (ra_used(ir)) { RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; Reg dest = ra_dest(as, ir, allow); @@ -1932,8 +1989,10 @@ static void asm_sload(ASMState *as, IRIns *ir) lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); if (irt_isint(t)) emit_rmro(as, XO_CVTSD2SI, dest, base, ofs); + else if (irt_isnum(t)) + emit_rmro(as, XMM_MOVRM(as), dest, base, ofs); else - emit_movrmro(as, dest, base, ofs); + emit_rmro(as, XO_MOV, dest, base, ofs); } else { if (!(ir->op2 & IRSLOAD_TYPECHECK)) return; /* No type check: avoid base alloc. */ @@ -2117,7 +2176,7 @@ static void asm_fpmath(ASMState *as, IRIns *ir) } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) { /* Rejoined to pow(). */ } else { /* Handle x87 ops. */ - int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */ + int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ Reg dest = ir->r; if (ra_hasreg(dest)) { ra_free(as, dest); @@ -2521,6 +2580,10 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc) Reg left = ra_alloc1(as, lref, RSET_GPR); Reg right = asm_fuseload(as, rref, rset_exclude(RSET_GPR, left)); asm_guardcc(as, cc); +#if LJ_64 + if (irt_islightud(ir->t)) + left |= REX_64; +#endif emit_mrm(as, XO_CMP, left, right); } } @@ -2563,7 +2626,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, Reg r = allow ? rset_pickbot(allow) : RID_EAX; emit_jcc(as, CC_B, exitstub_addr(as->J, exitno)); if (allow == RSET_EMPTY) /* Restore temp. register. */ - emit_rmro(as, XO_MOV, r, RID_ESP, sps_scale(SPS_TEMP1)); + emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0); else ra_modified(as, r); emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot)); @@ -2575,7 +2638,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack)); emit_getgl(as, r, jit_L); if (allow == RSET_EMPTY) /* Spill temp. register. */ - emit_rmro(as, XO_MOVto, r, RID_ESP, sps_scale(SPS_TEMP1)); + emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0); } /* Restore Lua stack from on-trace state. 
*/ @@ -2600,14 +2663,17 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t)); if (!irref_isk(ref)) { Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); - emit_movtomro(as, src, RID_BASE, ofs); + emit_movtomro(as, REX_64LU(ir, src), RID_BASE, ofs); } else if (!irt_ispri(ir->t)) { emit_movmroi(as, RID_BASE, ofs, ir->i); } - if (!(sn & (SNAP_CONT|SNAP_FRAME))) - emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); - else if (s != 0) /* Do not overwrite link to previous frame. */ - emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--)); + if ((sn & (SNAP_CONT|SNAP_FRAME))) { + if (s != 0) /* Do not overwrite link to previous frame. */ + emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--)); + } else { + if (!(LJ_64 && irt_islightud(ir->t))) + emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); + } } checkmclim(as); } @@ -2668,7 +2734,7 @@ static void asm_gc_check(ASMState *as, SnapShot *snap) (int32_t)as->T->snapmap[snap->mapofs+snap->nent]); emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK); lstate = IR(ASMREF_L)->r; - emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe)); + emit_rmro(as, XO_MOV, tmp, lstate, offsetof(lua_State, cframe)); /* It's ok if lstate is already in a non-scratch reg. But all allocations ** in the non-fast path must use a scratch reg. See comment above. */ @@ -2830,7 +2896,7 @@ static void asm_phi(ASMState *as, IRIns *ir) r = ra_allocref(as, ir->op2, allow); } else { /* Duplicate right PHI, need a copy (rare). */ r = ra_scratch(as, allow); - emit_movrr(as, r, irr->r); + ra_movrr(as, irr, r, irr->r); } ir->r = (uint8_t)r; rset_set(as->phiset, r); @@ -2912,6 +2978,14 @@ static void asm_loop(ASMState *as) /* -- Head of trace ------------------------------------------------------- */ +/* Calculate stack adjustment. */ +static int32_t asm_stack_adjust(ASMState *as) +{ + if (as->evenspill <= SPS_FIXED) + return 0; + return sps_scale((as->evenspill - SPS_FIXED + 3) & ~3); +} + /* Coalesce BASE register for a root trace. */ static void asm_head_root_base(ASMState *as) { @@ -2932,9 +3006,9 @@ static void asm_head_root(ASMState *as) int32_t spadj; asm_head_root_base(as); emit_setgli(as, vmstate, (int32_t)as->J->curtrace); - spadj = sps_adjust(as->evenspill); + spadj = asm_stack_adjust(as); as->T->spadjust = (uint16_t)spadj; - emit_addptr(as, RID_ESP, -spadj); + emit_addptr(as, RID_ESP|REX_64, -spadj); /* Root traces assume a checked stack for the starting proto. */ as->T->topslot = gcref(as->T->startpt)->pt.framesize; } @@ -3007,7 +3081,7 @@ static void asm_head_side(ASMState *as) } /* Calculate stack frame adjustment. */ - spadj = sps_adjust(as->evenspill); + spadj = asm_stack_adjust(as); spdelta = spadj - (int32_t)as->parent->spadjust; if (spdelta < 0) { /* Don't shrink the stack frame. */ spadj = (int32_t)as->parent->spadjust; @@ -3048,7 +3122,7 @@ static void asm_head_side(ASMState *as) /* Store trace number and adjust stack frame relative to the parent. */ emit_setgli(as, vmstate, (int32_t)as->J->curtrace); - emit_addptr(as, RID_ESP, -spdelta); + emit_addptr(as, RID_ESP|REX_64, -spdelta); /* Restore target registers from parent spill slots. */ if (pass3) { @@ -3061,7 +3135,8 @@ static void asm_head_side(ASMState *as) if (ra_hasspill(regsp_spill(rs))) { int32_t ofs = sps_scale(regsp_spill(rs)); ra_free(as, r); - emit_movrmro(as, r, RID_ESP, ofs); + emit_rmro(as, r < RID_MAX_GPR ? 
XO_MOV : XMM_MOVRM(as), + REX_64LU(ir, r), RID_ESP, ofs); checkmclim(as); } } @@ -3078,7 +3153,7 @@ static void asm_head_side(ASMState *as) rset_clear(live, rp); rset_clear(allow, rp); ra_free(as, ir->r); - emit_movrr(as, ir->r, rp); + ra_movrr(as, ir, ir->r, rp); checkmclim(as); } @@ -3150,7 +3225,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) MCode *target, *q; int32_t spadj = as->T->spadjust; if (spadj == 0) { - p -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6; + p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0); } else { MCode *p1; /* Patch stack adjustment. */ @@ -3163,10 +3238,16 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) *(int32_t *)p1 = spadj; } if ((as->flags & JIT_F_LEA_AGU)) { +#if LJ_64 + p1[-4] = 0x48; +#endif p1[-3] = (MCode)XI_LEA; p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP); p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP); } else { +#if LJ_64 + p1[-3] = 0x48; +#endif p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); } @@ -3365,12 +3446,13 @@ static void asm_setup_regsp(ASMState *as, Trace *T) break; case IR_CALLN: case IR_CALLL: case IR_CALLS: { const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; +#if LJ_64 + /* NYI: add stack slots for calls with more than 4/6 args. */ + ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET); +#else /* NYI: not fastcall-aware, but doesn't matter (yet). */ if (CCI_NARGS(ci) > (uint32_t)as->evenspill) /* Leave room for args. */ as->evenspill = (int32_t)CCI_NARGS(ci); -#if LJ_64 - ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET); -#else ir->prev = REGSP_HINT(RID_RET); #endif if (inloop) @@ -3379,8 +3461,12 @@ static void asm_setup_regsp(ASMState *as, Trace *T) continue; } /* C calls evict all scratch regs and return results in RID_RET. */ - case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TOSTR: - case IR_NEWREF: + case IR_SNEW: case IR_NEWREF: +#if !LJ_64 + if (as->evenspill < 3) /* lj_str_new and lj_tab_newkey need 3 args. */ + as->evenspill = 3; +#endif + case IR_TNEW: case IR_TDUP: case IR_TOSTR: ir->prev = REGSP_HINT(RID_RET); if (inloop) as->modset = RSET_SCRATCH; @@ -3500,7 +3586,7 @@ void lj_asm_trace(jit_State *J, Trace *T) if (!as->loopref) { /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ - as->mcp -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6; + as->mcp -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6 + (LJ_64 ? 1 : 0); as->invmcp = NULL; asm_tail_link(as); } diff --git a/src/lj_ir.h b/src/lj_ir.h index 75519ed4..ca871238 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -362,6 +362,7 @@ typedef struct IRType1 { uint8_t irt; } IRType1; #define irt_isnil(t) (irt_type(t) == IRT_NIL) #define irt_ispri(t) ((uint32_t)irt_type(t) <= IRT_TRUE) +#define irt_islightud(t) (irt_type(t) == IRT_LIGHTUD) #define irt_isstr(t) (irt_type(t) == IRT_STR) #define irt_isfunc(t) (irt_type(t) == IRT_FUNC) #define irt_istab(t) (irt_type(t) == IRT_TAB) @@ -376,9 +377,20 @@ typedef struct IRType1 { uint8_t irt; } IRType1; #define irt_isgcv(t) (irt_typerange((t), IRT_STR, IRT_UDATA)) #define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA)) -#define itype2irt(tv) \ - (~uitype(tv) < IRT_NUM ? 
cast(IRType, ~uitype(tv)) : IRT_NUM) -#define irt_toitype(t) ((int32_t)~(uint32_t)irt_type(t)) +static LJ_AINLINE IRType itype2irt(const TValue *tv) +{ + if (tvisnum(tv)) + return IRT_NUM; +#if LJ_64 + else if (tvislightud(tv)) + return IRT_LIGHTUD; +#endif + else + return cast(IRType, ~uitype(tv)); +} + +#define irt_toitype(t) \ + check_exp(!(LJ_64 && irt_islightud((t))), (int32_t)~(uint32_t)irt_type((t))) #define irt_isguard(t) ((t).irt & IRT_GUARD) #define irt_ismarked(t) ((t).irt & IRT_MARK) diff --git a/src/lj_snap.c b/src/lj_snap.c index 2b82e672..86890a26 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -279,6 +279,11 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) setintV(o, *sps); } else if (irt_isnum(t)) { o->u64 = *(uint64_t *)sps; +#if LJ_64 + } else if (irt_islightud(t)) { + /* 64 bit lightuserdata which may escape already has the tag bits. */ + o->u64 = *(uint64_t *)sps; +#endif } else { lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ setgcrefi(o->gcr, *sps); @@ -291,6 +296,11 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) setintV(o, ex->gpr[r-RID_MIN_GPR]); } else if (irt_isnum(t)) { setnumV(o, ex->fpr[r-RID_MIN_FPR]); +#if LJ_64 + } else if (irt_islightud(t)) { + /* 64 bit lightuserdata which may escape already has the tag bits. */ + o->u64 = ex->gpr[r-RID_MIN_GPR]; +#endif } else { if (!irt_ispri(t)) setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]); diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index cb1892d5..8e9a8788 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h @@ -96,20 +96,24 @@ enum { /* -- Spill slots --------------------------------------------------------- */ -/* Stack layout for the compiled machine code (after stack adjustment). */ -enum { - SPS_TEMP1, /* Temps (3*dword) for calls and asm_x87load. */ - SPS_TEMP2, - SPS_TEMP3, - SPS_FIRST, /* First spill slot for general use. */ +/* Available fixed spill slots in interpreter frame. +** This definition must match with the *.dasc file(s). +*/ +#if LJ_64 +#ifdef _WIN64 +#define SPS_FIXED (5*2) +#else +#define SPS_FIXED 2 +#endif +#else +#define SPS_FIXED 6 +#endif - /* This definition must match with the *.dasc file(s). */ - SPS_FIXED = 6 /* Available fixed spill slots in interpreter frame. */ -}; +/* First spill slot for general use. Reserve one 64 bit slot. */ +#define SPS_FIRST 2 /* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */ #define sps_scale(slot) (4 * (int32_t)(slot)) -#define sps_adjust(slot) (sps_scale(((slot)-SPS_FIXED+3)&~3)) /* -- Exit state ---------------------------------------------------------- */ @@ -241,6 +245,8 @@ typedef uint32_t x86Group; #define XG_(i8, i, g) ((x86Group)(((i8) << 16) + ((i) << 8) + (g))) #define XG_ARITHi(g) XG_(XI_ARITHi8, XI_ARITHi, g) +#define XG_TOXOi(xg) ((x86Op)(0x000000fe + (((xg)<<16) & 0xff000000))) +#define XG_TOXOi8(xg) ((x86Op)(0x000000fe + (((xg)<<8) & 0xff000000))) #define XO_ARITH(a) ((x86Op)(0x030000fe + ((a)<<27)))