Major 32/64 bit cleanups in assembler and exit handling.

Add 64 bit lightuserdata handling. Keep the tagged 64 bit value.
Allocate/save/restore 64 bit spill slots for 64 bit lightuserdata.
Fix code generation for 64 bit loads/stores/moves/compares.
Fix code generation for stack pointer adjustments.
Add fixed spill slot definitions for x64. Reduce reserved spill slots.
Disable STRREF + ADD fusion in 64 bit mode (avoid negative 32 bit ofs).
Author: Mike Pall, 2010-02-24 07:09:34 +01:00
Parent: e46f4c8a11
Commit: 4c9f71be5d
4 changed files with 193 additions and 79 deletions
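For context before the diff: a minimal sketch of the tagged 64 bit lightuserdata layout this patch relies on. The constants mirror the shift-by-47 / compare-with--2 guard emitted in asm_load_lightud64 below; all names here are illustrative, not part of the patch.

  #include <stdint.h>

  /* Tagged 64 bit lightuserdata (sketch): the top 17 bits hold the tag
  ** 0x1fffe, the low 47 bits hold the pointer.
  */
  #define LIGHTUD_TAG   (0x1fffeULL << 47)     /* 0xffff000000000000 */
  #define LIGHTUD_PMASK ((1ULL << 47) - 1)     /* low 47 pointer bits */

  static uint64_t lightud_encode(void *p)
  {
    return LIGHTUD_TAG | ((uint64_t)(uintptr_t)p & LIGHTUD_PMASK);
  }

  static void *lightud_decode(uint64_t v)
  {
    return (void *)(uintptr_t)(v & LIGHTUD_PMASK);
  }

  /* The guard in asm_load_lightud64 checks exactly this predicate. */
  static int lightud_tag_ok(uint64_t v)
  {
    return ((int64_t)v >> 47) == -2;           /* top 17 bits == 0x1fffe */
  }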

Changed file 1 of 4

@ -261,18 +261,16 @@ static void emit_rmrxo(ASMState *as, x86Op xo, Reg rr, Reg rb, Reg rx,
static void emit_gri(ASMState *as, x86Group xg, Reg rb, int32_t i)
{
MCode *p = as->mcp;
x86Op xo;
if (checki8(i)) {
p -= 3;
p[2] = (MCode)i;
p[0] = (MCode)(xg >> 16);
*--p = (MCode)i;
xo = XG_TOXOi8(xg);
} else {
p -= 6;
*(int32_t *)(p+2) = i;
p[0] = (MCode)(xg >> 8);
p -= 4;
*(int32_t *)p = i;
xo = XG_TOXOi(xg);
}
p[1] = MODRM(XM_REG, xg, rb);
REXRB(p, 0, rb);
as->mcp = p;
as->mcp = emit_opm(xo, XM_REG, (Reg)(xg & 7) | (rb & REX_64), rb, p, 0);
}
/* op [base+ofs], i */
@ -282,12 +280,12 @@ static void emit_gmroi(ASMState *as, x86Group xg, Reg rb, int32_t ofs,
x86Op xo;
if (checki8(i)) {
emit_i8(as, i);
xo = (x86Op)(((xg >> 16) << 24)+0xfe);
xo = XG_TOXOi8(xg);
} else {
emit_i32(as, i);
xo = (x86Op)(((xg >> 8) << 24)+0xfe);
xo = XG_TOXOi(xg);
}
emit_rmro(as, xo, (Reg)xg, rb, ofs);
emit_rmro(as, xo, (Reg)(xg & 7), rb, ofs);
}
#define emit_shifti(as, xg, r, i) \
@ -346,18 +344,6 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
/* -- Emit moves ---------------------------------------------------------- */
/* Generic move between two regs. */
static void emit_movrr(ASMState *as, Reg r1, Reg r2)
{
emit_rr(as, r1 < RID_MAX_GPR ? XO_MOV : XMM_MOVRR(as), r1, r2);
}
/* Generic move from [base+ofs]. */
static void emit_movrmro(ASMState *as, Reg rr, Reg rb, int32_t ofs)
{
emit_rmro(as, rr < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), rr, rb, ofs);
}
/* mov [base+ofs], i */
static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
{
@ -623,7 +609,7 @@ static int32_t ra_spill(ASMState *as, IRIns *ir)
{
int32_t slot = ir->s;
if (!ra_hasspill(slot)) {
if (irt_isnum(ir->t)) {
if (irt_isnum(ir->t) || (LJ_64 && irt_islightud(ir->t))) {
slot = as->evenspill;
as->evenspill += 2;
} else if (as->oddspill) {
@ -653,6 +639,16 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref)
return r;
}
/* Use 64 bit operations to handle 64 bit lightuserdata. */
#define REX_64LU(ir, r) \
((r) | ((LJ_64 && irt_islightud((ir)->t)) ? REX_64 : 0))
/* Generic move between two regs. */
static void ra_movrr(ASMState *as, IRIns *ir, Reg r1, Reg r2)
{
emit_rr(as, r1 < RID_MAX_GPR ? XO_MOV : XMM_MOVRR(as), REX_64LU(ir, r1), r2);
}
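A note on the mechanism (illustrative call, not part of the patch): OR-ing REX_64 into the destination register makes the emitter prepend a REX.W prefix, so the same XO_MOV template turns into a full 64 bit move. REX_64LU applies this only when the IR type is lightuserdata on x64:

  /* Hypothetical IRIns *ir with irt_islightud(ir->t) true, on LJ_64: */
  emit_rr(as, XO_MOV, REX_64LU(ir, RID_EAX), RID_ECX);  /* mov rax, rcx */
  /* Any other type, or a 32 bit build, degrades to:       mov eax, ecx */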
/* Restore a register (marked as free). Rematerialize or force a spill. */
static Reg ra_restore(ASMState *as, IRRef ref)
{
@ -668,7 +664,8 @@ static Reg ra_restore(ASMState *as, IRRef ref)
if (!rset_test(as->weakset, r)) { /* Only restore non-weak references. */
ra_modified(as, r);
RA_DBGX((as, "restore $i $r", ir, r));
emit_movrmro(as, r, RID_ESP, ofs);
emit_rmro(as, r < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as),
REX_64LU(ir, r), RID_ESP, ofs);
}
return r;
}
@ -679,7 +676,7 @@ static void ra_save(ASMState *as, IRIns *ir, Reg r)
{
RA_DBGX((as, "save $i $r", ir, r));
emit_rmro(as, r < RID_MAX_GPR ? XO_MOVto : XO_MOVSDto,
r, RID_ESP, sps_scale(ir->s));
REX_64LU(ir, r), RID_ESP, sps_scale(ir->s));
}
#define MINCOST(r) \
@ -822,7 +819,8 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
static void ra_rename(ASMState *as, Reg down, Reg up)
{
IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]);
IR(ref)->r = (uint8_t)up;
IRIns *ir = IR(ref);
ir->r = (uint8_t)up;
as->cost[down] = 0;
lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR));
lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up));
@ -831,7 +829,7 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */
ra_noweak(as, up);
RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
emit_movrr(as, down, up); /* Backwards code generation needs inverse move. */
ra_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */
if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */
lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno);
ren = tref_ref(lj_ir_emit(as->J));
@ -864,7 +862,7 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r)
Reg dest = ra_dest(as, ir, RID2RSET(r));
if (dest != r) {
ra_scratch(as, RID2RSET(r));
emit_movrr(as, dest, r);
ra_movrr(as, ir, dest, r);
}
}
@ -903,7 +901,7 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
ra_modified(as, left);
ra_rename(as, left, dest);
} else {
emit_movrr(as, dest, left);
ra_movrr(as, ir, dest, left);
}
}
}
@ -1201,7 +1199,8 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
} else {
Reg r;
/* Fuse a constant add into the offset, e.g. string.sub(s, i+10). */
if (mayfuse(as, ir->op2) && irr->o == IR_ADD && irref_isk(irr->op2)) {
if (!LJ_64 && /* NYI: has bad effects with negative index on x64. */
mayfuse(as, ir->op2) && irr->o == IR_ADD && irref_isk(irr->op2)) {
as->mrm.ofs += IR(irr->op2)->i;
r = ra_alloc1(as, irr->op1, allow);
} else {
@ -1325,7 +1324,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
allow &= ~RID2RSET(r);
if (ra_hasreg(ir->r)) {
ra_noweak(as, ir->r);
emit_movrr(as, r, ir->r);
ra_movrr(as, ir, r, ir->r);
} else {
ra_allocref(as, args[n], RID2RSET(r));
}
@ -1358,7 +1357,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
ra_evictset(as, drop); /* Evictions must be performed first. */
if (ra_used(ir)) {
if (irt_isnum(ir->t)) {
int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */
int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
#if LJ_64
if ((ci->flags & CCI_CASTU64)) {
Reg dest = ir->r;
@ -1367,7 +1366,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
ra_modified(as, dest);
emit_rr(as, XO_MOVD, dest|REX_64, RID_RET); /* Really MOVQ. */
} else {
emit_movrmro(as, RID_RET, RID_ESP, ofs);
emit_movtomro(as, RID_RET|REX_64, RID_ESP, ofs);
}
} else {
ra_destreg(as, ir, RID_FPRET);
@ -1493,8 +1492,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
args[0] = ir->op1;
args[1] = ASMREF_TMP1;
asm_gencall(as, ci, args);
/* Store the result to the spill slot or slots SPS_TEMP1/2. */
emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
/* Store the result to the spill slot or temp slots. */
emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
RID_ESP, sps_scale(ir->s));
}
@ -1509,7 +1508,7 @@ static void asm_tostr(ASMState *as, IRIns *ir)
args[1] = ASMREF_TMP1;
asm_setupresult(as, ir, ci);
asm_gencall(as, ci, args);
emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
RID_ESP, ra_spill(as, irl));
} else {
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
@ -1627,6 +1626,10 @@ static void asm_href(ASMState *as, IRIns *ir)
emit_i8(as, ~IRT_NUM);
emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
}
#if LJ_64
} else if (irt_islightud(kt)) {
emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64));
#endif
} else {
if (!irt_ispri(kt)) {
lua_assert(irt_isaddr(kt));
@ -1747,16 +1750,17 @@ static void asm_newref(ASMState *as, IRIns *ir)
if (irref_isk(ir->op2))
emit_loada(as, tmp, ir_knum(irkey));
else
emit_rmro(as, XO_LEA, tmp, RID_ESP, ra_spill(as, irkey));
emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey));
} else {
/* Otherwise use g->tmptv to hold the TValue. */
if (!irref_isk(ir->op2)) {
Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
emit_movtomro(as, src, tmp, 0);
emit_movtomro(as, REX_64LU(irkey, src), tmp, 0);
} else if (!irt_ispri(irkey->t)) {
emit_movmroi(as, tmp, 0, irkey->i);
}
emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
if (!(LJ_64 && irt_islightud(irkey->t)))
emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
emit_loada(as, tmp, &J2G(as->J)->tmptv);
}
}
@ -1822,6 +1826,11 @@ static void asm_fxload(ASMState *as, IRIns *ir)
case IRT_U8: xo = XO_MOVZXb; break;
case IRT_I16: xo = XO_MOVSXw; break;
case IRT_U16: xo = XO_MOVZXw; break;
#if LJ_64
case IRT_LIGHTUD:
dest |= REX_64;
/* fallthrough */
#endif
default:
lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
xo = XO_MOV;
@ -1848,6 +1857,9 @@ static void asm_fstore(ASMState *as, IRIns *ir)
switch (irt_type(ir->t)) {
case IRT_I8: case IRT_U8: xo = XO_MOVtob; src |= FORCE_REX; break;
case IRT_I16: case IRT_U16: xo = XO_MOVtow; break;
#if LJ_64
case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */
#endif
default:
lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
xo = XO_MOVto;
@ -1866,11 +1878,41 @@ static void asm_fstore(ASMState *as, IRIns *ir)
}
}
#if LJ_64
static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
{
if (ra_used(ir) || typecheck) {
Reg dest = ra_dest(as, ir, RSET_GPR);
if (typecheck) {
Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, dest));
asm_guardcc(as, CC_NE);
emit_i8(as, -2);
emit_rr(as, XO_ARITHi8, XOg_CMP, tmp);
emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
emit_rr(as, XO_MOV, tmp|REX_64, dest);
}
return dest;
} else {
return RID_NONE;
}
}
#endif
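Since the assembler generates machine code backwards, the emit calls in asm_load_lightud64 appear in reverse; at run time the type check executes as shown in this sketch of the resulting sequence (registers symbolic):

  /*  mov  tmp, dest     ; copy the loaded 64 bit value
  **  sar  tmp, 47       ; keep only the 17 tag bits (sign-extended)
  **  cmp  tmp, -2       ; the lightuserdata tag 0x1fffe sign-extends to -2
  **  jne  ->exit        ; asm_guardcc: leave the trace on a tag mismatch
  */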
static void asm_ahuload(ASMState *as, IRIns *ir)
{
RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t));
#if LJ_64
if (irt_islightud(ir->t)) {
Reg dest = asm_load_lightud64(as, ir, 1);
if (ra_hasreg(dest)) {
asm_fuseahuref(as, ir->op1, RSET_GPR);
emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
}
return;
} else
#endif
if (ra_used(ir)) {
RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
Reg dest = ra_dest(as, ir, allow);
asm_fuseahuref(as, ir->op1, RSET_GPR);
emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM);
@ -1890,6 +1932,12 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
Reg src = ra_alloc1(as, ir->op2, RSET_FPR);
asm_fuseahuref(as, ir->op1, RSET_GPR);
emit_mrm(as, XO_MOVSDto, src, RID_MRM);
#if LJ_64
} else if (irt_islightud(ir->t)) {
Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src));
emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
#endif
} else {
IRIns *irr = IR(ir->op2);
RegSet allow = RSET_GPR;
@ -1925,6 +1973,15 @@ static void asm_sload(ASMState *as, IRIns *ir)
base = ra_alloc1(as, REF_BASE, RSET_GPR);
emit_rmro(as, XMM_MOVRM(as), left, base, ofs);
t.irt = IRT_NUM; /* Continue with a regular number type check. */
#if LJ_64
} else if (irt_islightud(t)) {
Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK));
if (ra_hasreg(dest)) {
base = ra_alloc1(as, REF_BASE, RSET_GPR);
emit_rmro(as, XO_MOV, dest|REX_64, base, ofs);
}
return;
#endif
} else if (ra_used(ir)) {
RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
Reg dest = ra_dest(as, ir, allow);
@ -1932,8 +1989,10 @@ static void asm_sload(ASMState *as, IRIns *ir)
lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
if (irt_isint(t))
emit_rmro(as, XO_CVTSD2SI, dest, base, ofs);
else if (irt_isnum(t))
emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
else
emit_movrmro(as, dest, base, ofs);
emit_rmro(as, XO_MOV, dest, base, ofs);
} else {
if (!(ir->op2 & IRSLOAD_TYPECHECK))
return; /* No type check: avoid base alloc. */
@ -2117,7 +2176,7 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
} else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) {
/* Rejoined to pow(). */
} else { /* Handle x87 ops. */
int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */
int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
Reg dest = ir->r;
if (ra_hasreg(dest)) {
ra_free(as, dest);
@ -2521,6 +2580,10 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
Reg left = ra_alloc1(as, lref, RSET_GPR);
Reg right = asm_fuseload(as, rref, rset_exclude(RSET_GPR, left));
asm_guardcc(as, cc);
#if LJ_64
if (irt_islightud(ir->t))
left |= REX_64;
#endif
emit_mrm(as, XO_CMP, left, right);
}
}
@ -2563,7 +2626,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
Reg r = allow ? rset_pickbot(allow) : RID_EAX;
emit_jcc(as, CC_B, exitstub_addr(as->J, exitno));
if (allow == RSET_EMPTY) /* Restore temp. register. */
emit_rmro(as, XO_MOV, r, RID_ESP, sps_scale(SPS_TEMP1));
emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0);
else
ra_modified(as, r);
emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot));
@ -2575,7 +2638,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
emit_getgl(as, r, jit_L);
if (allow == RSET_EMPTY) /* Spill temp. register. */
emit_rmro(as, XO_MOVto, r, RID_ESP, sps_scale(SPS_TEMP1));
emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
}
/* Restore Lua stack from on-trace state. */
@ -2600,14 +2663,17 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t));
if (!irref_isk(ref)) {
Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
emit_movtomro(as, src, RID_BASE, ofs);
emit_movtomro(as, REX_64LU(ir, src), RID_BASE, ofs);
} else if (!irt_ispri(ir->t)) {
emit_movmroi(as, RID_BASE, ofs, ir->i);
}
if (!(sn & (SNAP_CONT|SNAP_FRAME)))
emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
else if (s != 0) /* Do not overwrite link to previous frame. */
emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
if ((sn & (SNAP_CONT|SNAP_FRAME))) {
if (s != 0) /* Do not overwrite link to previous frame. */
emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
} else {
if (!(LJ_64 && irt_islightud(ir->t)))
emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
}
}
checkmclim(as);
}
@ -2668,7 +2734,7 @@ static void asm_gc_check(ASMState *as, SnapShot *snap)
(int32_t)as->T->snapmap[snap->mapofs+snap->nent]);
emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK);
lstate = IR(ASMREF_L)->r;
emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe));
emit_rmro(as, XO_MOV, tmp, lstate, offsetof(lua_State, cframe));
/* It's ok if lstate is already in a non-scratch reg. But all allocations
** in the non-fast path must use a scratch reg. See comment above.
*/
@ -2830,7 +2896,7 @@ static void asm_phi(ASMState *as, IRIns *ir)
r = ra_allocref(as, ir->op2, allow);
} else { /* Duplicate right PHI, need a copy (rare). */
r = ra_scratch(as, allow);
emit_movrr(as, r, irr->r);
ra_movrr(as, irr, r, irr->r);
}
ir->r = (uint8_t)r;
rset_set(as->phiset, r);
@ -2912,6 +2978,14 @@ static void asm_loop(ASMState *as)
/* -- Head of trace ------------------------------------------------------- */
/* Calculate stack adjustment. */
static int32_t asm_stack_adjust(ASMState *as)
{
if (as->evenspill <= SPS_FIXED)
return 0;
return sps_scale((as->evenspill - SPS_FIXED + 3) & ~3);
}
/* Coalesce BASE register for a root trace. */
static void asm_head_root_base(ASMState *as)
{
@ -2932,9 +3006,9 @@ static void asm_head_root(ASMState *as)
int32_t spadj;
asm_head_root_base(as);
emit_setgli(as, vmstate, (int32_t)as->J->curtrace);
spadj = sps_adjust(as->evenspill);
spadj = asm_stack_adjust(as);
as->T->spadjust = (uint16_t)spadj;
emit_addptr(as, RID_ESP, -spadj);
emit_addptr(as, RID_ESP|REX_64, -spadj);
/* Root traces assume a checked stack for the starting proto. */
as->T->topslot = gcref(as->T->startpt)->pt.framesize;
}
@ -3007,7 +3081,7 @@ static void asm_head_side(ASMState *as)
}
/* Calculate stack frame adjustment. */
spadj = sps_adjust(as->evenspill);
spadj = asm_stack_adjust(as);
spdelta = spadj - (int32_t)as->parent->spadjust;
if (spdelta < 0) { /* Don't shrink the stack frame. */
spadj = (int32_t)as->parent->spadjust;
@ -3048,7 +3122,7 @@ static void asm_head_side(ASMState *as)
/* Store trace number and adjust stack frame relative to the parent. */
emit_setgli(as, vmstate, (int32_t)as->J->curtrace);
emit_addptr(as, RID_ESP, -spdelta);
emit_addptr(as, RID_ESP|REX_64, -spdelta);
/* Restore target registers from parent spill slots. */
if (pass3) {
@ -3061,7 +3135,8 @@ static void asm_head_side(ASMState *as)
if (ra_hasspill(regsp_spill(rs))) {
int32_t ofs = sps_scale(regsp_spill(rs));
ra_free(as, r);
emit_movrmro(as, r, RID_ESP, ofs);
emit_rmro(as, r < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as),
REX_64LU(ir, r), RID_ESP, ofs);
checkmclim(as);
}
}
@ -3078,7 +3153,7 @@ static void asm_head_side(ASMState *as)
rset_clear(live, rp);
rset_clear(allow, rp);
ra_free(as, ir->r);
emit_movrr(as, ir->r, rp);
ra_movrr(as, ir, ir->r, rp);
checkmclim(as);
}
@ -3150,7 +3225,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
MCode *target, *q;
int32_t spadj = as->T->spadjust;
if (spadj == 0) {
p -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6;
p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0);
} else {
MCode *p1;
/* Patch stack adjustment. */
@ -3163,10 +3238,16 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
*(int32_t *)p1 = spadj;
}
if ((as->flags & JIT_F_LEA_AGU)) {
#if LJ_64
p1[-4] = 0x48;
#endif
p1[-3] = (MCode)XI_LEA;
p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP);
p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
} else {
#if LJ_64
p1[-3] = 0x48;
#endif
p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
}
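For reference, the byte patterns being patched here (assumed encodings, shown for illustration only): the extra 0x48 is the REX.W prefix that widens the stack pointer adjustment to 64 bit, which is why the space reserved for it grows by one byte on x64 both here and in lj_asm_trace below.

  /* x86:       81 C4 imm32      add  esp, imm32
  ** x64:    48 81 C4 imm32      add  rsp, imm32
  ** x86:       8D A4 24 imm32   lea  esp, [esp+imm32]   (JIT_F_LEA_AGU)
  ** x64:    48 8D A4 24 imm32   lea  rsp, [rsp+imm32]
  ** 8 bit immediates use 83 C4 ib and 8D 64 24 ib instead.
  */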
@ -3365,12 +3446,13 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
break;
case IR_CALLN: case IR_CALLL: case IR_CALLS: {
const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
#if LJ_64
/* NYI: add stack slots for calls with more than 4/6 args. */
ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET);
#else
/* NYI: not fastcall-aware, but doesn't matter (yet). */
if (CCI_NARGS(ci) > (uint32_t)as->evenspill) /* Leave room for args. */
as->evenspill = (int32_t)CCI_NARGS(ci);
#if LJ_64
ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET);
#else
ir->prev = REGSP_HINT(RID_RET);
#endif
if (inloop)
@ -3379,8 +3461,12 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
continue;
}
/* C calls evict all scratch regs and return results in RID_RET. */
case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TOSTR:
case IR_NEWREF:
case IR_SNEW: case IR_NEWREF:
#if !LJ_64
if (as->evenspill < 3) /* lj_str_new and lj_tab_newkey need 3 args. */
as->evenspill = 3;
#endif
case IR_TNEW: case IR_TDUP: case IR_TOSTR:
ir->prev = REGSP_HINT(RID_RET);
if (inloop)
as->modset = RSET_SCRATCH;
@ -3500,7 +3586,7 @@ void lj_asm_trace(jit_State *J, Trace *T)
if (!as->loopref) {
/* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */
as->mcp -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6;
as->mcp -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6 + (LJ_64 ? 1 : 0);
as->invmcp = NULL;
asm_tail_link(as);
}

Changed file 2 of 4

@ -362,6 +362,7 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
#define irt_isnil(t) (irt_type(t) == IRT_NIL)
#define irt_ispri(t) ((uint32_t)irt_type(t) <= IRT_TRUE)
#define irt_islightud(t) (irt_type(t) == IRT_LIGHTUD)
#define irt_isstr(t) (irt_type(t) == IRT_STR)
#define irt_isfunc(t) (irt_type(t) == IRT_FUNC)
#define irt_istab(t) (irt_type(t) == IRT_TAB)
@ -376,9 +377,20 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
#define irt_isgcv(t) (irt_typerange((t), IRT_STR, IRT_UDATA))
#define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA))
#define itype2irt(tv) \
(~uitype(tv) < IRT_NUM ? cast(IRType, ~uitype(tv)) : IRT_NUM)
#define irt_toitype(t) ((int32_t)~(uint32_t)irt_type(t))
static LJ_AINLINE IRType itype2irt(const TValue *tv)
{
if (tvisnum(tv))
return IRT_NUM;
#if LJ_64
else if (tvislightud(tv))
return IRT_LIGHTUD;
#endif
else
return cast(IRType, ~uitype(tv));
}
#define irt_toitype(t) \
check_exp(!(LJ_64 && irt_islightud((t))), (int32_t)~(uint32_t)irt_type((t)))
#define irt_isguard(t) ((t).irt & IRT_GUARD)
#define irt_ismarked(t) ((t).irt & IRT_MARK)

Changed file 3 of 4

@ -279,6 +279,11 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
setintV(o, *sps);
} else if (irt_isnum(t)) {
o->u64 = *(uint64_t *)sps;
#if LJ_64
} else if (irt_islightud(t)) {
/* 64 bit lightuserdata which may escape already has the tag bits. */
o->u64 = *(uint64_t *)sps;
#endif
} else {
lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
setgcrefi(o->gcr, *sps);
@ -291,6 +296,11 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
setintV(o, ex->gpr[r-RID_MIN_GPR]);
} else if (irt_isnum(t)) {
setnumV(o, ex->fpr[r-RID_MIN_FPR]);
#if LJ_64
} else if (irt_islightud(t)) {
/* 64 bit lightuserdata which may escape already has the tag bits. */
o->u64 = ex->gpr[r-RID_MIN_GPR];
#endif
} else {
if (!irt_ispri(t))
setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
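A short sketch of why no retagging is needed on trace exit (assumed shapes, not from the patch): the trace kept the full tagged 64 bit value in a GPR or an 8 byte spill pair, so the exit handler copies it into the stack TValue verbatim, whereas GC objects still need the reference and the type tag written as two 32 bit halves:

  if (irt_islightud(t)) {
    o->u64 = ex->gpr[r-RID_MIN_GPR];            /* tag bits already present */
  } else if (!irt_ispri(t)) {
    setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);  /* low half: GCobj reference */
    setitype(o, irt_toitype(t));                /* high half: type tag (assumed) */
  }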

Changed file 4 of 4

@ -96,20 +96,24 @@ enum {
/* -- Spill slots --------------------------------------------------------- */
/* Stack layout for the compiled machine code (after stack adjustment). */
enum {
SPS_TEMP1, /* Temps (3*dword) for calls and asm_x87load. */
SPS_TEMP2,
SPS_TEMP3,
SPS_FIRST, /* First spill slot for general use. */
/* Available fixed spill slots in interpreter frame.
** This definition must match with the *.dasc file(s).
*/
#if LJ_64
#ifdef _WIN64
#define SPS_FIXED (5*2)
#else
#define SPS_FIXED 2
#endif
#else
#define SPS_FIXED 6
#endif
/* This definition must match with the *.dasc file(s). */
SPS_FIXED = 6 /* Available fixed spill slots in interpreter frame. */
};
/* First spill slot for general use. Reserve one 64 bit slot. */
#define SPS_FIRST 2
/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */
#define sps_scale(slot) (4 * (int32_t)(slot))
#define sps_adjust(slot) (sps_scale(((slot)-SPS_FIXED+3)&~3))
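A quick worked example of the asm_stack_adjust() replacement shown earlier (values assumed for illustration): on non-Windows x64, SPS_FIXED is 2, so with as->evenspill = 7 the adjustment is sps_scale((7 - 2 + 3) & ~3) = sps_scale(8) = 32 bytes; rounding the slot count up to a multiple of 4 keeps the adjustment a multiple of 16 bytes, and traces with evenspill <= SPS_FIXED skip the adjustment entirely.

  /* Worked example (assumed values): non-Windows x64, SPS_FIXED == 2. */
  int32_t evenspill = 7;                           /* slots actually used */
  int32_t spadj = 4 * ((evenspill - 2 + 3) & ~3);  /* sps_scale(8) == 32 */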
/* -- Exit state ---------------------------------------------------------- */
@ -241,6 +245,8 @@ typedef uint32_t x86Group;
#define XG_(i8, i, g) ((x86Group)(((i8) << 16) + ((i) << 8) + (g)))
#define XG_ARITHi(g) XG_(XI_ARITHi8, XI_ARITHi, g)
#define XG_TOXOi(xg) ((x86Op)(0x000000fe + (((xg)<<16) & 0xff000000)))
#define XG_TOXOi8(xg) ((x86Op)(0x000000fe + (((xg)<<8) & 0xff000000)))
#define XO_ARITH(a) ((x86Op)(0x030000fe + ((a)<<27)))