Mirror of https://github.com/LuaJIT/LuaJIT.git (synced 2025-02-07 15:14:08 +00:00)
Major 32/64 bit cleanups in assembler and exit handling.
Add 64 bit lightuserdata handling. Keep the tagged 64 bit value. Allocate/save/restore 64 bit spill slots for 64 bit lightuserdata. Fix code generation for 64 bit loads/stores/moves/compares. Fix code generation for stack pointer adjustments. Add fixed spill slot definitions for x64. Reduce reserved spill slots. Disable STRREF + ADD fusion in 64 bit mode (avoid negative 32 bit ofs).
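The "tagged 64 bit value" mentioned above keeps the lightuserdata pointer in the low 47 bits and the type tag in the top 17 bits, which is what the new asm_load_lightud64 guard in this diff (arithmetic shift right by 47, compare against -2) tests for. A minimal C sketch of that layout; the helper names and the tag constant are illustrative, not LuaJIT API:

#include <assert.h>
#include <stdint.h>

/* Top 17 bits are 1...10, so (int64_t)v >> 47 == -2 for tagged values. */
#define LIGHTUD_TAG (0x1fffeULL << 47)

static uint64_t lightud_box(void *p)  /* Hypothetical helper. */
{
  uint64_t u = (uint64_t)(uintptr_t)p;
  assert((u >> 47) == 0);  /* Pointer must fit in 47 bits. */
  return u | LIGHTUD_TAG;
}

static int lightud_check(uint64_t u)
{
  /* Mirrors the emitted guard: sar reg, 47; cmp reg, -2. */
  return ((int64_t)u >> 47) == -2;
}

static void *lightud_unbox(uint64_t u)
{
  return (void *)(uintptr_t)(u & ((1ULL << 47) - 1));
}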
This commit is contained in:
parent e46f4c8a11
commit 4c9f71be5d
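The spill-slot side of the change replaces the old sps_adjust() macro with asm_stack_adjust() and per-target SPS_FIXED values. A self-contained sketch of that arithmetic, mirroring the patch below (SPS_FIXED set to the new non-Windows x64 value): spill slots are 4 bytes wide, 64 bit values take an even/odd pair, and the adjustment past the fixed interpreter-frame slots is rounded up to a multiple of four slots (16 bytes):

#include <stdint.h>

#define SPS_FIXED 2  /* Fixed interpreter-frame slots (x64 value from this patch). */
#define sps_scale(slot) (4 * (int32_t)(slot))

static int32_t stack_adjust(int32_t evenspill)
{
  if (evenspill <= SPS_FIXED)
    return 0;
  return sps_scale((evenspill - SPS_FIXED + 3) & ~3);
}

/* Example: stack_adjust(3) == 16, stack_adjust(7) == 32. */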
src/lj_asm.c (218 changed lines)
@@ -261,18 +261,16 @@ static void emit_rmrxo(ASMState *as, x86Op xo, Reg rr, Reg rb, Reg rx,
 static void emit_gri(ASMState *as, x86Group xg, Reg rb, int32_t i)
 {
   MCode *p = as->mcp;
+  x86Op xo;
   if (checki8(i)) {
-    p -= 3;
-    p[2] = (MCode)i;
-    p[0] = (MCode)(xg >> 16);
+    *--p = (MCode)i;
+    xo = XG_TOXOi8(xg);
   } else {
-    p -= 6;
-    *(int32_t *)(p+2) = i;
-    p[0] = (MCode)(xg >> 8);
+    p -= 4;
+    *(int32_t *)p = i;
+    xo = XG_TOXOi(xg);
   }
-  p[1] = MODRM(XM_REG, xg, rb);
-  REXRB(p, 0, rb);
-  as->mcp = p;
+  as->mcp = emit_opm(xo, XM_REG, (Reg)(xg & 7) | (rb & REX_64), rb, p, 0);
 }

 /* op [base+ofs], i */
@@ -282,12 +280,12 @@ static void emit_gmroi(ASMState *as, x86Group xg, Reg rb, int32_t ofs,
   x86Op xo;
   if (checki8(i)) {
     emit_i8(as, i);
-    xo = (x86Op)(((xg >> 16) << 24)+0xfe);
+    xo = XG_TOXOi8(xg);
   } else {
     emit_i32(as, i);
-    xo = (x86Op)(((xg >> 8) << 24)+0xfe);
+    xo = XG_TOXOi(xg);
   }
-  emit_rmro(as, xo, (Reg)xg, rb, ofs);
+  emit_rmro(as, xo, (Reg)(xg & 7), rb, ofs);
 }

 #define emit_shifti(as, xg, r, i) \
@@ -346,18 +344,6 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)

 /* -- Emit moves ---------------------------------------------------------- */

-/* Generic move between two regs. */
-static void emit_movrr(ASMState *as, Reg r1, Reg r2)
-{
-  emit_rr(as, r1 < RID_MAX_GPR ? XO_MOV : XMM_MOVRR(as), r1, r2);
-}
-
-/* Generic move from [base+ofs]. */
-static void emit_movrmro(ASMState *as, Reg rr, Reg rb, int32_t ofs)
-{
-  emit_rmro(as, rr < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), rr, rb, ofs);
-}
-
 /* mov [base+ofs], i */
 static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
 {
@@ -623,7 +609,7 @@ static int32_t ra_spill(ASMState *as, IRIns *ir)
 {
   int32_t slot = ir->s;
   if (!ra_hasspill(slot)) {
-    if (irt_isnum(ir->t)) {
+    if (irt_isnum(ir->t) || (LJ_64 && irt_islightud(ir->t))) {
       slot = as->evenspill;
       as->evenspill += 2;
     } else if (as->oddspill) {
@@ -653,6 +639,16 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref)
   return r;
 }

+/* Use 64 bit operations to handle 64 bit lightuserdata. */
+#define REX_64LU(ir, r) \
+  ((r) | ((LJ_64 && irt_islightud((ir)->t)) ? REX_64 : 0))
+
+/* Generic move between two regs. */
+static void ra_movrr(ASMState *as, IRIns *ir, Reg r1, Reg r2)
+{
+  emit_rr(as, r1 < RID_MAX_GPR ? XO_MOV : XMM_MOVRR(as), REX_64LU(ir, r1), r2);
+}
+
 /* Restore a register (marked as free). Rematerialize or force a spill. */
 static Reg ra_restore(ASMState *as, IRRef ref)
 {
@@ -668,7 +664,8 @@ static Reg ra_restore(ASMState *as, IRRef ref)
     if (!rset_test(as->weakset, r)) {  /* Only restore non-weak references. */
       ra_modified(as, r);
       RA_DBGX((as, "restore $i $r", ir, r));
-      emit_movrmro(as, r, RID_ESP, ofs);
+      emit_rmro(as, r < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as),
+                REX_64LU(ir, r), RID_ESP, ofs);
     }
     return r;
   }
@@ -679,7 +676,7 @@ static void ra_save(ASMState *as, IRIns *ir, Reg r)
 {
   RA_DBGX((as, "save $i $r", ir, r));
   emit_rmro(as, r < RID_MAX_GPR ? XO_MOVto : XO_MOVSDto,
-            r, RID_ESP, sps_scale(ir->s));
+            REX_64LU(ir, r), RID_ESP, sps_scale(ir->s));
 }

 #define MINCOST(r) \
@@ -822,7 +819,8 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
 static void ra_rename(ASMState *as, Reg down, Reg up)
 {
   IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]);
-  IR(ref)->r = (uint8_t)up;
+  IRIns *ir = IR(ref);
+  ir->r = (uint8_t)up;
   as->cost[down] = 0;
   lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR));
   lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up));
@@ -831,7 +829,7 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
   rset_clear(as->freeset, up);  /* ... and 'up' is now allocated. */
   ra_noweak(as, up);
   RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
-  emit_movrr(as, down, up);  /* Backwards code generation needs inverse move. */
+  ra_movrr(as, ir, down, up);  /* Backwards codegen needs inverse move. */
   if (!ra_hasspill(IR(ref)->s)) {  /* Add the rename to the IR. */
     lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno);
     ren = tref_ref(lj_ir_emit(as->J));
@@ -864,7 +862,7 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r)
   Reg dest = ra_dest(as, ir, RID2RSET(r));
   if (dest != r) {
     ra_scratch(as, RID2RSET(r));
-    emit_movrr(as, dest, r);
+    ra_movrr(as, ir, dest, r);
   }
 }

@@ -903,7 +901,7 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
       ra_modified(as, left);
       ra_rename(as, left, dest);
     } else {
-      emit_movrr(as, dest, left);
+      ra_movrr(as, ir, dest, left);
     }
   }
 }
@@ -1201,7 +1199,8 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
   } else {
     Reg r;
     /* Fuse a constant add into the offset, e.g. string.sub(s, i+10). */
-    if (mayfuse(as, ir->op2) && irr->o == IR_ADD && irref_isk(irr->op2)) {
+    if (!LJ_64 &&  /* NYI: has bad effects with negative index on x64. */
+        mayfuse(as, ir->op2) && irr->o == IR_ADD && irref_isk(irr->op2)) {
       as->mrm.ofs += IR(irr->op2)->i;
       r = ra_alloc1(as, irr->op1, allow);
     } else {
@@ -1325,7 +1324,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
       allow &= ~RID2RSET(r);
       if (ra_hasreg(ir->r)) {
        ra_noweak(as, ir->r);
-       emit_movrr(as, r, ir->r);
+       ra_movrr(as, ir, r, ir->r);
       } else {
        ra_allocref(as, args[n], RID2RSET(r));
       }
@@ -1358,7 +1357,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   ra_evictset(as, drop);  /* Evictions must be performed first. */
   if (ra_used(ir)) {
     if (irt_isnum(ir->t)) {
-      int32_t ofs = sps_scale(ir->s);  /* Use spill slot or slots SPS_TEMP1/2. */
+      int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
 #if LJ_64
       if ((ci->flags & CCI_CASTU64)) {
        Reg dest = ir->r;
@@ -1367,7 +1366,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
        ra_modified(as, dest);
        emit_rr(as, XO_MOVD, dest|REX_64, RID_RET);  /* Really MOVQ. */
       } else {
-       emit_movrmro(as, RID_RET, RID_ESP, ofs);
+       emit_movtomro(as, RID_RET|REX_64, RID_ESP, ofs);
       }
     } else {
       ra_destreg(as, ir, RID_FPRET);
@@ -1493,8 +1492,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
   args[0] = ir->op1;
   args[1] = ASMREF_TMP1;
   asm_gencall(as, ci, args);
-  /* Store the result to the spill slot or slots SPS_TEMP1/2. */
-  emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
+  /* Store the result to the spill slot or temp slots. */
+  emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
            RID_ESP, sps_scale(ir->s));
 }

@@ -1509,7 +1508,7 @@ static void asm_tostr(ASMState *as, IRIns *ir)
     args[1] = ASMREF_TMP1;
     asm_setupresult(as, ir, ci);
     asm_gencall(as, ci, args);
-    emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
+    emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
              RID_ESP, ra_spill(as, irl));
   } else {
     const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
@@ -1627,6 +1626,10 @@ static void asm_href(ASMState *as, IRIns *ir)
       emit_i8(as, ~IRT_NUM);
       emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
     }
+#if LJ_64
+  } else if (irt_islightud(kt)) {
+    emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64));
+#endif
   } else {
     if (!irt_ispri(kt)) {
       lua_assert(irt_isaddr(kt));
@@ -1747,16 +1750,17 @@ static void asm_newref(ASMState *as, IRIns *ir)
     if (irref_isk(ir->op2))
       emit_loada(as, tmp, ir_knum(irkey));
     else
-      emit_rmro(as, XO_LEA, tmp, RID_ESP, ra_spill(as, irkey));
+      emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey));
   } else {
     /* Otherwise use g->tmptv to hold the TValue. */
     if (!irref_isk(ir->op2)) {
       Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
-      emit_movtomro(as, src, tmp, 0);
+      emit_movtomro(as, REX_64LU(irkey, src), tmp, 0);
     } else if (!irt_ispri(irkey->t)) {
       emit_movmroi(as, tmp, 0, irkey->i);
     }
-    emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
+    if (!(LJ_64 && irt_islightud(irkey->t)))
+      emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
     emit_loada(as, tmp, &J2G(as->J)->tmptv);
   }
 }
@@ -1822,6 +1826,11 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   case IRT_U8: xo = XO_MOVZXb; break;
   case IRT_I16: xo = XO_MOVSXw; break;
   case IRT_U16: xo = XO_MOVZXw; break;
+#if LJ_64
+  case IRT_LIGHTUD:
+    dest |= REX_64;
+    /* fallthrough */
+#endif
   default:
     lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
     xo = XO_MOV;
@@ -1848,6 +1857,9 @@ static void asm_fstore(ASMState *as, IRIns *ir)
   switch (irt_type(ir->t)) {
   case IRT_I8: case IRT_U8: xo = XO_MOVtob; src |= FORCE_REX; break;
   case IRT_I16: case IRT_U16: xo = XO_MOVtow; break;
+#if LJ_64
+  case IRT_LIGHTUD: lua_assert(0);  /* NYI: mask 64 bit lightuserdata. */
+#endif
   default:
     lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
     xo = XO_MOVto;
@@ -1866,11 +1878,41 @@ static void asm_fstore(ASMState *as, IRIns *ir)
   }
 }

+#if LJ_64
+static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
+{
+  if (ra_used(ir) || typecheck) {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    if (typecheck) {
+      Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, dest));
+      asm_guardcc(as, CC_NE);
+      emit_i8(as, -2);
+      emit_rr(as, XO_ARITHi8, XOg_CMP, tmp);
+      emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
+      emit_rr(as, XO_MOV, tmp|REX_64, dest);
+    }
+    return dest;
+  } else {
+    return RID_NONE;
+  }
+}
+#endif
+
 static void asm_ahuload(ASMState *as, IRIns *ir)
 {
-  RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
   lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t));
+#if LJ_64
+  if (irt_islightud(ir->t)) {
+    Reg dest = asm_load_lightud64(as, ir, 1);
+    if (ra_hasreg(dest)) {
+      asm_fuseahuref(as, ir->op1, RSET_GPR);
+      emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
+    }
+    return;
+  } else
+#endif
   if (ra_used(ir)) {
+    RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     Reg dest = ra_dest(as, ir, allow);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
     emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM);
@@ -1890,6 +1932,12 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
     Reg src = ra_alloc1(as, ir->op2, RSET_FPR);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
     emit_mrm(as, XO_MOVSDto, src, RID_MRM);
+#if LJ_64
+  } else if (irt_islightud(ir->t)) {
+    Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
+    asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src));
+    emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
+#endif
   } else {
     IRIns *irr = IR(ir->op2);
     RegSet allow = RSET_GPR;
@@ -1925,6 +1973,15 @@ static void asm_sload(ASMState *as, IRIns *ir)
     base = ra_alloc1(as, REF_BASE, RSET_GPR);
     emit_rmro(as, XMM_MOVRM(as), left, base, ofs);
     t.irt = IRT_NUM;  /* Continue with a regular number type check. */
+#if LJ_64
+  } else if (irt_islightud(t)) {
+    Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK));
+    if (ra_hasreg(dest)) {
+      base = ra_alloc1(as, REF_BASE, RSET_GPR);
+      emit_rmro(as, XO_MOV, dest|REX_64, base, ofs);
+    }
+    return;
+#endif
   } else if (ra_used(ir)) {
     RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     Reg dest = ra_dest(as, ir, allow);
@@ -1932,8 +1989,10 @@ static void asm_sload(ASMState *as, IRIns *ir)
     lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
     if (irt_isint(t))
       emit_rmro(as, XO_CVTSD2SI, dest, base, ofs);
+    else if (irt_isnum(t))
+      emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
     else
-      emit_movrmro(as, dest, base, ofs);
+      emit_rmro(as, XO_MOV, dest, base, ofs);
   } else {
     if (!(ir->op2 & IRSLOAD_TYPECHECK))
       return;  /* No type check: avoid base alloc. */
@@ -2117,7 +2176,7 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
   } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) {
     /* Rejoined to pow(). */
   } else {  /* Handle x87 ops. */
-    int32_t ofs = sps_scale(ir->s);  /* Use spill slot or slots SPS_TEMP1/2. */
+    int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
     Reg dest = ir->r;
     if (ra_hasreg(dest)) {
       ra_free(as, dest);
@@ -2521,6 +2580,10 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
       Reg left = ra_alloc1(as, lref, RSET_GPR);
       Reg right = asm_fuseload(as, rref, rset_exclude(RSET_GPR, left));
       asm_guardcc(as, cc);
+#if LJ_64
+      if (irt_islightud(ir->t))
+        left |= REX_64;
+#endif
       emit_mrm(as, XO_CMP, left, right);
     }
   }
@@ -2563,7 +2626,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   Reg r = allow ? rset_pickbot(allow) : RID_EAX;
   emit_jcc(as, CC_B, exitstub_addr(as->J, exitno));
   if (allow == RSET_EMPTY)  /* Restore temp. register. */
-    emit_rmro(as, XO_MOV, r, RID_ESP, sps_scale(SPS_TEMP1));
+    emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0);
   else
     ra_modified(as, r);
   emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot));
@@ -2575,7 +2638,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
   emit_getgl(as, r, jit_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
-    emit_rmro(as, XO_MOVto, r, RID_ESP, sps_scale(SPS_TEMP1));
+    emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
 }

 /* Restore Lua stack from on-trace state. */
@@ -2600,14 +2663,17 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
       lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t));
       if (!irref_isk(ref)) {
        Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
-       emit_movtomro(as, src, RID_BASE, ofs);
+       emit_movtomro(as, REX_64LU(ir, src), RID_BASE, ofs);
       } else if (!irt_ispri(ir->t)) {
        emit_movmroi(as, RID_BASE, ofs, ir->i);
       }
-      if (!(sn & (SNAP_CONT|SNAP_FRAME)))
-       emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
-      else if (s != 0)  /* Do not overwrite link to previous frame. */
-       emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
+      if ((sn & (SNAP_CONT|SNAP_FRAME))) {
+       if (s != 0)  /* Do not overwrite link to previous frame. */
+         emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
+      } else {
+       if (!(LJ_64 && irt_islightud(ir->t)))
+         emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
+      }
     }
     checkmclim(as);
   }
@@ -2668,7 +2734,7 @@ static void asm_gc_check(ASMState *as, SnapShot *snap)
                 (int32_t)as->T->snapmap[snap->mapofs+snap->nent]);
  emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK);
  lstate = IR(ASMREF_L)->r;
-  emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe));
+  emit_rmro(as, XO_MOV, tmp, lstate, offsetof(lua_State, cframe));
  /* It's ok if lstate is already in a non-scratch reg. But all allocations
  ** in the non-fast path must use a scratch reg. See comment above.
  */
@@ -2830,7 +2896,7 @@ static void asm_phi(ASMState *as, IRIns *ir)
     r = ra_allocref(as, ir->op2, allow);
   } else {  /* Duplicate right PHI, need a copy (rare). */
     r = ra_scratch(as, allow);
-    emit_movrr(as, r, irr->r);
+    ra_movrr(as, irr, r, irr->r);
   }
   ir->r = (uint8_t)r;
   rset_set(as->phiset, r);
@@ -2912,6 +2978,14 @@ static void asm_loop(ASMState *as)

 /* -- Head of trace ------------------------------------------------------- */

+/* Calculate stack adjustment. */
+static int32_t asm_stack_adjust(ASMState *as)
+{
+  if (as->evenspill <= SPS_FIXED)
+    return 0;
+  return sps_scale((as->evenspill - SPS_FIXED + 3) & ~3);
+}
+
 /* Coalesce BASE register for a root trace. */
 static void asm_head_root_base(ASMState *as)
 {
@@ -2932,9 +3006,9 @@ static void asm_head_root(ASMState *as)
   int32_t spadj;
   asm_head_root_base(as);
   emit_setgli(as, vmstate, (int32_t)as->J->curtrace);
-  spadj = sps_adjust(as->evenspill);
+  spadj = asm_stack_adjust(as);
   as->T->spadjust = (uint16_t)spadj;
-  emit_addptr(as, RID_ESP, -spadj);
+  emit_addptr(as, RID_ESP|REX_64, -spadj);
   /* Root traces assume a checked stack for the starting proto. */
   as->T->topslot = gcref(as->T->startpt)->pt.framesize;
 }
@@ -3007,7 +3081,7 @@ static void asm_head_side(ASMState *as)
   }

   /* Calculate stack frame adjustment. */
-  spadj = sps_adjust(as->evenspill);
+  spadj = asm_stack_adjust(as);
   spdelta = spadj - (int32_t)as->parent->spadjust;
   if (spdelta < 0) {  /* Don't shrink the stack frame. */
     spadj = (int32_t)as->parent->spadjust;
@@ -3048,7 +3122,7 @@ static void asm_head_side(ASMState *as)

   /* Store trace number and adjust stack frame relative to the parent. */
   emit_setgli(as, vmstate, (int32_t)as->J->curtrace);
-  emit_addptr(as, RID_ESP, -spdelta);
+  emit_addptr(as, RID_ESP|REX_64, -spdelta);

   /* Restore target registers from parent spill slots. */
   if (pass3) {
@@ -3061,7 +3135,8 @@ static void asm_head_side(ASMState *as)
       if (ra_hasspill(regsp_spill(rs))) {
        int32_t ofs = sps_scale(regsp_spill(rs));
        ra_free(as, r);
-       emit_movrmro(as, r, RID_ESP, ofs);
+       emit_rmro(as, r < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as),
+                 REX_64LU(ir, r), RID_ESP, ofs);
        checkmclim(as);
       }
     }
@@ -3078,7 +3153,7 @@ static void asm_head_side(ASMState *as)
     rset_clear(live, rp);
     rset_clear(allow, rp);
     ra_free(as, ir->r);
-    emit_movrr(as, ir->r, rp);
+    ra_movrr(as, ir, ir->r, rp);
     checkmclim(as);
   }

@@ -3150,7 +3225,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
   MCode *target, *q;
   int32_t spadj = as->T->spadjust;
   if (spadj == 0) {
-    p -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6;
+    p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0);
   } else {
     MCode *p1;
     /* Patch stack adjustment. */
@@ -3163,10 +3238,16 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
       *(int32_t *)p1 = spadj;
     }
     if ((as->flags & JIT_F_LEA_AGU)) {
+#if LJ_64
+      p1[-4] = 0x48;
+#endif
       p1[-3] = (MCode)XI_LEA;
       p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP);
       p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
     } else {
+#if LJ_64
+      p1[-3] = 0x48;
+#endif
       p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
       p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
     }
@@ -3365,12 +3446,13 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
       break;
     case IR_CALLN: case IR_CALLL: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-#if LJ_64
-      /* NYI: add stack slots for calls with more than 4/6 args. */
-      ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET);
-#else
       /* NYI: not fastcall-aware, but doesn't matter (yet). */
       if (CCI_NARGS(ci) > (uint32_t)as->evenspill)  /* Leave room for args. */
        as->evenspill = (int32_t)CCI_NARGS(ci);
+#if LJ_64
+      ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET);
+#else
       ir->prev = REGSP_HINT(RID_RET);
 #endif
       if (inloop)
@@ -3379,8 +3461,12 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
        continue;
       }
     /* C calls evict all scratch regs and return results in RID_RET. */
-    case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TOSTR:
-    case IR_NEWREF:
+    case IR_SNEW: case IR_NEWREF:
+#if !LJ_64
+      if (as->evenspill < 3)  /* lj_str_new and lj_tab_newkey need 3 args. */
+       as->evenspill = 3;
+#endif
+    case IR_TNEW: case IR_TDUP: case IR_TOSTR:
       ir->prev = REGSP_HINT(RID_RET);
       if (inloop)
        as->modset = RSET_SCRATCH;
@@ -3500,7 +3586,7 @@ void lj_asm_trace(jit_State *J, Trace *T)

   if (!as->loopref) {
     /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */
-    as->mcp -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6;
+    as->mcp -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6 + (LJ_64 ? 1 : 0);
     as->invmcp = NULL;
     asm_tail_link(as);
   }
src/lj_ir.h (18 changed lines)
@@ -362,6 +362,7 @@ typedef struct IRType1 { uint8_t irt; } IRType1;

 #define irt_isnil(t)		(irt_type(t) == IRT_NIL)
 #define irt_ispri(t)		((uint32_t)irt_type(t) <= IRT_TRUE)
+#define irt_islightud(t)	(irt_type(t) == IRT_LIGHTUD)
 #define irt_isstr(t)		(irt_type(t) == IRT_STR)
 #define irt_isfunc(t)		(irt_type(t) == IRT_FUNC)
 #define irt_istab(t)		(irt_type(t) == IRT_TAB)
@@ -376,9 +377,20 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
 #define irt_isgcv(t)		(irt_typerange((t), IRT_STR, IRT_UDATA))
 #define irt_isaddr(t)		(irt_typerange((t), IRT_LIGHTUD, IRT_UDATA))

-#define itype2irt(tv) \
-  (~uitype(tv) < IRT_NUM ? cast(IRType, ~uitype(tv)) : IRT_NUM)
-#define irt_toitype(t)		((int32_t)~(uint32_t)irt_type(t))
+static LJ_AINLINE IRType itype2irt(const TValue *tv)
+{
+  if (tvisnum(tv))
+    return IRT_NUM;
+#if LJ_64
+  else if (tvislightud(tv))
+    return IRT_LIGHTUD;
+#endif
+  else
+    return cast(IRType, ~uitype(tv));
+}
+
+#define irt_toitype(t) \
+  check_exp(!(LJ_64 && irt_islightud((t))), (int32_t)~(uint32_t)irt_type((t)))

 #define irt_isguard(t)		((t).irt & IRT_GUARD)
 #define irt_ismarked(t)		((t).irt & IRT_MARK)
src/lj_snap.c

@@ -279,6 +279,11 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
        setintV(o, *sps);
       } else if (irt_isnum(t)) {
        o->u64 = *(uint64_t *)sps;
+#if LJ_64
+      } else if (irt_islightud(t)) {
+       /* 64 bit lightuserdata which may escape already has the tag bits. */
+       o->u64 = *(uint64_t *)sps;
+#endif
       } else {
        lua_assert(!irt_ispri(t));  /* PRI refs never have a spill slot. */
        setgcrefi(o->gcr, *sps);
@@ -291,6 +296,11 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
        setintV(o, ex->gpr[r-RID_MIN_GPR]);
       } else if (irt_isnum(t)) {
        setnumV(o, ex->fpr[r-RID_MIN_FPR]);
+#if LJ_64
+      } else if (irt_islightud(t)) {
+       /* 64 bit lightuserdata which may escape already has the tag bits. */
+       o->u64 = ex->gpr[r-RID_MIN_GPR];
+#endif
       } else {
        if (!irt_ispri(t))
          setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
src/lj_target_x86.h

@@ -96,20 +96,24 @@ enum {

 /* -- Spill slots --------------------------------------------------------- */

-/* Stack layout for the compiled machine code (after stack adjustment). */
-enum {
-  SPS_TEMP1,		/* Temps (3*dword) for calls and asm_x87load. */
-  SPS_TEMP2,
-  SPS_TEMP3,
-  SPS_FIRST,		/* First spill slot for general use. */
+/* Available fixed spill slots in interpreter frame.
+** This definition must match with the *.dasc file(s).
+*/
+#if LJ_64
+#ifdef _WIN64
+#define SPS_FIXED	(5*2)
+#else
+#define SPS_FIXED	2
+#endif
+#else
+#define SPS_FIXED	6
+#endif

-  /* This definition must match with the *.dasc file(s). */
-  SPS_FIXED = 6		/* Available fixed spill slots in interpreter frame. */
-};
+/* First spill slot for general use. Reserve one 64 bit slot. */
+#define SPS_FIRST	2

 /* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */
 #define sps_scale(slot)		(4 * (int32_t)(slot))
-#define sps_adjust(slot)	(sps_scale(((slot)-SPS_FIXED+3)&~3))

 /* -- Exit state ---------------------------------------------------------- */

@@ -241,6 +245,8 @@ typedef uint32_t x86Group;

 #define XG_(i8, i, g)	((x86Group)(((i8) << 16) + ((i) << 8) + (g)))
 #define XG_ARITHi(g)	XG_(XI_ARITHi8, XI_ARITHi, g)
+#define XG_TOXOi(xg)	((x86Op)(0x000000fe + (((xg)<<16) & 0xff000000)))
+#define XG_TOXOi8(xg)	((x86Op)(0x000000fe + (((xg)<<8) & 0xff000000)))

 #define XO_ARITH(a)	((x86Op)(0x030000fe + ((a)<<27)))
