LJ_FR2: Improve trace recording and snapshots.

The interesting changes here revolve around slots marked as TREF_FRAME /
TREF_CONT. Under !LJ_FR2, said slots contain two 32-bit values, and the
TRef for the slot primarily relates to the low 32 bits. In a snapshot, the
main SnapEntry relates to the low 32 bits, and the framelink from the
snapshot is used to restore the high 32 bits. Under LJ_FR2, TREF_FRAME /
TREF_CONT slots contain a single 64-bit value. The TRef relates to all 64
bits, the SnapEntry is used to restore all 64 bits, and no framelinks are
required to restore the slot. Restoration is done via IR_KNUM constants,
as the 64-bit values in question can be happily interpreted as denormal
numbers. These constants are created lazily: the slots in question get set
to just TREF_FRAME / TREF_CONT initially, and then if required for a
snapshot, the ref part of the TRef is changed from zero to the index of a
KNUM. The TRef for slot 1 is always zero: although the slot is technically a
frame link, it never needs to be changed, saved, or restored.

Though the framelink part of a snapshot isn't required for slot
restoration under LJ_FR2, it is still used for restoring PC. As such,
every snapshot has exactly two framelink entries, which together store a
single 64-bit value: the PC shifted left by 8 bits, with baseslot-2 in the
low 8 bits.

Manipulations of J->maxslot are more interesting under LJ_FR2. For
example, the BC_MOV of a method call can introduce a three-slot gap under
LJ_FR2, whereas it could only introduce a one-slot gap under !LJ_FR2.
Other instructions can now introduce a one-slot gap where previously they
wouldn't ever introduce a gap.
This commit is contained in:
Peter Cawley 2016-03-26 15:37:38 +00:00
parent a1bbfd7f18
commit 79021951e5
11 changed files with 298 additions and 131 deletions

View File

@ -310,15 +310,17 @@ local function fmtfunc(func, pc)
end
end
local function formatk(tr, idx)
local function formatk(tr, idx, sn)
local k, t, slot = tracek(tr, idx)
local tn = type(k)
local s
if tn == "number" then
if k == 2^52+2^51 then
if band(sn or 0, 0x30000) ~= 0 then
s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz"
elseif k == 2^52+2^51 then
s = "bias"
else
s = format("%+.14g", k)
s = format(0 < k and k < 0x1p-1026 and "%+a" or "%+.14g", k)
end
elseif tn == "string" then
s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub))
@ -354,7 +356,7 @@ local function printsnap(tr, snap)
n = n + 1
local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS
if ref < 0 then
out:write(formatk(tr, ref))
out:write(formatk(tr, ref, sn))
elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM
out:write(colorize(format("%04d/%04d", ref, ref+1), 14))
else

View File

@ -453,7 +453,7 @@
#endif
/* Disable or enable the JIT compiler. */
#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_FR2 || LJ_GC64
#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_GC64
#define LJ_HASJIT 0
#else
#define LJ_HASJIT 1

View File

@ -1893,7 +1893,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
SnapEntry sn = map[n-1];
if ((sn & SNAP_FRAME)) {
*gotframe = 1;
return snap_slot(sn);
return snap_slot(sn) - LJ_FR2;
}
}
return 0;
@ -1913,8 +1913,15 @@ static void asm_tail_link(ASMState *as)
if (as->T->link == 0) {
/* Setup fixed registers for exit to interpreter. */
const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]);
int32_t mres;
#if LJ_FR2
uint64_t pcbase;
const BCIns *pc;
memcpy(&pcbase, &as->T->snapmap[snap->mapofs + snap->nent], 8);
pc = (const BCIns *)(pcbase >> 8);
#else
const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]);
#endif
if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */
BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
if (bc_isret(bc_op(*retpc)))
@ -1922,7 +1929,7 @@ static void asm_tail_link(ASMState *as)
}
ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
ra_allockreg(as, i32ptr(pc), RID_LPC);
mres = (int32_t)(snap->nslots - baseslot);
mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
switch (bc_op(*pc)) {
case BC_CALLM: case BC_CALLMT:
mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;

View File

@ -348,7 +348,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
noconflict(as, ref, IR_RETF, 0)) {
as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0);
as->mrm.ofs = 8*((int32_t)ir->op1-1) + (!LJ_FR2&&(ir->op2&IRSLOAD_FRAME)?4:0);
as->mrm.idx = RID_NONE;
return RID_MRM;
}
@ -655,6 +655,9 @@ static void asm_callx(ASMState *as, IRIns *ir)
static void asm_retf(ASMState *as, IRIns *ir)
{
Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
#if LJ_FR2
Reg rpc = ra_scratch(as, rset_exclude(RSET_GPR, base));
#endif
void *pc = ir_kptr(IR(ir->op2));
int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
as->topslot -= (BCReg)delta;
@ -663,7 +666,12 @@ static void asm_retf(ASMState *as, IRIns *ir)
emit_setgl(as, base, jit_base);
emit_addptr(as, base, -8*delta);
asm_guardcc(as, CC_NE);
#if LJ_FR2
emit_rmro(as, XO_CMP, rpc, base, -8);
emit_loadu64(as, rpc, u64ptr(pc));
#else
emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc));
#endif
}
/* -- Type conversions ---------------------------------------------------- */
@ -1400,7 +1408,11 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
static void asm_sload(ASMState *as, IRIns *ir)
{
int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
#if LJ_FR2
int32_t ofs = 8*((int32_t)ir->op1-2);
#else
int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0);
#endif
IRType1 t = ir->t;
Reg base;
lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
@ -2386,13 +2398,13 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
static void asm_stack_restore(ASMState *as, SnapShot *snap)
{
SnapEntry *map = &as->T->snapmap[snap->mapofs];
SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
MSize n, nent = snap->nent;
/* Store the value of all modified slots to the Lua stack. */
for (n = 0; n < nent; n++) {
SnapEntry sn = map[n];
BCReg s = snap_slot(sn);
int32_t ofs = 8*((int32_t)s-1);
int32_t ofs = 8*((int32_t)s-1-LJ_FR2);
IRRef ref = snap_ref(sn);
IRIns *ir = IR(ref);
if ((sn & SNAP_NORESTORE))
@ -2410,8 +2422,10 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
emit_movmroi(as, RID_BASE, ofs, ir->i);
}
if ((sn & (SNAP_CONT|SNAP_FRAME))) {
#if !LJ_FR2
if (s != 0) /* Do not overwrite link to previous frame. */
emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
#endif
} else {
if (!(LJ_64 && irt_islightud(ir->t)))
emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));

View File

@ -721,7 +721,11 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct,
if (!tv)
lj_trace_err(J, LJ_TRERR_BADTYPE);
if (tvisfunc(tv)) {
J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME;
J->base[-1-LJ_FR2] = lj_ir_kfunc(J, funcV(tv));
#if LJ_FR2
J->base[-1] = 0;
#endif
J->base[-1] |= TREF_FRAME;
rd->nres = -1; /* Pending tailcall. */
} else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) {
/* Specialize to result of __index lookup. */
@ -1119,20 +1123,20 @@ static void crec_snap_caller(jit_State *J)
lua_State *L = J->L;
TValue *base = L->base, *top = L->top;
const BCIns *pc = J->pc;
TRef ftr = J->base[-1];
TRef ftr = J->base[-1-LJ_FR2];
ptrdiff_t delta;
if (!frame_islua(base-1) || J->framedepth <= 0)
lj_trace_err(J, LJ_TRERR_NYICALL);
J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]);
L->top = base; L->base = base - delta;
J->base[-1] = TREF_FALSE;
J->base[-1-LJ_FR2] = TREF_FALSE;
J->base -= delta; J->baseslot -= (BCReg)delta;
J->maxslot = (BCReg)delta; J->framedepth--;
J->maxslot = (BCReg)delta-LJ_FR2; J->framedepth--;
lj_snap_add(J);
L->base = base; L->top = top;
J->framedepth++; J->maxslot = 1;
J->base += delta; J->baseslot += (BCReg)delta;
J->base[-1] = ftr; J->pc = pc;
J->base[-1-LJ_FR2] = ftr; J->pc = pc;
}
/* Record function call. */
@ -1224,7 +1228,11 @@ void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd)
tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm);
if (tv) {
if (tvisfunc(tv)) {
J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME;
J->base[-1-LJ_FR2] = lj_ir_kfunc(J, funcV(tv));
#if LJ_FR2
J->base[-1] = 0;
#endif
J->base[-1] |= TREF_FRAME;
rd->nres = -1; /* Pending tailcall. */
return;
}
@ -1373,7 +1381,11 @@ static TRef crec_arith_meta(jit_State *J, TRef *sp, CType **s, CTState *cts,
}
if (tv) {
if (tvisfunc(tv)) {
J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME;
J->base[-1-LJ_FR2] = lj_ir_kfunc(J, funcV(tv));
#if LJ_FR2
J->base[-1] = 0;
#endif
J->base[-1] |= TREF_FRAME;
rd->nres = -1; /* Pending tailcall. */
return 0;
} /* NYI: non-function metamethods. */

View File

@ -95,6 +95,8 @@ typedef unsigned int uintptr_t;
#define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo)
#define i32ptr(p) ((int32_t)(intptr_t)(void *)(p))
#define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p))
#define i64ptr(p) ((int64_t)(intptr_t)(void *)(p))
#define u64ptr(p) ((uint64_t)(intptr_t)(void *)(p))
#define checki8(x) ((x) == (int32_t)(int8_t)(x))
#define checku8(x) ((x) == (int32_t)(uint8_t)(x))

View File

@ -102,34 +102,41 @@ static void recff_stitch(jit_State *J)
ASMFunction cont = lj_cont_stitch;
lua_State *L = J->L;
TValue *base = L->base;
BCReg nslot = J->maxslot + 1 + LJ_FR2;
TValue *nframe = base + 1 + LJ_FR2;
const BCIns *pc = frame_pc(base-1);
TValue *pframe = frame_prevl(base-1);
lua_assert(!LJ_FR2); /* TODO_FR2: handle frame shift. */
/* Move func + args up in Lua stack and insert continuation. */
memmove(&base[1], &base[-1], sizeof(TValue)*(J->maxslot+1));
setframe_ftsz(base+1, ((char *)(base+1) - (char *)pframe) + FRAME_CONT);
setcont(base, cont);
memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot);
setframe_ftsz(nframe, ((char *)nframe - (char *)pframe) + FRAME_CONT);
setcont(base-LJ_FR2, cont);
setframe_pc(base, pc);
setnilV(base-1); /* Incorrect, but rec_check_slots() won't run anymore. */
L->base += 2;
L->top += 2;
setnilV(base-1-LJ_FR2); /* Incorrect, but rec_check_slots() won't run. */
L->base += 2 + LJ_FR2;
L->top += 2 + LJ_FR2;
/* Ditto for the IR. */
memmove(&J->base[1], &J->base[-1], sizeof(TRef)*(J->maxslot+1));
memmove(&J->base[1], &J->base[-1-LJ_FR2], sizeof(TRef)*nslot);
#if LJ_FR2
J->base[2] = TREF_FRAME;
J->base[-1] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont)));
J->base[0] = lj_ir_k64(J, IR_KNUM, u64ptr(pc)) | TREF_CONT;
#else
J->base[0] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT;
J->ktrace = tref_ref((J->base[-1] = lj_ir_ktrace(J)));
J->base += 2;
J->baseslot += 2;
#endif
J->ktrace = tref_ref((J->base[-1-LJ_FR2] = lj_ir_ktrace(J)));
J->base += 2 + LJ_FR2;
J->baseslot += 2 + LJ_FR2;
J->framedepth++;
lj_record_stop(J, LJ_TRLINK_STITCH, 0);
/* Undo Lua stack changes. */
memmove(&base[-1], &base[1], sizeof(TValue)*(J->maxslot+1));
memmove(&base[-1-LJ_FR2], &base[1], sizeof(TValue)*nslot);
setframe_pc(base-1, pc);
L->base -= 2;
L->top -= 2;
L->base -= 2 + LJ_FR2;
L->top -= 2 + LJ_FR2;
}
/* Fallback handler for fast functions that are not recorded (yet). */
@ -372,10 +379,10 @@ static int recff_metacall(jit_State *J, RecordFFData *rd, MMS mm)
int errcode;
TValue argv0;
/* Temporarily insert metamethod below object. */
J->base[1] = J->base[0];
J->base[1+LJ_FR2] = J->base[0];
J->base[0] = ix.mobj;
copyTV(J->L, &argv0, &rd->argv[0]);
copyTV(J->L, &rd->argv[1], &rd->argv[0]);
copyTV(J->L, &rd->argv[1+LJ_FR2], &rd->argv[0]);
copyTV(J->L, &rd->argv[0], &ix.mobjv);
/* Need to protect lj_record_tailcall because it may throw. */
errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp);
@ -442,6 +449,10 @@ static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd)
static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd)
{
if (J->maxslot >= 1) {
#if LJ_FR2
/* Shift function arguments up. */
memmove(J->base + 1, J->base, sizeof(TRef) * J->maxslot);
#endif
lj_record_call(J, 0, J->maxslot - 1);
rd->nres = -1; /* Pending call. */
} /* else: Interpreter will throw. */
@ -461,13 +472,16 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
TValue argv0, argv1;
TRef tmp;
int errcode;
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
/* Swap function and traceback. */
tmp = J->base[0]; J->base[0] = J->base[1]; J->base[1] = tmp;
copyTV(J->L, &argv0, &rd->argv[0]);
copyTV(J->L, &argv1, &rd->argv[1]);
copyTV(J->L, &rd->argv[0], &argv1);
copyTV(J->L, &rd->argv[1], &argv0);
#if LJ_FR2
/* Shift function arguments up. */
memmove(J->base + 2, J->base + 1, sizeof(TRef) * (J->maxslot-1));
#endif
/* Need to protect lj_record_call because it may throw. */
errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp);
/* Always undo Lua stack swap to avoid confusing the interpreter. */

View File

@ -220,7 +220,7 @@ IRFLDEF(FLENUM)
/* SLOAD mode bits, stored in op2. */
#define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */
#define IRSLOAD_FRAME 0x02 /* Load hiword of frame. */
#define IRSLOAD_FRAME 0x02 /* Load 32 bits of ftsz. */
#define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */
#define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */
#define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */

View File

@ -179,12 +179,16 @@ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT);
#define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref))
#define SNAP_TR(slot, tr) \
(((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK)))
#if !LJ_FR2
#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc))
#endif
#define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz))
#define snap_ref(sn) ((sn) & 0xffff)
#define snap_slot(sn) ((BCReg)((sn) >> 24))
#define snap_isframe(sn) ((sn) & SNAP_FRAME)
#if !LJ_FR2
#define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn))
#endif
#define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref))
/* Snapshot and exit numbers. */

View File

@ -87,8 +87,8 @@ static void rec_check_slots(jit_State *J)
BCReg s, nslots = J->baseslot + J->maxslot;
int32_t depth = 0;
cTValue *base = J->L->base - J->baseslot;
lua_assert(J->baseslot >= 1 && J->baseslot < LJ_MAX_JSLOTS);
lua_assert(J->baseslot == 1 || (J->slot[J->baseslot-1] & TREF_FRAME));
lua_assert(J->baseslot >= 1+LJ_FR2 && J->baseslot < LJ_MAX_JSLOTS);
lua_assert(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME));
lua_assert(nslots < LJ_MAX_JSLOTS);
for (s = 0; s < nslots; s++) {
TRef tr = J->slot[s];
@ -96,21 +96,39 @@ static void rec_check_slots(jit_State *J)
cTValue *tv = &base[s];
IRRef ref = tref_ref(tr);
IRIns *ir;
if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) {
lua_assert(ref >= J->cur.nk && ref < J->cur.nins);
ir = IR(ref);
lua_assert(irt_t(ir->t) == tref_t(tr));
}
if (s == 0) {
lua_assert(tref_isfunc(tr));
#if LJ_FR2
} else if (s == 1) {
lua_assert(0);
#endif
} else if ((tr & TREF_FRAME)) {
GCfunc *fn = gco2func(frame_gc(tv));
BCReg delta = (BCReg)(tv - frame_prev(tv));
#if LJ_FR2
if (ref)
lua_assert(ir_knum(ir)->u64 == tv->u64);
tr = J->slot[s-1];
ir = IR(tref_ref(tr));
#endif
lua_assert(tref_isfunc(tr));
if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir));
lua_assert(s > delta ? (J->slot[s-delta] & TREF_FRAME) : (s == delta));
lua_assert(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME)
: (s == delta + LJ_FR2));
depth++;
} else if ((tr & TREF_CONT)) {
#if LJ_FR2
if (ref)
lua_assert(ir_knum(ir)->u64 == tv->u64);
#else
lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void));
lua_assert((J->slot[s+1] & TREF_FRAME));
#endif
lua_assert((J->slot[s+1+LJ_FR2] & TREF_FRAME));
depth++;
} else {
if (tvisnumber(tv))
@ -162,10 +180,10 @@ static TRef sload(jit_State *J, int32_t slot)
/* Get TRef for current function. */
static TRef getcurrf(jit_State *J)
{
if (J->base[-1])
return J->base[-1];
lua_assert(J->baseslot == 1);
return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY);
if (J->base[-1-LJ_FR2])
return J->base[-1-LJ_FR2];
lua_assert(J->baseslot == 1+LJ_FR2);
return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY);
}
/* Compare for raw object equality.
@ -509,7 +527,6 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
static LoopEvent rec_iterl(jit_State *J, const BCIns iterins)
{
BCReg ra = bc_a(iterins);
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
if (!tref_isnil(getslot(J, ra))) { /* Looping back? */
J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */
J->maxslot = ra-1+bc_b(J->pc[-1]);
@ -680,20 +697,27 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs)
TValue *functv = &J->L->base[func];
TRef *fbase = &J->base[func];
ptrdiff_t i;
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
for (i = 0; i <= nargs; i++)
(void)getslot(J, func+i); /* Ensure func and all args have a reference. */
(void)getslot(J, func); /* Ensure func has a reference. */
for (i = 1; i <= nargs; i++)
(void)getslot(J, func+LJ_FR2+i); /* Ensure all args have a reference. */
if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */
ix.tab = fbase[0];
copyTV(J->L, &ix.tabv, functv);
if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj))
lj_trace_err(J, LJ_TRERR_NOMM);
for (i = ++nargs; i > 0; i--) /* Shift arguments up. */
fbase[i] = fbase[i-1];
for (i = ++nargs; i > LJ_FR2; i--) /* Shift arguments up. */
fbase[i+LJ_FR2] = fbase[i+LJ_FR2-1];
#if LJ_FR2
fbase[2] = fbase[0];
#endif
fbase[0] = ix.mobj; /* Replace function. */
functv = &ix.mobjv;
}
fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]);
fbase[0] = rec_call_specialize(J, funcV(functv), fbase[0]);
#if LJ_FR2
fbase[1] = 0;
#endif
fbase[LJ_FR2] |= TREF_FRAME;
J->maxslot = (BCReg)nargs;
}
@ -703,8 +727,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs)
rec_call_setup(J, func, nargs);
/* Bump frame. */
J->framedepth++;
J->base += func+1;
J->baseslot += func+1;
J->base += func+1+LJ_FR2;
J->baseslot += func+1+LJ_FR2;
}
/* Record tail call. */
@ -720,7 +744,11 @@ void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs)
func += cbase;
}
/* Move func + args down. */
memmove(&J->base[-1], &J->base[func], sizeof(TRef)*(J->maxslot+1));
#if LJ_FR2
if (J->baseslot == 2)
J->base[func+1] = 0;
#endif
memmove(&J->base[-1-LJ_FR2], &J->base[func], sizeof(TRef)*(J->maxslot+1+LJ_FR2));
/* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */
/* Tailcalls can form a loop, so count towards the loop unroll limit. */
if (++J->tailcalled > J->loopunroll)
@ -763,7 +791,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
BCReg cbase = (BCReg)frame_delta(frame);
if (--J->framedepth < 0)
lj_trace_err(J, LJ_TRERR_NYIRETL);
lua_assert(J->baseslot > 1);
lua_assert(J->baseslot > 1+LJ_FR2);
gotresults++;
rbase += cbase;
J->baseslot -= (BCReg)cbase;
@ -787,7 +815,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
BCReg cbase = (BCReg)frame_delta(frame);
if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */
lj_trace_err(J, LJ_TRERR_NYIRETL);
lua_assert(J->baseslot > 1);
lua_assert(J->baseslot > 1+LJ_FR2);
rbase += cbase;
J->baseslot -= (BCReg)cbase;
J->base -= cbase;
@ -797,8 +825,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
BCIns callins = *(frame_pc(frame)-1);
ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults;
BCReg cbase = bc_a(callins);
GCproto *pt = funcproto(frame_func(frame - (cbase+1-LJ_FR2)));
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame teardown. */
GCproto *pt = funcproto(frame_func(frame - (cbase+1+LJ_FR2)));
if ((pt->flags & PROTO_NOJIT))
lj_trace_err(J, LJ_TRERR_CJITOFF);
if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) {
@ -811,13 +838,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
lj_snap_add(J);
}
for (i = 0; i < nresults; i++) /* Adjust results. */
J->base[i-1] = i < gotresults ? J->base[rbase+i] : TREF_NIL;
J->base[i-1-LJ_FR2] = i < gotresults ? J->base[rbase+i] : TREF_NIL;
J->maxslot = cbase+(BCReg)nresults;
if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */
J->framedepth--;
lua_assert(J->baseslot > cbase+1);
J->baseslot -= cbase+1;
J->base -= cbase+1;
lua_assert(J->baseslot > cbase+1+LJ_FR2);
J->baseslot -= cbase+1+LJ_FR2;
J->base -= cbase+1+LJ_FR2;
} else if (J->parent == 0 && J->exitno == 0 &&
!bc_isret(bc_op(J->cur.startins))) {
/* Return to lower frame would leave the loop in a root trace. */
@ -827,13 +854,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
} else { /* Return to lower frame. Guard for the target we return to. */
TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO);
TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame));
emitir(IRTG(IR_RETF, IRT_P32), trpt, trpc);
emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc);
J->retdepth++;
J->needsnap = 1;
lua_assert(J->baseslot == 1);
lua_assert(J->baseslot == 1+LJ_FR2);
/* Shift result slots up and clear the slots of the new frame below. */
memmove(J->base + cbase, J->base-1, sizeof(TRef)*nresults);
memset(J->base-1, 0, sizeof(TRef)*(cbase+1));
memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults);
memset(J->base-1-LJ_FR2, 0, sizeof(TRef)*(cbase+1+LJ_FR2));
}
} else if (frame_iscont(frame)) { /* Return to continuation frame. */
ASMFunction cont = frame_contf(frame);
@ -842,32 +869,39 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
lj_trace_err(J, LJ_TRERR_NYIRETL);
J->baseslot -= (BCReg)cbase;
J->base -= cbase;
J->maxslot = cbase-2;
J->maxslot = cbase-(2<<LJ_FR2);
if (cont == lj_cont_ra) {
/* Copy result to destination slot. */
BCReg dst = bc_a(*(frame_contpc(frame)-1));
J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL;
if (dst >= J->maxslot) J->maxslot = dst+1;
if (dst >= J->maxslot) {
J->maxslot = dst+1;
}
} else if (cont == lj_cont_nop) {
/* Nothing to do here. */
} else if (cont == lj_cont_cat) {
BCReg bslot = bc_b(*(frame_contpc(frame)-1));
TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL;
if (bslot != cbase-2) { /* Concatenate the remainder. */
if (bslot != J->maxslot) { /* Concatenate the remainder. */
TValue *b = J->L->base, save; /* Simulate lower frame and result. */
J->base[cbase-2] = tr;
copyTV(J->L, &save, b-2);
if (gotresults) copyTV(J->L, b-2, b+rbase); else setnilV(b-2);
J->base[J->maxslot] = tr;
copyTV(J->L, &save, b-(2<<LJ_FR2));
if (gotresults)
copyTV(J->L, b-(2<<LJ_FR2), b+rbase);
else
setnilV(b-(2<<LJ_FR2));
J->L->base = b - cbase;
tr = rec_cat(J, bslot, cbase-2);
tr = rec_cat(J, bslot, cbase-(2<<LJ_FR2));
b = J->L->base + cbase; /* Undo. */
J->L->base = b;
copyTV(J->L, b-2, &save);
copyTV(J->L, b-(2<<LJ_FR2), &save);
}
if (tr) { /* Store final result. */
BCReg dst = bc_a(*(frame_contpc(frame)-1));
J->base[dst] = tr;
if (dst >= J->maxslot) J->maxslot = dst+1;
if (dst >= J->maxslot) {
J->maxslot = dst+1;
}
} /* Otherwise continue with another __concat call. */
} else {
/* Result type already specialized. */
@ -876,7 +910,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
} else {
lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */
}
lua_assert(J->baseslot >= 1);
lua_assert(J->baseslot >= 1+LJ_FR2);
}
/* -- Metamethod handling ------------------------------------------------- */
@ -885,11 +919,16 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
static BCReg rec_mm_prep(jit_State *J, ASMFunction cont)
{
BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize;
#if LJ_FR2
J->base[top] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont)));
J->base[top+1] = TREF_CONT;
#else
J->base[top] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT;
#endif
J->framedepth++;
for (s = J->maxslot; s < top; s++)
J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */
return top+1;
return top+1+LJ_FR2;
}
/* Record metamethod lookup. */
@ -967,9 +1006,9 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra);
TRef *base = J->base + func;
TValue *basev = J->L->base + func;
base[1] = ix->tab; base[2] = ix->key;
copyTV(J->L, basev+1, &ix->tabv);
copyTV(J->L, basev+2, &ix->keyv);
base[1+LJ_FR2] = ix->tab; base[2+LJ_FR2] = ix->key;
copyTV(J->L, basev+1+LJ_FR2, &ix->tabv);
copyTV(J->L, basev+2+LJ_FR2, &ix->keyv);
if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */
if (mm != MM_unm) {
ix->tab = ix->key;
@ -980,8 +1019,10 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
lj_trace_err(J, LJ_TRERR_NOMM);
}
ok:
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
base[0] = ix->mobj;
#if LJ_FR2
base[1] = 0;
#endif
copyTV(J->L, basev+0, &ix->mobjv);
lj_record_call(J, func, 2);
return 0; /* No result yet. */
@ -997,8 +1038,9 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
BCReg func = rec_mm_prep(J, lj_cont_ra);
TRef *base = J->base + func;
TValue *basev = J->L->base + func;
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv);
base += LJ_FR2;
basev += LJ_FR2;
base[1] = tr; copyTV(J->L, basev+1, tv);
#if LJ_52
base[2] = tr; copyTV(J->L, basev+2, tv);
@ -1018,11 +1060,10 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op)
{
BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt);
TRef *base = J->base + func;
TValue *tv = J->L->base + func;
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key;
copyTV(J->L, tv+0, &ix->mobjv);
TRef *base = J->base + func + LJ_FR2;
TValue *tv = J->L->base + func + LJ_FR2;
base[-LJ_FR2] = ix->mobj; base[1] = ix->val; base[2] = ix->key;
copyTV(J->L, tv-LJ_FR2, &ix->mobjv);
copyTV(J->L, tv+1, &ix->valv);
copyTV(J->L, tv+2, &ix->keyv);
lj_record_call(J, func, 2);
@ -1339,11 +1380,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
handlemm:
if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */
BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra);
TRef *base = J->base + func;
TValue *tv = J->L->base + func;
lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key;
setfuncV(J->L, tv+0, funcV(&ix->mobjv));
TRef *base = J->base + func + LJ_FR2;
TValue *tv = J->L->base + func + LJ_FR2;
base[-LJ_FR2] = ix->mobj; base[1] = ix->tab; base[2] = ix->key;
setfuncV(J->L, tv-LJ_FR2, funcV(&ix->mobjv));
copyTV(J->L, tv+1, &ix->tabv);
copyTV(J->L, tv+2, &ix->keyv);
if (ix->val) {
@ -1533,7 +1573,11 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val)
goto noconstify;
kfunc = lj_ir_kfunc(J, J->fn);
emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc);
#if LJ_FR2
J->base[-2] = kfunc;
#else
J->base[-1] = TREF_FRAME | kfunc;
#endif
fn = kfunc;
}
tr = lj_record_constify(J, uvval(uvp));
@ -1638,11 +1682,14 @@ static void rec_func_setup(jit_State *J)
static void rec_func_vararg(jit_State *J)
{
GCproto *pt = J->pt;
BCReg s, fixargs, vframe = J->maxslot+1;
BCReg s, fixargs, vframe = J->maxslot+1+LJ_FR2;
lua_assert((pt->flags & PROTO_VARARG));
if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS)
lj_trace_err(J, LJ_TRERR_STACKOV);
J->base[vframe-1] = J->base[-1]; /* Copy function up. */
J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. */
#if LJ_FR2
J->base[vframe-1] = TREF_FRAME;
#endif
/* Copy fixarg slots up and set their original slots to nil. */
fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot;
for (s = 0; s < fixargs; s++) {
@ -1704,7 +1751,7 @@ static int select_detect(jit_State *J)
static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
{
int32_t numparams = J->pt->numparams;
ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1;
ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2;
lua_assert(frame_isvarg(J->L->base-1));
if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */
ptrdiff_t i;
@ -1716,10 +1763,10 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
J->maxslot = dst + (BCReg)nresults;
}
for (i = 0; i < nresults; i++)
J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1) : TREF_NIL;
J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1 - LJ_FR2) : TREF_NIL;
} else { /* Unknown number of varargs passed to trace. */
TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME);
int32_t frofs = 8*(1+numparams)+FRAME_VARG;
TRef fr = emitir(IRTI(IR_SLOAD), LJ_FR2, IRSLOAD_READONLY|IRSLOAD_FRAME);
int32_t frofs = 8*(1+LJ_FR2+numparams)+FRAME_VARG;
if (nresults >= 0) { /* Known fixed number of results. */
ptrdiff_t i;
if (nvararg > 0) {
@ -1733,7 +1780,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8));
for (i = 0; i < nload; i++) {
IRType t = itype2irt(&J->L->base[i-1-nvararg]);
IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]);
TRef aref = emitir(IRT(IR_AREF, IRT_PGC),
vbase, lj_ir_kint(J, (int32_t)i));
TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
@ -1781,14 +1828,15 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
if (idx != 0 && idx <= nvararg) {
IRType t;
TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8));
t = itype2irt(&J->L->base[idx-2-nvararg]);
vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase,
lj_ir_kint(J, frofs-(8<<LJ_FR2)));
t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]);
aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx);
tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */
}
J->base[dst-2] = tr;
J->maxslot = dst-1;
J->base[dst-2-LJ_FR2] = tr;
J->maxslot = dst-1-LJ_FR2;
J->bcskip = 2; /* Skip CALLM + select. */
} else {
nyivarg:
@ -1881,7 +1929,15 @@ static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond)
const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0);
SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
/* Set PC to opposite target to avoid re-recording the comp. in side trace. */
#if LJ_FR2
SnapEntry *flink = &J->cur.snapmap[snap->mapofs + snap->nent];
uint64_t pcbase;
memcpy(&pcbase, flink, sizeof(uint64_t));
pcbase = (pcbase & 0xff) | (u64ptr(npc) << 8);
memcpy(flink, &pcbase, sizeof(uint64_t));
#else
J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc);
#endif
J->needsnap = 1;
if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins);
lj_snap_shrink(J); /* Shrink last snapshot if possible. */
@ -2179,7 +2235,13 @@ void lj_record_ins(jit_State *J)
case BC_MOV:
/* Clear gap of method call to avoid resurrecting previous refs. */
if (ra > J->maxslot) J->base[ra-1] = 0;
if (ra > J->maxslot) {
#if LJ_FR2
memset(J->base + J->maxslot, 0, (ra - J->maxslot) * sizeof(TRef));
#else
J->base[ra-1] = 0;
#endif
}
break;
case BC_KSTR: case BC_KNUM: case BC_KPRI:
break;
@ -2248,14 +2310,14 @@ void lj_record_ins(jit_State *J)
/* -- Calls and vararg handling ----------------------------------------- */
case BC_ITERC:
J->base[ra] = getslot(J, ra-3-LJ_FR2);
J->base[ra+1] = getslot(J, ra-2-LJ_FR2);
J->base[ra+2] = getslot(J, ra-1-LJ_FR2);
J->base[ra] = getslot(J, ra-3);
J->base[ra+1+LJ_FR2] = getslot(J, ra-2);
J->base[ra+2+LJ_FR2] = getslot(J, ra-1);
{ /* Do the actual copy now because lj_record_call needs the values. */
TValue *b = &J->L->base[ra];
copyTV(J->L, b, b-3-LJ_FR2);
copyTV(J->L, b+1, b-2-LJ_FR2);
copyTV(J->L, b+2, b-1-LJ_FR2);
copyTV(J->L, b, b-3);
copyTV(J->L, b+1+LJ_FR2, b-2);
copyTV(J->L, b+2+LJ_FR2, b-1);
}
lj_record_call(J, ra, (ptrdiff_t)rc-1);
break;
@ -2378,7 +2440,12 @@ void lj_record_ins(jit_State *J)
/* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */
if (bcmode_a(op) == BCMdst && rc) {
J->base[ra] = rc;
if (ra >= J->maxslot) J->maxslot = ra+1;
if (ra >= J->maxslot) {
#if LJ_FR2
if (ra > J->maxslot) J->base[ra-1] = 0;
#endif
J->maxslot = ra+1;
}
}
#undef rav
@ -2463,7 +2530,7 @@ void lj_record_setup(jit_State *J)
J->scev.idx = REF_NIL;
setmref(J->scev.pc, NULL);
J->baseslot = 1; /* Invoking function is at base[-1]. */
J->baseslot = 1+LJ_FR2; /* Invoking function is at base[-1-LJ_FR2]. */
J->base = J->slot + J->baseslot;
J->maxslot = 0;
J->framedepth = 0;

View File

@ -68,10 +68,18 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
for (s = 0; s < nslots; s++) {
TRef tr = J->slot[s];
IRRef ref = tref_ref(tr);
#if LJ_FR2
if (s == 1) continue;
if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) {
TValue *base = J->L->base - J->baseslot;
tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64);
ref = tref_ref(tr);
}
#endif
if (ref) {
SnapEntry sn = SNAP_TR(s, tr);
IRIns *ir = &J->cur.ir[ref];
if (!(sn & (SNAP_CONT|SNAP_FRAME)) &&
if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) &&
ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
/* No need to snapshot unmodified non-inherited slots. */
if (!(ir->op2 & IRSLOAD_INHERIT))
@ -90,34 +98,51 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
}
/*
** Add frame links at the end of the snapshot.
**
** Writes the frame link entries into map and stores the computed top slot
** (ftop - lim, truncated to uint8_t) through *topslot.
**
** LJ_FR2:  a single 64-bit value, (PC << 8) | (baseslot - 2), is copied into
**          map; it occupies two SnapEntry slots and 2 is returned.
** !LJ_FR2: the current PC is stored first, followed by one entry per Lua
**          frame (its PC), two per continuation frame (ftsz + continuation
**          PC) and one per remaining frame (ftsz); the entry count f is
**          returned.
**
** NOTE(review): this listing appears to interleave the pre-change and
** post-change form of a few lines (signature, lim, final return); the
** comments here describe the form that takes the topslot out-parameter.
** Confirm against the applied file.
*/
static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map)
static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot)
{
/* Start at the frame slot directly below the current base. */
cTValue *frame = J->L->base - 1;
cTValue *lim = J->L->base - J->baseslot;
/* Traversal limit; sits one slot higher when LJ_FR2 is 1. */
cTValue *lim = J->L->base - J->baseslot + LJ_FR2;
GCfunc *fn = frame_func(frame);
/* Initial top: frame + framesize for a Lua function, else the current L->top. */
cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
#if LJ_FR2
/* Low 8 bits carry baseslot-2, the remaining bits carry the PC pointer. */
uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2);
/* Range check so that baseslot-2 fits into the low 8 bits (0..255). */
lua_assert(2 <= J->baseslot && J->baseslot <= 257);
/* Byte-copy the 64-bit value over the entries starting at map. */
memcpy(map, &pcbase, sizeof(uint64_t));
#else
MSize f = 0;
lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */
map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
#endif
while (frame > lim) { /* Backwards traversal of all frames above base. */
if (frame_islua(frame)) {
#if !LJ_FR2
/* Lua frame: record its PC. */
map[f++] = SNAP_MKPC(frame_pc(frame));
#endif
frame = frame_prevl(frame);
} else if (frame_iscont(frame)) {
#if !LJ_FR2
/* Continuation frame: record frame type/size, then the continuation PC. */
map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
map[f++] = SNAP_MKPC(frame_contpc(frame));
#endif
frame = frame_prevd(frame);
} else {
/* Any other frame kind; C frames are asserted not to occur here. */
lua_assert(!frame_isc(frame));
#if !LJ_FR2
map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
#endif
frame = frame_prevd(frame);
/* Skips the ftop update below for this frame kind. */
continue;
}
/* Raise ftop if the frame now reached extends beyond it. */
if (frame + funcproto(frame_func(frame))->framesize > ftop)
ftop = frame + funcproto(frame_func(frame))->framesize;
}
/* Report the top slot as an offset from lim. */
*topslot = (uint8_t)(ftop - lim);
#if LJ_FR2
/* The 64-bit pcbase written above must span exactly two SnapEntry slots. */
lua_assert(sizeof(SnapEntry) * 2 == sizeof(uint64_t));
return 2;
#else
/* One entry for the current PC plus one per frame depth level. */
lua_assert(f == (MSize)(1 + J->framedepth));
return (BCReg)(ftop - lim);
return f;
#endif
}
/* Take a snapshot of the current stack. */
@ -127,16 +152,16 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
MSize nent;
SnapEntry *p;
/* Conservative estimate. */
lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1);
lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1));
p = &J->cur.snapmap[nsnapmap];
nent = snapshot_slots(J, p, nslots);
snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent);
snap->nent = (uint8_t)nent;
nent += snapshot_framelinks(J, p + nent, &snap->topslot);
snap->mapofs = (uint16_t)nsnapmap;
snap->ref = (IRRef1)J->cur.nins;
snap->nent = (uint8_t)nent;
snap->nslots = (uint8_t)nslots;
snap->count = 0;
J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth);
J->cur.nsnapmap = (uint16_t)(nsnapmap + nent);
}
/* Add or merge a snapshot. */
@ -284,8 +309,15 @@ void lj_snap_shrink(jit_State *J)
MSize n, m, nlim, nent = snap->nent;
uint8_t udf[SNAP_USEDEF_SLOTS];
BCReg maxslot = J->maxslot;
BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot);
BCReg baseslot = J->baseslot;
#if LJ_FR2
BCReg minslot;
uint64_t pcbase;
memcpy(&pcbase, &map[nent], sizeof(uint64_t));
minslot = snap_usedef(J, udf, (const BCIns *)(pcbase >> 8), maxslot);
#else
BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot);
#endif
maxslot += baseslot;
minslot += baseslot;
snap->nslots = (uint8_t)maxslot;
@ -794,12 +826,19 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
SnapShot *snap = &T->snap[snapno];
MSize n, nent = snap->nent;
SnapEntry *map = &T->snapmap[snap->mapofs];
SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1];
ptrdiff_t ftsz0;
SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2];
TValue *frame;
BloomFilter rfilt = snap_renamefilter(T, snapno);
const BCIns *pc = snap_pc(map[nent]);
lua_State *L = J->L;
#if LJ_FR2
const BCIns *pc;
uint64_t pcbase;
memcpy(&pcbase, flinks, sizeof(uint64_t));
pc = (const BCIns *)(pcbase >> 8);
#else
ptrdiff_t ftsz0;
const BCIns *pc = snap_pc(map[nent]);
#endif
/* Set interpreter PC to the next PC to get correct error messages. */
setcframe_pc(cframe_raw(L->cframe), pc+1);
@ -811,8 +850,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
}
/* Fill stack slots with data from the registers and spill slots. */
frame = L->base-1;
frame = L->base-1-LJ_FR2;
#if !LJ_FR2
ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */
#endif
for (n = 0; n < nent; n++) {
SnapEntry sn = map[n];
if (!(sn & SNAP_NORESTORE)) {
@ -835,14 +876,18 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
TValue tmp;
snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
o->u32.hi = tmp.u32.lo;
#if !LJ_FR2
} else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */
/* Overwrite tag with frame link. */
setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
L->base = o+1;
#endif
}
}
}
#if LJ_FR2
L->base += (pcbase & 0xff);
#endif
lua_assert(map + nent == flinks);
/* Compute current stack top. */