Add shadow frame link stack for trace recorder.

Simplifies snapshots. Prerequisite for pre-call snapshots.
Increases consistency for fast function calls, too.
This commit is contained in:
Mike Pall 2010-02-04 03:08:29 +01:00
parent 78f5e2ffd3
commit 7256690364
6 changed files with 82 additions and 56 deletions

View File

@ -3042,7 +3042,7 @@ static void asm_tail_sync(ASMState *as)
SnapShot *snap = &as->T->snap[as->T->nsnap-1]; /* Last snapshot. */
MSize n, nent = snap->nent;
SnapEntry *map = &as->T->snapmap[snap->mapofs];
SnapEntry *flinks = map + nent + snap->nframelinks;
SnapEntry *flinks = map + nent + 1;
BCReg newbase = 0;
BCReg nslots, topslot = 0;
@ -3116,11 +3116,11 @@ static void asm_tail_sync(ASMState *as)
if (!(sn & (SNAP_CONT|SNAP_FRAME)))
emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
else if (s != 0) /* Do not overwrite link to previous frame. */
emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks));
emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks++));
}
checkmclim(as);
}
lua_assert(map + nent == flinks-1);
lua_assert(map + nent + 1 + snap->depth == flinks);
}
/* Fixup the tail code. */

View File

@ -66,6 +66,7 @@ typedef unsigned __int32 uintptr_t;
/* JIT compiler limits. */
#define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */
#define LJ_MAX_JFRAME 20 /* Max. # of frames for a trace. */
#define LJ_MAX_PHI 32 /* Max. # of PHIs for a loop. */
#define LJ_MAX_EXITSTUBGR 8 /* Max. # of exit stub groups. */

View File

@ -114,7 +114,7 @@ typedef struct SnapShot {
IRRef1 ref; /* First IR ref for this snapshot. */
uint8_t nslots; /* Number of valid slots. */
uint8_t nent; /* Number of compressed entries. */
uint8_t nframelinks; /* Number of frame links. */
uint8_t depth; /* Number of frame links. */
uint8_t count; /* Count of taken exits for this snapshot. */
} SnapShot;
@ -252,6 +252,7 @@ typedef struct jit_State {
IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */
TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */
SnapEntry frame[LJ_MAX_JFRAME+2]; /* Frame link stack. */
int32_t param[JIT_P__MAX]; /* JIT engine parameters. */

View File

@ -167,7 +167,7 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
SnapEntry *loopmap, IRRef1 *subst)
{
SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs];
MSize nmapofs, nframelinks;
MSize nmapofs, depth;
MSize on, ln, nn, onent = osnap->nent;
BCReg nslots = osnap->nslots;
SnapShot *snap = &J->cur.snap[J->cur.nsnap];
@ -179,11 +179,11 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
nmapofs = snap->mapofs;
}
J->guardemit.irt = 0;
nframelinks = osnap->nframelinks;
depth = osnap->depth;
/* Setup new snapshot. */
snap->mapofs = (uint16_t)nmapofs;
snap->ref = (IRRef1)J->cur.nins;
snap->nframelinks = (uint8_t)nframelinks;
snap->depth = (uint8_t)depth;
snap->nslots = nslots;
snap->count = 0;
nmap = &J->cur.snapmap[nmapofs];
@ -205,10 +205,10 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
while (snap_slot(loopmap[ln]) < nslots) /* Copy remaining loop slots. */
nmap[nn++] = loopmap[ln++];
snap->nent = (uint8_t)nn;
J->cur.nsnapmap = (uint16_t)(nmapofs + nn + nframelinks);
J->cur.nsnapmap = (uint16_t)(nmapofs + nn + 1 + depth);
omap += onent;
nmap += nn;
for (nn = 0; nn < nframelinks; nn++) /* Copy frame links. */
for (nn = 0; nn <= depth; nn++) /* Copy PC + frame links. */
nmap[nn] = omap[nn];
}
@ -314,7 +314,7 @@ static void loop_undo(jit_State *J, IRRef ins, MSize nsnap)
SnapShot *snap = &J->cur.snap[nsnap-1];
SnapEntry *map = J->cur.snapmap;
map[snap->mapofs + snap->nent] = map[J->cur.snap[0].nent]; /* Restore PC. */
J->cur.nsnapmap = (uint16_t)(snap->mapofs + snap->nent + snap->nframelinks);
J->cur.nsnapmap = (uint16_t)(snap->mapofs + snap->nent + 1 + snap->depth);
J->cur.nsnap = nsnap;
J->guardemit.irt = 0;
lj_ir_rollback(J, ins);

View File

@ -101,20 +101,45 @@ static void rec_check_ir(jit_State *J)
}
}
/*
** Compare the recorder's frame link stack (J->frame) against the VM stack.
** The VM frames are walked from L->base-1 down to L->base - J->baseslot,
** while J->frame is consumed from index J->framedepth-1 downwards. Each
** frame link word must match its entry, and no entry may be left over.
*/
static void rec_check_frames(jit_State *J)
{
  cTValue *stop = J->L->base - J->baseslot;
  cTValue *fr;
  int32_t idx = J->framedepth;
  for (fr = J->L->base - 1; fr > stop; fr = frame_prev(fr)) {
    idx--;
    lua_assert(idx >= 0);
    lua_assert(J->frame[idx] == (SnapEntry)frame_ftsz(fr));
    if (!frame_iscont(fr))
      continue;
    /* A continuation frame has a second entry, checked against slot fr-1. */
    idx--;
    lua_assert(idx >= 0);
    lua_assert(J->frame[idx] == (SnapEntry)frame_ftsz(fr-1));
  }
  lua_assert(idx == 0);  /* All recorded frame links must be accounted for. */
}
/*
** Sanity check the slots.
**
** Asserts that J->baseslot and the number of used slots are within
** LJ_MAX_JSLOTS, that every non-empty slot refers to an IR instruction in
** the range [J->cur.nk, J->cur.nins) whose IR type equals the type stored
** in the TRef, and that the number of slots (other than slot 0) tagged with
** TREF_CONT or TREF_FRAME equals J->framedepth. Finally the frame link
** stack is compared against the VM stack via rec_check_frames().
*/
static void rec_check_slots(jit_State *J)
{
  BCReg s, nslots = J->baseslot + J->maxslot;
  int32_t depth = 0;  /* Frame/continuation slots seen so far. Must start at 0. */
  lua_assert(J->baseslot >= 1 && J->baseslot < LJ_MAX_JSLOTS);
  lua_assert(nslots < LJ_MAX_JSLOTS);
  for (s = 0; s < nslots; s++) {
    TRef tr = J->slot[s];
    /* Slot 0 is excluded from the frame link count. */
    if (s != 0 && (tr & (TREF_CONT|TREF_FRAME)))
      depth++;
    if (tr) {  /* Only non-empty slots carry an IR reference to validate. */
      IRRef ref = tref_ref(tr);
      lua_assert(ref >= J->cur.nk && ref < J->cur.nins);
      lua_assert(irt_t(IR(ref)->t) == tref_t(tr));
    }
  }
  lua_assert(J->framedepth == depth);
  rec_check_frames(J);
}
#endif
@ -854,6 +879,7 @@ typedef struct RecordFFData {
ptrdiff_t nres; /* Number of returned results (defaults to 1). */
ptrdiff_t cres; /* Wanted number of call results. */
uint32_t data; /* Per-ffid auxiliary data (opcode, literal etc.). */
int metacall; /* True if function was resolved via __call. */
} RecordFFData;
/* Type of handler to record a fast function. */
@ -1020,9 +1046,14 @@ static void recff_tostring(jit_State *J, TRef *res, RecordFFData *rd)
ix.tab = tr;
copyTV(J->L, &ix.tabv, &rd->argv[0]);
if (rec_mm_lookup(J, &ix, MM_tostring)) { /* Has __tostring metamethod? */
if (rd->metacall) /* Must not use kludge. */
recff_err_nyi(J, rd);
res[0] = ix.mobj;
copyTV(J->L, rd->argv - 1, &ix.mobjv);
if (!rec_call(J, (BCReg)(res - J->base), 1, 1)) /* Pending call? */
copyTV(J->L, rd->argv - 1, &ix.mobjv); /* Kludge. */
J->framedepth--;
if (rec_call(J, (BCReg)(res - J->base), 1, 1))
J->framedepth++;
else
rd->cres = CALLRES_PENDING;
/* Otherwise res[0] already contains the result. */
} else if (tref_isnumber(tr)) {
@ -1067,6 +1098,8 @@ static void recff_pcall(jit_State *J, TRef *res, RecordFFData *rd)
{
if (rd->nargs >= 1) {
BCReg parg = (BCReg)(arg - J->base);
J->pc = (const BCIns *)(sizeof(TValue) - 4 +
(hook_active(J2G(J)) ? FRAME_PCALLH : FRAME_PCALL));
if (rec_call(J, parg, CALLRES_MULTI, rd->nargs - 1)) { /* Resolved call. */
res[0] = TREF_TRUE; /* Prepend true result. No need to move results. */
rd->nres = (ptrdiff_t)J->maxslot - (ptrdiff_t)parg + 1;
@ -1108,6 +1141,8 @@ static void recff_xpcall(jit_State *J, TRef *res, RecordFFData *rd)
copyTV(J->L, &rd->argv[0], &argv1);
copyTV(J->L, &rd->argv[1], &argv0);
oargv = savestack(J->L, rd->argv);
J->pc = (const BCIns *)(2*sizeof(TValue) - 4 +
(hook_active(J2G(J)) ? FRAME_PCALLH : FRAME_PCALL));
/* Need to protect rec_call because the recorder may throw. */
rx.parg = parg;
rx.nargs = rd->nargs - 2;
@ -1549,7 +1584,7 @@ static void rec_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
} else if (frame_iscont(frame)) { /* Return to continuation frame. */
ASMFunction cont = frame_contf(frame);
BCReg cbase = (BCReg)frame_delta(frame);
if (J->framedepth-- <= 0)
if ((J->framedepth -= 2) <= 0)
lj_trace_err(J, LJ_TRERR_NYIRETL);
J->baseslot -= (BCReg)cbase;
J->base -= cbase;
@ -1602,6 +1637,7 @@ static int rec_call(jit_State *J, BCReg func, ptrdiff_t cres, ptrdiff_t nargs)
if (tref_isfunc(res[0])) { /* Regular function call. */
rd.fn = funcV(tv);
rd.argv = tv+1;
rd.metacall = 0;
} else { /* Otherwise resolve __call metamethod for called object. */
RecordIndex ix;
ptrdiff_t i;
@ -1615,13 +1651,21 @@ static int rec_call(jit_State *J, BCReg func, ptrdiff_t cres, ptrdiff_t nargs)
res[0] = ix.mobj;
rd.fn = funcV(&ix.mobjv);
rd.argv = tv; /* The called object is the 1st arg. */
rd.metacall = 1;
}
/* Specialize to the runtime value of the called function. */
trfunc = lj_ir_kfunc(J, rd.fn);
emitir(IRTG(IR_EQ, IRT_FUNC), res[0], trfunc);
res[0] = trfunc | TREF_FRAME;
J->framedepth++;
/* Add frame links. */
J->frame[J->framedepth++] = SNAP_MKPC(J->pc+1);
if (cres == CALLRES_CONT) /* Continuations need an extra frame stack slot. */
J->frame[J->framedepth++] = SNAP_MKFTSZ((func+1)*sizeof(TValue)+FRAME_CONT);
/* NYI: func is wrong if any fast function ever sets up a continuation. */
if (J->framedepth > LJ_MAX_JFRAME)
lj_trace_err(J, LJ_TRERR_STACKOV);
if (isluafunc(rd.fn)) { /* Record call to Lua function. */
GCproto *pt = funcproto(rd.fn);
@ -1659,6 +1703,7 @@ static int rec_call(jit_State *J, BCReg func, ptrdiff_t cres, ptrdiff_t nargs)
return 0; /* No result yet. */
} else { /* Record call to C function or fast function. */
uint32_t m = 0;
BCReg oldmaxslot = J->maxslot;
res[1+nargs] = 0;
rd.nargs = nargs;
if (rd.fn->c.ffid < sizeof(recff_idmap)/sizeof(recff_idmap[0]))
@ -1682,10 +1727,12 @@ static int rec_call(jit_State *J, BCReg func, ptrdiff_t cres, ptrdiff_t nargs)
rec_ret(J, func, rd.nres);
} else if (cres == CALLRES_CONT) {
/* Note: immediately resolved continuations must not change J->maxslot. */
J->maxslot = oldmaxslot;
J->framedepth--;
res[rd.nres] = TREF_NIL; /* Turn 0 results into nil result. */
} else {
J->framedepth++;
lua_assert(cres == CALLRES_PENDING);
J->framedepth++;
return 0; /* Pending call, no result yet. */
}
return 1; /* Result resolved immediately. */
@ -2213,13 +2260,13 @@ static void rec_setup_side(jit_State *J, Trace *T)
}
setslot:
J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */
if ((sn & SNAP_FRAME) && s != 0) {
if ((sn & SNAP_FRAME) && s != 0)
J->baseslot = s+1;
J->framedepth++;
}
}
J->base = J->slot + J->baseslot;
J->maxslot = snap->nslots - J->baseslot;
J->framedepth = snap->depth; /* Copy frames from snapshot. */
memcpy(J->frame, &map[nent+1], sizeof(SnapEntry)*(size_t)snap->depth);
lj_snap_add(J);
}

View File

@ -68,49 +68,26 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
return n;
}
/*
** Add frame links at the end of the snapshot.
** Writes the current PC to map[0], then one or two entries per frame while
** traversing the frames above L->base - J->baseslot backwards, starting at
** L->base-1. Returns the number of entries written to map[].
*/
static MSize snapshot_framelinks(jit_State *J, SnapEntry *map)
{
  cTValue *stop = J->L->base - J->baseslot;
  cTValue *fr = J->L->base - 1;
  MSize n = 0;
  map[n++] = SNAP_MKPC(J->pc);  /* The current PC is always the first entry. */
  while (fr > stop) {  /* Backwards traversal of all frames above base. */
    if (frame_islua(fr)) {  /* Lua frame: store its PC. */
      map[n++] = SNAP_MKPC(frame_pc(fr));
      fr = frame_prevl(fr);
      continue;
    }
    if (frame_ispcall(fr)) {  /* pcall frame: store its frame type/size word. */
      map[n++] = SNAP_MKFTSZ(frame_ftsz(fr));
      fr = frame_prevd(fr);
      continue;
    }
    if (frame_iscont(fr)) {  /* Continuation frame: two entries. */
      map[n++] = SNAP_MKFTSZ(frame_ftsz(fr));
      map[n++] = SNAP_MKPC(frame_contpc(fr));
      fr = frame_prevd(fr);
      continue;
    }
    lua_assert(0);  /* No other frame type is handled here. */
  }
  return n;
}
/* Take a snapshot of the current stack. */
static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
{
BCReg nslots = J->baseslot + J->maxslot;
MSize nent, nframelinks;
MSize nent;
SnapEntry *p;
/* Conservative estimate. Continuation frames need 2 slots. */
lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth*2+1);
/* Conservative estimate. */
lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1);
p = &J->cur.snapmap[nsnapmap];
nent = snapshot_slots(J, p, nslots);
nframelinks = snapshot_framelinks(J, p + nent);
J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + nframelinks);
snap->mapofs = (uint16_t)nsnapmap;
snap->ref = (IRRef1)J->cur.nins;
snap->nent = (uint8_t)nent;
snap->nframelinks = (uint8_t)nframelinks;
snap->depth = (uint8_t)J->framedepth;
snap->nslots = (uint8_t)nslots;
snap->count = 0;
J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth);
/* Add frame links at the end of the snapshot. */
p[nent] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
memcpy(&p[nent+1], J->frame, sizeof(SnapEntry)*(size_t)J->framedepth);
}
/* Add or merge a snapshot. */
@ -141,14 +118,14 @@ void lj_snap_shrink(jit_State *J)
lua_assert(nslots < snap->nslots);
snap->nslots = (uint8_t)nslots;
if (nent > 0 && snap_slot(map[nent-1]) >= nslots) {
MSize s, delta, nframelinks = snap->nframelinks;
MSize s, delta, depth = snap->depth;
for (nent--; nent > 0 && snap_slot(map[nent-1]) >= nslots; nent--)
;
delta = snap->nent - nent;
snap->nent = (uint8_t)nent;
J->cur.nsnapmap = (uint16_t)(snap->mapofs + nent + nframelinks);
J->cur.nsnapmap = (uint16_t)(snap->mapofs + nent + 1 + depth);
map += nent;
for (s = 0; s < nframelinks; s++) /* Move frame links down. */
for (s = 0; s <= depth; s++) /* Move PC + frame links down. */
map[s] = map[s+delta];
}
}
@ -210,7 +187,7 @@ void lj_snap_restore(jit_State *J, void *exptr)
SnapShot *snap = &T->snap[snapno];
MSize n, nent = snap->nent;
SnapEntry *map = &T->snapmap[snap->mapofs];
SnapEntry *flinks = map + nent + snap->nframelinks;
SnapEntry *flinks = map + nent;
int32_t ftsz0;
BCReg nslots = snap->nslots;
TValue *frame;
@ -224,6 +201,7 @@ void lj_snap_restore(jit_State *J, void *exptr)
}
/* Fill stack slots with data from the registers and spill slots. */
J->pc = snap_pc(*flinks++);
frame = L->base-1;
ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */
for (n = 0; n < nent; n++) {
@ -236,7 +214,7 @@ void lj_snap_restore(jit_State *J, void *exptr)
lj_ir_kvalue(L, o, ir);
if ((sn & (SNAP_CONT|SNAP_FRAME))) {
/* Overwrite tag with frame link. */
o->fr.tp.ftsz = s != 0 ? (int32_t)*--flinks : ftsz0;
o->fr.tp.ftsz = s != 0 ? (int32_t)*flinks++ : ftsz0;
if ((sn & SNAP_FRAME)) {
GCfunc *fn = ir_kfunc(ir);
if (isluafunc(fn)) {
@ -291,8 +269,7 @@ void lj_snap_restore(jit_State *J, void *exptr)
}
}
L->top = curr_topL(L);
J->pc = snap_pc(*--flinks);
lua_assert(map + nent == flinks);
lua_assert(map + nent + 1 + snap->depth == flinks);
}
#undef IR