diff --git a/src/lj_asm.c b/src/lj_asm.c index 38bbb1c2..5e7ff7f5 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -3042,7 +3042,7 @@ static void asm_tail_sync(ASMState *as) SnapShot *snap = &as->T->snap[as->T->nsnap-1]; /* Last snapshot. */ MSize n, nent = snap->nent; SnapEntry *map = &as->T->snapmap[snap->mapofs]; - SnapEntry *flinks = map + nent + snap->nframelinks; + SnapEntry *flinks = map + nent + 1; BCReg newbase = 0; BCReg nslots, topslot = 0; @@ -3116,11 +3116,11 @@ static void asm_tail_sync(ASMState *as) if (!(sn & (SNAP_CONT|SNAP_FRAME))) emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); else if (s != 0) /* Do not overwrite link to previous frame. */ - emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks)); + emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks++)); } checkmclim(as); } - lua_assert(map + nent == flinks-1); + lua_assert(map + nent + 1 + snap->depth == flinks); } /* Fixup the tail code. */ diff --git a/src/lj_def.h b/src/lj_def.h index 872a7830..64b08f7b 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -66,6 +66,7 @@ typedef unsigned __int32 uintptr_t; /* JIT compiler limits. */ #define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */ +#define LJ_MAX_JFRAME 20 /* Max. # of frames for a trace. */ #define LJ_MAX_PHI 32 /* Max. # of PHIs for a loop. */ #define LJ_MAX_EXITSTUBGR 8 /* Max. # of exit stub groups. */ diff --git a/src/lj_jit.h b/src/lj_jit.h index 1e029182..229642a5 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -114,7 +114,7 @@ typedef struct SnapShot { IRRef1 ref; /* First IR ref for this snapshot. */ uint8_t nslots; /* Number of valid slots. */ uint8_t nent; /* Number of compressed entries. */ - uint8_t nframelinks; /* Number of frame links. */ + uint8_t depth; /* Number of frame links. */ uint8_t count; /* Count of taken exits for this snapshot. */ } SnapShot; @@ -252,6 +252,7 @@ typedef struct jit_State { IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. 
*/ TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ + SnapEntry frame[LJ_MAX_JFRAME+2]; /* Frame link stack. */ int32_t param[JIT_P__MAX]; /* JIT engine parameters. */ diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c index e5ad5b43..e0e6990e 100644 --- a/src/lj_opt_loop.c +++ b/src/lj_opt_loop.c @@ -167,7 +167,7 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap, SnapEntry *loopmap, IRRef1 *subst) { SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs]; - MSize nmapofs, nframelinks; + MSize nmapofs, depth; MSize on, ln, nn, onent = osnap->nent; BCReg nslots = osnap->nslots; SnapShot *snap = &J->cur.snap[J->cur.nsnap]; @@ -179,11 +179,11 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap, nmapofs = snap->mapofs; } J->guardemit.irt = 0; - nframelinks = osnap->nframelinks; + depth = osnap->depth; /* Setup new snapshot. */ snap->mapofs = (uint16_t)nmapofs; snap->ref = (IRRef1)J->cur.nins; - snap->nframelinks = (uint8_t)nframelinks; + snap->depth = (uint8_t)depth; snap->nslots = nslots; snap->count = 0; nmap = &J->cur.snapmap[nmapofs]; @@ -205,10 +205,10 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap, while (snap_slot(loopmap[ln]) < nslots) /* Copy remaining loop slots. */ nmap[nn++] = loopmap[ln++]; snap->nent = (uint8_t)nn; - J->cur.nsnapmap = (uint16_t)(nmapofs + nn + nframelinks); + J->cur.nsnapmap = (uint16_t)(nmapofs + nn + 1 + depth); omap += onent; nmap += nn; - for (nn = 0; nn < nframelinks; nn++) /* Copy frame links. */ + for (nn = 0; nn <= depth; nn++) /* Copy PC + frame links. */ nmap[nn] = omap[nn]; } @@ -314,7 +314,7 @@ static void loop_undo(jit_State *J, IRRef ins, MSize nsnap) SnapShot *snap = &J->cur.snap[nsnap-1]; SnapEntry *map = J->cur.snapmap; map[snap->mapofs + snap->nent] = map[J->cur.snap[0].nent]; /* Restore PC. 
*/ - J->cur.nsnapmap = (uint16_t)(snap->mapofs + snap->nent + snap->nframelinks); + J->cur.nsnapmap = (uint16_t)(snap->mapofs + snap->nent + 1 + snap->depth); J->cur.nsnap = nsnap; J->guardemit.irt = 0; lj_ir_rollback(J, ins); diff --git a/src/lj_record.c b/src/lj_record.c index 824d2fd1..f6d13264 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -101,20 +101,45 @@ static void rec_check_ir(jit_State *J) } } +/* Compare frame stack of the recorder and the VM. */ +static void rec_check_frames(jit_State *J) +{ + cTValue *frame = J->L->base - 1; + cTValue *lim = J->L->base - J->baseslot; + int32_t depth = J->framedepth; + while (frame > lim) { + depth--; + lua_assert(depth >= 0); + lua_assert((SnapEntry)frame_ftsz(frame) == J->frame[depth]); + if (frame_iscont(frame)) { + depth--; + lua_assert(depth >= 0); + lua_assert((SnapEntry)frame_ftsz(frame-1) == J->frame[depth]); + } + frame = frame_prev(frame); + } + lua_assert(depth == 0); +} + /* Sanity check the slots. */ static void rec_check_slots(jit_State *J) { BCReg s, nslots = J->baseslot + J->maxslot; + int32_t depth = 0; /* Counts slots above #0 tagged TREF_CONT/TREF_FRAME; must start at 0. */ lua_assert(J->baseslot >= 1 && J->baseslot < LJ_MAX_JSLOTS); lua_assert(nslots < LJ_MAX_JSLOTS); for (s = 0; s < nslots; s++) { TRef tr = J->slot[s]; + if (s != 0 && (tr & (TREF_CONT|TREF_FRAME))) + depth++; if (tr) { IRRef ref = tref_ref(tr); lua_assert(ref >= J->cur.nk && ref < J->cur.nins); lua_assert(irt_t(IR(ref)->t) == tref_t(tr)); } } + lua_assert(J->framedepth == depth); + rec_check_frames(J); } #endif @@ -854,6 +879,7 @@ typedef struct RecordFFData { ptrdiff_t nres; /* Number of returned results (defaults to 1). */ ptrdiff_t cres; /* Wanted number of call results. */ uint32_t data; /* Per-ffid auxiliary data (opcode, literal etc.). */ + int metacall; /* True if function was resolved via __call. */ } RecordFFData; /* Type of handler to record a fast function. 
*/ @@ -1020,9 +1046,14 @@ static void recff_tostring(jit_State *J, TRef *res, RecordFFData *rd) ix.tab = tr; copyTV(J->L, &ix.tabv, &rd->argv[0]); if (rec_mm_lookup(J, &ix, MM_tostring)) { /* Has __tostring metamethod? */ + if (rd->metacall) /* Must not use kludge. */ + recff_err_nyi(J, rd); res[0] = ix.mobj; - copyTV(J->L, rd->argv - 1, &ix.mobjv); - if (!rec_call(J, (BCReg)(res - J->base), 1, 1)) /* Pending call? */ + copyTV(J->L, rd->argv - 1, &ix.mobjv); /* Kludge. */ + J->framedepth--; + if (rec_call(J, (BCReg)(res - J->base), 1, 1)) + J->framedepth++; + else rd->cres = CALLRES_PENDING; /* Otherwise res[0] already contains the result. */ } else if (tref_isnumber(tr)) { @@ -1067,6 +1098,8 @@ static void recff_pcall(jit_State *J, TRef *res, RecordFFData *rd) { if (rd->nargs >= 1) { BCReg parg = (BCReg)(arg - J->base); + J->pc = (const BCIns *)(sizeof(TValue) - 4 + + (hook_active(J2G(J)) ? FRAME_PCALLH : FRAME_PCALL)); if (rec_call(J, parg, CALLRES_MULTI, rd->nargs - 1)) { /* Resolved call. */ res[0] = TREF_TRUE; /* Prepend true result. No need to move results. */ rd->nres = (ptrdiff_t)J->maxslot - (ptrdiff_t)parg + 1; @@ -1108,6 +1141,8 @@ static void recff_xpcall(jit_State *J, TRef *res, RecordFFData *rd) copyTV(J->L, &rd->argv[0], &argv1); copyTV(J->L, &rd->argv[1], &argv0); oargv = savestack(J->L, rd->argv); + J->pc = (const BCIns *)(2*sizeof(TValue) - 4 + + (hook_active(J2G(J)) ? FRAME_PCALLH : FRAME_PCALL)); /* Need to protect rec_call because the recorder may throw. */ rx.parg = parg; rx.nargs = rd->nargs - 2; @@ -1549,7 +1584,7 @@ static void rec_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) } else if (frame_iscont(frame)) { /* Return to continuation frame. 
*/ ASMFunction cont = frame_contf(frame); BCReg cbase = (BCReg)frame_delta(frame); - if (J->framedepth-- <= 0) + if ((J->framedepth -= 2) <= 0) lj_trace_err(J, LJ_TRERR_NYIRETL); J->baseslot -= (BCReg)cbase; J->base -= cbase; @@ -1602,6 +1637,7 @@ static int rec_call(jit_State *J, BCReg func, ptrdiff_t cres, ptrdiff_t nargs) if (tref_isfunc(res[0])) { /* Regular function call. */ rd.fn = funcV(tv); rd.argv = tv+1; + rd.metacall = 0; } else { /* Otherwise resolve __call metamethod for called object. */ RecordIndex ix; ptrdiff_t i; @@ -1615,13 +1651,21 @@ static int rec_call(jit_State *J, BCReg func, ptrdiff_t cres, ptrdiff_t nargs) res[0] = ix.mobj; rd.fn = funcV(&ix.mobjv); rd.argv = tv; /* The called object is the 1st arg. */ + rd.metacall = 1; } /* Specialize to the runtime value of the called function. */ trfunc = lj_ir_kfunc(J, rd.fn); emitir(IRTG(IR_EQ, IRT_FUNC), res[0], trfunc); res[0] = trfunc | TREF_FRAME; - J->framedepth++; + + /* Add frame links. */ + J->frame[J->framedepth++] = SNAP_MKPC(J->pc+1); + if (cres == CALLRES_CONT) /* Continuations need an extra frame stack slot. */ + J->frame[J->framedepth++] = SNAP_MKFTSZ((func+1)*sizeof(TValue)+FRAME_CONT); + /* NYI: func is wrong if any fast function ever sets up a continuation. */ + if (J->framedepth > LJ_MAX_JFRAME) + lj_trace_err(J, LJ_TRERR_STACKOV); if (isluafunc(rd.fn)) { /* Record call to Lua function. */ GCproto *pt = funcproto(rd.fn); @@ -1659,6 +1703,7 @@ static int rec_call(jit_State *J, BCReg func, ptrdiff_t cres, ptrdiff_t nargs) return 0; /* No result yet. */ } else { /* Record call to C function or fast function. 
*/ uint32_t m = 0; + BCReg oldmaxslot = J->maxslot; res[1+nargs] = 0; rd.nargs = nargs; if (rd.fn->c.ffid < sizeof(recff_idmap)/sizeof(recff_idmap[0])) @@ -1682,10 +1727,12 @@ static int rec_call(jit_State *J, BCReg func, ptrdiff_t cres, ptrdiff_t nargs) rec_ret(J, func, rd.nres); } else if (cres == CALLRES_CONT) { /* Note: immediately resolved continuations must not change J->maxslot. */ + J->maxslot = oldmaxslot; + J->framedepth--; res[rd.nres] = TREF_NIL; /* Turn 0 results into nil result. */ } else { - J->framedepth++; lua_assert(cres == CALLRES_PENDING); + J->framedepth++; return 0; /* Pending call, no result yet. */ } return 1; /* Result resolved immediately. */ @@ -2213,13 +2260,13 @@ static void rec_setup_side(jit_State *J, Trace *T) } setslot: J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */ - if ((sn & SNAP_FRAME) && s != 0) { + if ((sn & SNAP_FRAME) && s != 0) J->baseslot = s+1; - J->framedepth++; - } } J->base = J->slot + J->baseslot; J->maxslot = snap->nslots - J->baseslot; + J->framedepth = snap->depth; /* Copy frames from snapshot. */ + memcpy(J->frame, &map[nent+1], sizeof(SnapEntry)*(size_t)snap->depth); lj_snap_add(J); } diff --git a/src/lj_snap.c b/src/lj_snap.c index 8a53e3f6..95dc77da 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -68,49 +68,26 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) return n; } -/* Add frame links at the end of the snapshot. */ -static MSize snapshot_framelinks(jit_State *J, SnapEntry *map) -{ - cTValue *frame = J->L->base - 1; - cTValue *lim = J->L->base - J->baseslot; - MSize f = 0; - map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ - while (frame > lim) { /* Backwards traversal of all frames above base. 
*/ - if (frame_islua(frame)) { - map[f++] = SNAP_MKPC(frame_pc(frame)); - frame = frame_prevl(frame); - } else if (frame_ispcall(frame)) { - map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); - frame = frame_prevd(frame); - } else if (frame_iscont(frame)) { - map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); - map[f++] = SNAP_MKPC(frame_contpc(frame)); - frame = frame_prevd(frame); - } else { - lua_assert(0); - } - } - return f; -} - /* Take a snapshot of the current stack. */ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) { BCReg nslots = J->baseslot + J->maxslot; - MSize nent, nframelinks; + MSize nent; SnapEntry *p; - /* Conservative estimate. Continuation frames need 2 slots. */ - lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth*2+1); + /* Conservative estimate. */ + lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1); p = &J->cur.snapmap[nsnapmap]; nent = snapshot_slots(J, p, nslots); - nframelinks = snapshot_framelinks(J, p + nent); - J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + nframelinks); snap->mapofs = (uint16_t)nsnapmap; snap->ref = (IRRef1)J->cur.nins; snap->nent = (uint8_t)nent; - snap->nframelinks = (uint8_t)nframelinks; + snap->depth = (uint8_t)J->framedepth; snap->nslots = (uint8_t)nslots; snap->count = 0; + J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth); + /* Add frame links at the end of the snapshot. */ + p[nent] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ + memcpy(&p[nent+1], J->frame, sizeof(SnapEntry)*(size_t)J->framedepth); } /* Add or merge a snapshot. 
*/ @@ -141,14 +118,14 @@ void lj_snap_shrink(jit_State *J) lua_assert(nslots < snap->nslots); snap->nslots = (uint8_t)nslots; if (nent > 0 && snap_slot(map[nent-1]) >= nslots) { - MSize s, delta, nframelinks = snap->nframelinks; + MSize s, delta, depth = snap->depth; for (nent--; nent > 0 && snap_slot(map[nent-1]) >= nslots; nent--) ; delta = snap->nent - nent; snap->nent = (uint8_t)nent; - J->cur.nsnapmap = (uint16_t)(snap->mapofs + nent + nframelinks); + J->cur.nsnapmap = (uint16_t)(snap->mapofs + nent + 1 + depth); map += nent; - for (s = 0; s < nframelinks; s++) /* Move frame links down. */ + for (s = 0; s <= depth; s++) /* Move PC + frame links down. */ map[s] = map[s+delta]; } } @@ -210,7 +187,7 @@ void lj_snap_restore(jit_State *J, void *exptr) SnapShot *snap = &T->snap[snapno]; MSize n, nent = snap->nent; SnapEntry *map = &T->snapmap[snap->mapofs]; - SnapEntry *flinks = map + nent + snap->nframelinks; + SnapEntry *flinks = map + nent; int32_t ftsz0; BCReg nslots = snap->nslots; TValue *frame; @@ -224,6 +201,7 @@ void lj_snap_restore(jit_State *J, void *exptr) } /* Fill stack slots with data from the registers and spill slots. */ + J->pc = snap_pc(*flinks++); frame = L->base-1; ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ for (n = 0; n < nent; n++) { @@ -236,7 +214,7 @@ void lj_snap_restore(jit_State *J, void *exptr) lj_ir_kvalue(L, o, ir); if ((sn & (SNAP_CONT|SNAP_FRAME))) { /* Overwrite tag with frame link. */ - o->fr.tp.ftsz = s != 0 ? (int32_t)*--flinks : ftsz0; + o->fr.tp.ftsz = s != 0 ? (int32_t)*flinks++ : ftsz0; if ((sn & SNAP_FRAME)) { GCfunc *fn = ir_kfunc(ir); if (isluafunc(fn)) { @@ -291,8 +269,7 @@ void lj_snap_restore(jit_State *J, void *exptr) } } L->top = curr_topL(L); - J->pc = snap_pc(*--flinks); - lua_assert(map + nent == flinks); + lua_assert(map + nent + 1 + snap->depth == flinks); } #undef IR