Compress snapshots using a simple, extensible 1D-compression.

Typically reduces storage overhead for snapshot maps by 60%.
The extensible format is a prerequisite for the next redesign steps:
Eliminate IR_FRAME and implement return-to-lower-frame.
This commit is contained in:
Mike Pall 2010-01-26 21:49:04 +01:00
parent e058714a2e
commit 67ca399a30
11 changed files with 366 additions and 320 deletions

View File

@ -11,7 +11,7 @@ buildvm_lib.o: buildvm_lib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
buildvm_peobj.o: buildvm_peobj.c buildvm.h lj_def.h lua.h luaconf.h \
lj_arch.h lj_bc.h
lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_alloc.h
lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_lib.h lj_alloc.h
lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \
lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_ctype.h lj_lib.h lj_libdef.h
@ -87,8 +87,8 @@ lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \
lj_traceerr.h lj_vm.h lj_folddef.h
lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h \
lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h
lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h
lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_tab.h lj_ir.h lj_jit.h lj_iropt.h
lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \

View File

@ -332,18 +332,25 @@ LJLIB_CF(jit_util_tracesnap)
if (T && sn < T->nsnap) {
SnapShot *snap = &T->snap[sn];
SnapEntry *map = &T->snapmap[snap->mapofs];
BCReg s, nslots = snap->nslots;
MSize n, nent = snap->nent;
BCReg nslots = snap->nslots;
GCtab *t;
lua_createtable(L, nslots ? (int)nslots : 1, 0);
t = tabV(L->top-1);
setintV(lj_tab_setint(L, t, 0), (int32_t)snap->ref - REF_BIAS);
for (s = 0; s < nslots; s++) {
TValue *o = lj_tab_setint(L, t, (int32_t)(s+1));
IRRef ref = snap_ref(map[s]);
if (ref)
setintV(o, (int32_t)ref - REF_BIAS);
else
/* NYI: get rid of this and expose the compressed slot map. */
{
BCReg s;
for (s = 0; s < nslots; s++) {
TValue *o = lj_tab_setint(L, t, (int32_t)(s+1));
setboolV(o, 0);
}
}
for (n = 0; n < nent; n++) {
BCReg s = snap_slot(map[n]);
IRRef ref = snap_ref(map[n]);
TValue *o = lj_tab_setint(L, t, (int32_t)(s+1));
setintV(o, (int32_t)ref - REF_BIAS);
}
return 1;
}

View File

@ -926,9 +926,9 @@ static void asm_snap_alloc(ASMState *as)
{
SnapShot *snap = &as->T->snap[as->snapno];
SnapEntry *map = &as->T->snapmap[snap->mapofs];
BCReg s, nslots = snap->nslots;
for (s = 0; s < nslots; s++) {
IRRef ref = snap_ref(map[s]);
MSize n, nent = snap->nent;
for (n = 0; n < nent; n++) {
IRRef ref = snap_ref(map[n]);
if (!irref_isk(ref)) {
IRIns *ir = IR(ref);
if (!ra_used(ir) && ir->o != IR_FRAME) {
@ -960,9 +960,9 @@ static int asm_snap_checkrename(ASMState *as, IRRef ren)
{
SnapShot *snap = &as->T->snap[as->snapno];
SnapEntry *map = &as->T->snapmap[snap->mapofs];
BCReg s, nslots = snap->nslots;
for (s = 0; s < nslots; s++) {
IRRef ref = snap_ref(map[s]);
MSize n, nent = snap->nent;
for (n = 0; n < nent; n++) {
IRRef ref = snap_ref(map[n]);
if (ref == ren) {
IRIns *ir = IR(ref);
ra_spill(as, ir); /* Register renamed, so force a spill slot. */
@ -2465,18 +2465,17 @@ static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base)
*/
RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base);
SnapEntry *map = &as->T->snapmap[snap->mapofs];
BCReg s, nslots = snap->nslots;
for (s = 0; s < nslots; s++) {
IRRef ref = snap_ref(map[s]);
MSize n, nent = snap->nent;
for (n = 0; n < nent; n++) {
IRRef ref = snap_ref(map[n]);
if (!irref_isk(ref)) {
int32_t ofs = 8*(int32_t)(snap_slot(map[n])-1);
IRIns *ir = IR(ref);
if (ir->o == IR_FRAME) {
/* NYI: sync the frame, bump base, set topslot, clear new slots. */
lj_trace_err(as->J, LJ_TRERR_NYIGCF);
} else if (irt_isgcv(ir->t) &&
!(ir->o == IR_SLOAD && ir->op1 < nslots && map[ir->op1] == 0)) {
} else if (irt_isgcv(ir->t)) {
Reg src = ra_alloc1(as, ref, allow);
int32_t ofs = 8*(int32_t)(s-1);
emit_movtomro(as, src, base, ofs);
emit_movmroi(as, base, ofs+4, irt_toitype(ir->t));
checkmclim(as);
@ -2504,7 +2503,7 @@ static void asm_gc_check(ASMState *as, SnapShot *snap)
emit_loadi(as, tmp, (int32_t)as->gcsteps);
/* We don't know spadj yet, so get the C frame from L->cframe. */
emit_movmroi(as, tmp, CFRAME_OFS_PC,
(int32_t)as->T->snapmap[snap->mapofs+snap->nslots]);
(int32_t)as->T->snapmap[snap->mapofs+snap->nent]);
emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK);
lstate = IR(ASMREF_L)->r;
emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe));
@ -2965,19 +2964,19 @@ static void asm_head_side(ASMState *as)
static void asm_tail_sync(ASMState *as)
{
SnapShot *snap = &as->T->snap[as->T->nsnap-1]; /* Last snapshot. */
BCReg s, nslots = snap->nslots;
MSize n, nent = snap->nent;
SnapEntry *map = &as->T->snapmap[snap->mapofs];
SnapEntry *flinks = map + nslots + snap->nframelinks;
SnapEntry *flinks = map + nent + snap->nframelinks;
BCReg newbase = 0;
BCReg secondbase = ~(BCReg)0;
BCReg topslot = 0;
BCReg nslots, topslot = 0;
checkmclim(as);
ra_allocref(as, REF_BASE, RID2RSET(RID_BASE));
/* Must check all frames to find topslot (outer can be larger than inner). */
for (s = 0; s < nslots; s++) {
IRRef ref = snap_ref(map[s]);
for (n = 0; n < nent; n++) {
IRRef ref = snap_ref(map[n]);
BCReg s = snap_slot(map[n]);
if (!irref_isk(ref)) {
IRIns *ir = IR(ref);
if (ir->o == IR_FRAME && irt_isfunc(ir->t)) {
@ -2985,10 +2984,7 @@ static void asm_tail_sync(ASMState *as)
if (isluafunc(fn)) {
BCReg fs = s + funcproto(fn)->framesize;
if (fs > topslot) topslot = fs;
if (s != 0) {
newbase = s;
if (secondbase == ~(BCReg)0) secondbase = s;
}
newbase = s;
}
}
}
@ -2998,7 +2994,7 @@ static void asm_tail_sync(ASMState *as)
if (as->T->link == TRACE_INTERP) {
/* Setup fixed registers for exit to interpreter. */
emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch);
emit_loadi(as, RID_PC, (int32_t)map[nslots]);
emit_loadi(as, RID_PC, (int32_t)map[nent]);
} else if (newbase) {
/* Save modified BASE for linking to trace with higher start frame. */
emit_setgl(as, RID_BASE, jit_base);
@ -3007,51 +3003,50 @@ static void asm_tail_sync(ASMState *as)
emit_addptr(as, RID_BASE, 8*(int32_t)newbase);
/* Clear stack slots of newly added frames. */
nslots = snap->nslots;
if (nslots <= topslot) {
if (nslots < topslot) {
BCReg s;
for (s = nslots; s <= topslot; s++) {
emit_movtomro(as, RID_EAX, RID_BASE, 8*(int32_t)s-4);
emit_movtomro(as, RID_EAX, RID_BASE, 8*((int32_t)s-1)+4);
checkmclim(as);
}
emit_loadi(as, RID_EAX, LJ_TNIL);
} else {
emit_movmroi(as, RID_BASE, 8*(int32_t)nslots-4, LJ_TNIL);
emit_movmroi(as, RID_BASE, 8*((int32_t)nslots-1)+4, LJ_TNIL);
}
}
/* Store the value of all modified slots to the Lua stack. */
for (s = 0; s < nslots; s++) {
for (n = 0; n < nent; n++) {
BCReg s = snap_slot(map[n]);
int32_t ofs = 8*((int32_t)s-1);
IRRef ref = snap_ref(map[s]);
if (ref) {
IRIns *ir = IR(ref);
/* No need to restore readonly slots and unmodified non-parent slots. */
if (ir->o == IR_SLOAD && ir->op1 == s &&
(ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
continue;
if (irt_isnum(ir->t)) {
Reg src = ra_alloc1(as, ref, RSET_FPR);
emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs);
} else if (ir->o == IR_FRAME) {
emit_movmroi(as, RID_BASE, ofs, ptr2addr(ir_kgc(IR(ir->op2))));
if (s != 0) /* Do not overwrite link to previous frame. */
emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks));
} else {
lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t));
if (!irref_isk(ref)) {
Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
emit_movtomro(as, src, RID_BASE, ofs);
} else if (!irt_ispri(ir->t)) {
emit_movmroi(as, RID_BASE, ofs, ir->i);
}
emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
}
IRRef ref = snap_ref(map[n]);
IRIns *ir = IR(ref);
/* No need to restore readonly slots and unmodified non-parent slots. */
if (ir->o == IR_SLOAD && ir->op1 == s &&
(ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
continue;
if (irt_isnum(ir->t)) {
Reg src = ra_alloc1(as, ref, RSET_FPR);
emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs);
} else if (ir->o == IR_FRAME) {
emit_movmroi(as, RID_BASE, ofs, ptr2addr(ir_kgc(IR(ir->op2))));
if (s != 0) /* Do not overwrite link to previous frame. */
emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks));
} else {
lua_assert(!(s > secondbase));
lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t));
if (!irref_isk(ref)) {
Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
emit_movtomro(as, src, RID_BASE, ofs);
} else if (!irt_ispri(ir->t)) {
emit_movmroi(as, RID_BASE, ofs, ir->i);
}
emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
}
checkmclim(as);
}
lua_assert(map + nslots == flinks-1);
lua_assert(map + nent == flinks-1);
}
/* Fixup the tail code. */

View File

@ -698,8 +698,8 @@ void lj_gdbjit_addtrace(jit_State *J, Trace *T, TraceNo traceno)
lua_State *L = J->L;
GCproto *pt = &gcref(T->startpt)->pt;
TraceNo parent = T->ir[REF_BASE].op1;
uintptr_t pcofs = (uintptr_t)(T->snap[0].mapofs+T->snap[0].nslots);
const BCIns *startpc = (const BCIns *)(uintptr_t)T->snapmap[pcofs];
uintptr_t pcofs = (uintptr_t)(T->snap[0].mapofs+T->snap[0].nent);
const BCIns *startpc = snap_pc(T->snapmap[pcofs]);
ctx.T = T;
ctx.mcaddr = (uintptr_t)T->mcode;
ctx.szmcode = T->szmcode;

View File

@ -112,17 +112,27 @@ typedef uint8_t MCode;
typedef struct SnapShot {
uint16_t mapofs; /* Offset into snapshot map. */
IRRef1 ref; /* First IR ref for this snapshot. */
uint8_t nslots; /* Number of stack slots. */
uint8_t nslots; /* Number of valid slots. */
uint8_t nent; /* Number of compressed entries. */
uint8_t nframelinks; /* Number of frame links. */
uint8_t count; /* Count of taken exits for this snapshot. */
uint8_t unused1;
} SnapShot;
#define SNAPCOUNT_DONE 255 /* Already compiled and linked a side trace. */
/* Snapshot entry. */
/* Compressed snapshot entry. */
/* A snapshot entry is a single 32 bit word. For slot entries the accessors
** below imply this layout: stack slot in bits 24-31, flags such as
** SNAP_FRAME at bit 16 and the IR reference in bits 0-15.
** The same type also carries raw PC and frame type/size words.
*/
typedef uint32_t SnapEntry;
#define SNAP_FRAME 0x010000 /* Slot has frame link. */
/* Compose a slot entry by adding the shifted slot, the flags and the ref.
** The fields are added, not masked, so each one must stay inside its range.
*/
#define SNAP(slot, flags, ref) ((SnapEntry)((slot) << 24) + (flags) + (ref))
/* Wrap a PC (converted with u32ptr) as a raw map entry. */
#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc))
/* Wrap a frame type/size word as a raw map entry. */
#define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz))
/* Extract the IR reference (low 16 bits). */
#define snap_ref(sn) ((sn) & 0xffff)
/* Extract the stack slot number (top 8 bits). */
#define snap_slot(sn) ((BCReg)((sn) >> 24))
/* Non-zero if the SNAP_FRAME flag is set. */
#define snap_isframe(sn) ((sn) & SNAP_FRAME)
/* Convert a raw entry back into a bytecode pointer.
** NOTE(review): the entry is only 32 bits wide, so this presumably relies
** on pointers fitting into 32 bits -- confirm.
*/
#define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn))
/* Replace the ref field, keeping slot and flags. The new ref is OR'ed in
** unmasked, so it presumably must fit into 16 bits -- confirm at call sites.
*/
#define snap_setref(sn, ref) (((sn) & 0xffff0000) | (ref))
/* Snapshot and exit numbers. */
typedef uint32_t SnapNo;

View File

@ -24,9 +24,9 @@ static void dce_marksnap(jit_State *J)
for (i = 0; i < nsnap; i++) {
SnapShot *snap = &J->cur.snap[i];
SnapEntry *map = &J->cur.snapmap[snap->mapofs];
BCReg s, nslots = snap->nslots;
for (s = 0; s < nslots; s++) {
IRRef ref = snap_ref(map[s]);
MSize n, nent = snap->nent;
for (n = 0; n < nent; n++) {
IRRef ref = snap_ref(map[n]);
if (!irref_isk(ref))
irt_setmark(IR(ref)->t);
}

View File

@ -10,7 +10,6 @@
#if LJ_HASJIT
#include "lj_gc.h"
#include "lj_err.h"
#include "lj_str.h"
#include "lj_ir.h"
@ -163,21 +162,69 @@ static void loop_emit_phi(jit_State *J, IRRef1 *subst, IRRef1 *phi, IRRef nphi)
/* -- Loop unrolling using copy-substitution ------------------------------ */
/* Copy-substitute one snapshot into the snapshot list of the current trace.
**
** osnap   is the snapshot to copy from.
** loopmap holds the entries of the loop snapshot, used as fallback for
**         slots that osnap does not contain.
** subst   maps non-constant IR refs to their substituted refs.
**
** The new map is a merge of both entry lists driven by slot number (the
** comparisons below only make sense for lists in ascending slot order).
** An osnap entry shadows a loopmap entry for the same slot.
** The loopmap must end with an entry whose slot is not below osnap->nslots:
** neither the merge nor the trailing copy checks any other bound on it.
** The frame links of osnap are appended unchanged after the merged entries.
*/
static void loop_subst_snap(jit_State *J, SnapShot *osnap,
			    SnapEntry *loopmap, IRRef1 *subst)
{
  SnapEntry *src = &J->cur.snapmap[osnap->mapofs];
  SnapEntry *dst;
  MSize nsrc = osnap->nent;
  MSize nlinks = osnap->nframelinks;
  BCReg nslots = osnap->nslots;
  MSize dstofs, si, li, di, k;
  SnapShot *snap = &J->cur.snap[J->cur.nsnap];
  if (!irt_isguard(J->guardemit)) {
    /* No guard since the previous snapshot: overwrite it in place. */
    snap--;
    dstofs = snap->mapofs;
  } else {
    /* A guard was emitted in between: append a new snapshot. */
    dstofs = J->cur.nsnapmap;
    J->cur.nsnap++;
  }
  J->guardemit.irt = 0;
  /* Fill in the snapshot header. The entry count is set after the merge. */
  snap->mapofs = (uint16_t)dstofs;
  snap->ref = (IRRef1)J->cur.nins;
  snap->nframelinks = (uint8_t)nlinks;
  snap->nslots = nslots;
  snap->count = 0;
  dst = &J->cur.snapmap[dstofs];
  /* Merge the entries of osnap with the entries of the loop snapshot. */
  li = di = 0;
  for (si = 0; si < nsrc; ) {
    SnapEntry oent = src[si], lent = loopmap[li];
    BCReg oslot = snap_slot(oent), lslot = snap_slot(lent);
    if (lslot < oslot) {  /* Slot only present in the loop map: copy it. */
      dst[di++] = lent;
      li++;
      continue;
    }
    if (lslot == oslot)
      li++;  /* Loop slot is shadowed by the snapshot slot. */
    if (!irref_isk(snap_ref(oent)))  /* Constants need no substitution. */
      oent = snap_setref(oent, subst[snap_ref(oent)]);
    dst[di++] = oent;
    si++;
  }
  /* Copy the remaining loop slots. The sentinel entry stops this loop. */
  for (; snap_slot(loopmap[li]) < nslots; li++)
    dst[di++] = loopmap[li];
  snap->nent = (uint8_t)di;
  J->cur.nsnapmap = (uint16_t)(dstofs + di + nlinks);
  /* Append the frame links, which follow the entries in the source map. */
  for (k = 0; k < nlinks; k++)
    dst[di + k] = src[nsrc + k];
}
/* Unroll loop. */
static void loop_unroll(jit_State *J)
{
IRRef1 phi[LJ_MAX_PHI];
uint32_t nphi = 0;
IRRef1 *subst;
SnapShot *osnap, *snap;
SnapEntry *loopmap;
BCReg loopslots;
MSize nsnap, nsnapmap;
IRRef ins, invar, osnapref;
SnapShot *osnap;
SnapEntry *loopmap, *psentinel;
IRRef ins, invar;
/* Use temp buffer for substitution table.
** Only non-constant refs in [REF_BIAS,invar) are valid indexes.
** Note: don't call into the VM or run the GC or the buffer may be gone.
** Caveat: don't call into the VM or run the GC or the buffer may be gone.
*/
invar = J->cur.nins;
subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf,
@ -187,80 +234,37 @@ static void loop_unroll(jit_State *J)
/* LOOP separates the pre-roll from the loop body. */
emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0);
/* Ensure size for copy-substituted snapshots (minus #0 and loop snapshot). */
nsnap = J->cur.nsnap;
if (LJ_UNLIKELY(2*nsnap-2 > J->sizesnap)) {
MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
if (2*nsnap-2 > maxsnap)
lj_trace_err(J, LJ_TRERR_SNAPOV);
lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
J->cur.snap = J->snapbuf;
}
nsnapmap = J->cur.nsnapmap; /* Use temp. copy to avoid undo. */
if (LJ_UNLIKELY(nsnapmap*2 > J->sizesnapmap)) {
J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
J->sizesnapmap*sizeof(SnapEntry),
2*J->sizesnapmap*sizeof(SnapEntry));
J->cur.snapmap = J->snapmapbuf;
J->sizesnapmap *= 2;
}
/* Grow snapshot buffer and map for copy-substituted snapshots.
** Need up to twice the number of snapshots minus #0 and loop snapshot.
** Need up to twice the number of entries plus fallback substitutions
** from the loop snapshot entries for each new snapshot.
** Caveat: both calls may reallocate J->cur.snap and J->cur.snapmap!
*/
{
MSize nsnap = J->cur.nsnap;
SnapShot *loopsnap;
lj_snap_grow_buf(J, 2*nsnap-2);
lj_snap_grow_map(J, J->cur.nsnapmap*2+(nsnap-2)*J->cur.snap[nsnap-1].nent);
/* The loop snapshot is used for fallback substitutions. */
snap = &J->cur.snap[nsnap-1];
loopmap = &J->cur.snapmap[snap->mapofs];
loopslots = snap->nslots;
/* The PC of snapshot #0 and the loop snapshot must match. */
lua_assert(loopmap[loopslots] == J->cur.snapmap[J->cur.snap[0].nslots]);
/* The loop snapshot is used for fallback substitutions. */
loopsnap = &J->cur.snap[nsnap-1];
loopmap = &J->cur.snapmap[loopsnap->mapofs];
/* The PC of snapshot #0 and the loop snapshot must match. */
psentinel = &loopmap[loopsnap->nent];
lua_assert(*psentinel == J->cur.snapmap[J->cur.snap[0].nent]);
*psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */
}
/* Start substitution with snapshot #1 (#0 is empty for root traces). */
osnap = &J->cur.snap[1];
osnapref = osnap->ref;
/* Copy and substitute all recorded instructions and snapshots. */
for (ins = REF_FIRST; ins < invar; ins++) {
IRIns *ir;
IRRef op1, op2;
/* Copy-substitute snapshot. */
if (ins >= osnapref) {
SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs];
BCReg s, nslots;
uint32_t nmapofs, nframelinks;
if (irt_isguard(J->guardemit)) { /* Guard inbetween? */
nmapofs = nsnapmap;
snap++; /* Add new snapshot. */
} else {
nmapofs = snap->mapofs; /* Overwrite previous snapshot. */
}
J->guardemit.irt = 0;
nslots = osnap->nslots;
nframelinks = osnap->nframelinks;
snap->mapofs = (uint16_t)nmapofs;
snap->ref = (IRRef1)J->cur.nins;
snap->nslots = (uint8_t)nslots;
snap->nframelinks = (uint8_t)nframelinks;
snap->count = 0;
osnap++;
osnapref = osnap->ref;
nsnapmap = nmapofs + nslots + nframelinks;
nmap = &J->cur.snapmap[nmapofs];
/* Substitute snapshot slots. */
for (s = 0; s < nslots; s++) {
IRRef ref = snap_ref(omap[s]);
if (ref) {
if (!irref_isk(ref))
ref = subst[ref];
} else if (s < loopslots) {
ref = loopmap[s];
}
nmap[s] = ref;
}
/* Copy frame links. */
nmap += nslots;
omap += nslots;
for (s = 0; s < nframelinks; s++)
nmap[s] = omap[s];
}
if (ins >= osnap->ref) /* Instruction belongs to next snapshot? */
loop_subst_snap(J, osnap++, loopmap, subst); /* Copy-substitute it. */
/* Substitute instruction operands. */
ir = IR(ins);
@ -295,22 +299,24 @@ static void loop_unroll(jit_State *J)
}
}
}
if (irt_isguard(J->guardemit)) { /* Guard inbetween? */
J->cur.nsnapmap = (uint16_t)nsnapmap;
snap++;
} else {
J->cur.nsnapmap = (uint16_t)snap->mapofs; /* Last snapshot is redundant. */
}
J->cur.nsnap = (uint16_t)(snap - J->cur.snap);
if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */
J->cur.nsnapmap = (uint16_t)J->cur.snap[--J->cur.nsnap].mapofs;
lua_assert(J->cur.nsnapmap <= J->sizesnapmap);
*psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */
loop_emit_phi(J, subst, phi, nphi);
}
/* Undo any partial changes made by the loop optimization. */
static void loop_undo(jit_State *J, IRRef ins)
static void loop_undo(jit_State *J, IRRef ins, MSize nsnap)
{
ptrdiff_t i;
SnapShot *snap = &J->cur.snap[nsnap-1];
SnapEntry *map = J->cur.snapmap;
map[snap->mapofs + snap->nent] = map[J->cur.snap[0].nent]; /* Restore PC. */
J->cur.nsnapmap = (uint16_t)(snap->mapofs + snap->nent + snap->nframelinks);
J->cur.nsnap = nsnap;
J->guardemit.irt = 0;
lj_ir_rollback(J, ins);
for (i = 0; i < BPROP_SLOTS; i++) { /* Remove backprop. cache entries. */
BPropEntry *bp = &J->bpropcache[i];
@ -336,6 +342,7 @@ static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud)
int lj_opt_loop(jit_State *J)
{
IRRef nins = J->cur.nins;
MSize nsnap = J->cur.nsnap;
int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt);
if (LJ_UNLIKELY(errcode)) {
lua_State *L = J->L;
@ -348,8 +355,7 @@ int lj_opt_loop(jit_State *J)
if (--J->instunroll < 0) /* But do not unroll forever. */
break;
L->top--; /* Remove error object. */
J->guardemit.irt = 0;
loop_undo(J, nins);
loop_undo(J, nins, nsnap);
return 1; /* Loop optimization failed, continue recording. */
default:
break;

View File

@ -1696,7 +1696,7 @@ static void optstate_comp(jit_State *J, int cond)
const BCIns *npc = J->pc + 2 + (cond ? bc_j(jmpins) : 0);
SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
/* Avoid re-recording the comparison in side traces. */
J->cur.snapmap[snap->mapofs + snap->nslots] = u32ptr(npc);
J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc);
J->needsnap = 1;
/* Shrink last snapshot if possible. */
if (bc_a(jmpins) < J->maxslot) {
@ -2159,61 +2159,62 @@ static void rec_setup_side(jit_State *J, Trace *T)
{
SnapShot *snap = &T->snap[J->exitno];
SnapEntry *map = &T->snapmap[snap->mapofs];
BCReg s, nslots = snap->nslots;
MSize n, nent = snap->nent;
BloomFilter seen = 0;
for (s = 0; s < nslots; s++) {
IRRef ref = snap_ref(map[s]);
if (ref) {
IRIns *ir = &T->ir[ref];
TRef tr = 0;
/* The bloom filter avoids O(nslots^2) overhead for de-duping slots. */
if (bloomtest(seen, ref)) {
BCReg j;
for (j = 0; j < s; j++)
if (snap_ref(map[j]) == ref) {
if (ir->o == IR_FRAME && irt_isfunc(ir->t)) {
lua_assert(s != 0);
J->baseslot = s+1;
J->framedepth++;
}
tr = J->slot[j];
goto dupslot;
}
}
bloomset(seen, ref);
switch ((IROp)ir->o) {
case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break;
case IR_KINT: tr = lj_ir_kint(J, ir->i); break;
case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break;
case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break;
case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */
if (irt_isfunc(ir->t)) {
if (s != 0) {
/* Emit IR for slots inherited from parent snapshot. */
for (n = 0; n < nent; n++) {
IRRef ref = snap_ref(map[n]);
BCReg s = snap_slot(map[n]);
IRIns *ir = &T->ir[ref];
TRef tr;
/* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
if (bloomtest(seen, ref)) {
MSize j;
for (j = 0; j < n; j++)
if (snap_ref(map[j]) == ref) {
tr = J->slot[snap_slot(map[j])];
if (ir->o == IR_FRAME && irt_isfunc(ir->t)) {
lua_assert(s != 0);
J->baseslot = s+1;
J->framedepth++;
}
tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2]));
tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr);
} else {
tr = lj_ir_kptr(J, mref(T->ir[ir->op2].ptr, void));
tr = emitir_raw(IRT(IR_FRAME, IRT_PTR), tr, tr);
goto dupslot;
}
break;
case IR_SLOAD: /* Inherited SLOADs don't need a guard or type check. */
tr = emitir_raw(ir->ot & ~IRT_GUARD, s,
(ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT);
break;
default: /* Parent refs are already typed and don't need a guard. */
tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s,
IRSLOAD_INHERIT|IRSLOAD_PARENT);
break;
}
dupslot:
J->slot[s] = tr;
}
bloomset(seen, ref);
switch ((IROp)ir->o) {
/* Only have to deal with constants that can occur in stack slots. */
case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break;
case IR_KINT: tr = lj_ir_kint(J, ir->i); break;
case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break;
case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break;
case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */
if (irt_isfunc(ir->t)) {
if (s != 0) {
J->baseslot = s+1;
J->framedepth++;
}
tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2]));
tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr);
} else {
tr = lj_ir_kptr(J, mref(T->ir[ir->op2].ptr, void));
tr = emitir_raw(IRT(IR_FRAME, IRT_PTR), tr, tr);
}
break;
case IR_SLOAD: /* Inherited SLOADs don't need a guard or type check. */
tr = emitir_raw(ir->ot & ~IRT_GUARD, s,
(ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT);
break;
default: /* Parent refs are already typed and don't need a guard. */
tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s,
IRSLOAD_INHERIT|IRSLOAD_PARENT);
break;
}
dupslot:
J->slot[s] = tr;
}
J->base = J->slot + J->baseslot;
J->maxslot = nslots - J->baseslot;
J->maxslot = snap->nslots - J->baseslot;
lj_snap_add(J);
}
@ -2259,7 +2260,7 @@ void lj_record_setup(jit_State *J)
J->cur.root = (uint16_t)root;
J->cur.startins = BCINS_AD(BC_JMP, 0, 0);
/* Check whether we could at least potentially form an extra loop. */
if (J->exitno == 0 && T->snap[0].nslots == 1 && T->snapmap[0] == 0) {
if (J->exitno == 0 && T->snap[0].nent == 0) {
/* We can narrow a FORL for some side traces, too. */
if (J->pc > J->pt->bc && bc_op(J->pc[-1]) == BC_JFORI &&
bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) {

View File

@ -23,28 +23,50 @@
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])
/* -- Snapshot buffer allocation ------------------------------------------ */
/* Grow the snapshot buffer (slow path).
** Reports LJ_TRERR_SNAPOV through lj_trace_err if more snapshots are needed
** than the maxsnap parameter allows. The growth itself is delegated to
** lj_mem_growvec with maxsnap passed as the limit; afterwards the current
** trace is pointed at the buffer again, since it may have been reallocated.
*/
void lj_snap_grow_buf_(jit_State *J, MSize need)
{
  MSize limit = (MSize)J->param[JIT_P_maxsnap];
  if (!(need <= limit)) {
    lj_trace_err(J, LJ_TRERR_SNAPOV);
  }
  lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, limit, SnapShot);
  J->cur.snap = J->snapbuf;
}
/* Grow the snapshot map buffer (slow path).
** The new size is twice the old size if the request is smaller than that.
** Otherwise it is the requested size, rounded up to 64 entries if below.
** The current trace is pointed at the buffer again after the reallocation.
*/
void lj_snap_grow_map_(jit_State *J, MSize need)
{
  MSize oldsz = J->sizesnapmap;
  MSize newsz;
  if (need < 2*oldsz)
    newsz = 2*oldsz;
  else if (need < 64)
    newsz = 64;
  else
    newsz = need;
  J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
		    oldsz*sizeof(SnapEntry), newsz*sizeof(SnapEntry));
  J->cur.snapmap = J->snapmapbuf;
  J->sizesnapmap = newsz;
}
/* -- Snapshot generation ------------------------------------------------- */
/* NYI: Snapshots are in need of a redesign. The current storage model for
** snapshot maps is too wasteful. They could be compressed (1D or 2D) and
** made more flexible at the same time. Iterators should no longer need to
** skip unmodified slots. IR_FRAME should be eliminated, too.
*/
/* NYI: IR_FRAME should be eliminated, too. */
/* Add all modified slots to the snapshot. */
static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
{
BCReg s;
MSize n = 0;
for (s = 0; s < nslots; s++) {
IRRef ref = tref_ref(J->slot[s]);
if (ref) {
IRIns *ir = IR(ref);
if (ir->o == IR_SLOAD && ir->op1 == s && !(ir->op2 & IRSLOAD_INHERIT))
ref = 0;
if (!(ir->o == IR_SLOAD && ir->op1 == s &&
!(ir->op2 & IRSLOAD_INHERIT)))
map[n++] = SNAP(s, ir->o == IR_FRAME ? SNAP_FRAME : 0, ref);
}
map[s] = (SnapEntry)ref;
}
return nslots;
return n;
}
/* Add frame links at the end of the snapshot. */
@ -53,17 +75,17 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map)
cTValue *frame = J->L->base - 1;
cTValue *lim = J->L->base - J->baseslot;
MSize f = 0;
map[f++] = u32ptr(J->pc);
while (frame > lim) {
map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
while (frame > lim) { /* Backwards traversal of all frames above base. */
if (frame_islua(frame)) {
map[f++] = u32ptr(frame_pc(frame));
map[f++] = SNAP_MKPC(frame_pc(frame));
frame = frame_prevl(frame);
} else if (frame_ispcall(frame)) {
map[f++] = (uint32_t)frame_ftsz(frame);
map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
frame = frame_prevd(frame);
} else if (frame_iscont(frame)) {
map[f++] = (uint32_t)frame_ftsz(frame);
map[f++] = u32ptr(frame_contpc(frame));
map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
map[f++] = SNAP_MKPC(frame_contpc(frame));
frame = frame_prevd(frame);
} else {
lua_assert(0);
@ -76,28 +98,19 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map)
static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
{
BCReg nslots = J->baseslot + J->maxslot;
MSize nsm, nframelinks;
MSize nent, nframelinks;
SnapEntry *p;
/* Conservative estimate. Continuation frames need 2 slots. */
nsm = nsnapmap + nslots + (uint32_t)J->framedepth*2+1;
if (LJ_UNLIKELY(nsm > J->sizesnapmap)) { /* Need to grow snapshot map? */
if (nsm < 2*J->sizesnapmap)
nsm = 2*J->sizesnapmap;
else if (nsm < 64)
nsm = 64;
J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
J->sizesnapmap*sizeof(SnapEntry), nsm*sizeof(SnapEntry));
J->cur.snapmap = J->snapmapbuf;
J->sizesnapmap = nsm;
}
lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth*2+1);
p = &J->cur.snapmap[nsnapmap];
nslots = snapshot_slots(J, p, nslots);
nframelinks = snapshot_framelinks(J, p + nslots);
J->cur.nsnapmap = (uint16_t)(nsnapmap + nslots + nframelinks);
nent = snapshot_slots(J, p, nslots);
nframelinks = snapshot_framelinks(J, p + nent);
J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + nframelinks);
snap->mapofs = (uint16_t)nsnapmap;
snap->ref = (IRRef1)J->cur.nins;
snap->nslots = (uint8_t)nslots;
snap->nent = (uint8_t)nent;
snap->nframelinks = (uint8_t)nframelinks;
snap->nslots = (uint8_t)nslots;
snap->count = 0;
}
@ -111,14 +124,7 @@ void lj_snap_add(jit_State *J)
(nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) {
nsnapmap = J->cur.snap[--nsnap].mapofs;
} else {
/* Need to grow snapshot buffer? */
if (LJ_UNLIKELY(nsnap >= J->sizesnap)) {
MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
if (nsnap >= maxsnap)
lj_trace_err(J, LJ_TRERR_SNAPOV);
lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
J->cur.snap = J->snapbuf;
}
lj_snap_grow_buf(J, nsnap+1);
J->cur.nsnap = (uint16_t)(nsnap+1);
}
J->mergesnap = 0;
@ -131,14 +137,21 @@ void lj_snap_shrink(jit_State *J)
{
BCReg nslots = J->baseslot + J->maxslot;
SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
SnapEntry *oflinks = &J->cur.snapmap[snap->mapofs + snap->nslots];
SnapEntry *nflinks = &J->cur.snapmap[snap->mapofs + nslots];
uint32_t s, nframelinks = snap->nframelinks;
SnapEntry *map = &J->cur.snapmap[snap->mapofs];
MSize nent = snap->nent;
lua_assert(nslots < snap->nslots);
snap->nslots = (uint8_t)nslots;
J->cur.nsnapmap = (uint16_t)(snap->mapofs + nslots + nframelinks);
for (s = 0; s < nframelinks; s++) /* Move frame links down. */
nflinks[s] = oflinks[s];
if (nent > 0 && snap_slot(map[nent-1]) >= nslots) {
MSize s, delta, nframelinks = snap->nframelinks;
for (nent--; nent > 0 && snap_slot(map[nent-1]) >= nslots; nent--)
;
delta = snap->nent - nent;
snap->nent = (uint8_t)nent;
J->cur.nsnapmap = (uint16_t)(snap->mapofs + nent + nframelinks);
map += nent;
for (s = 0; s < nframelinks; s++) /* Move frame links down. */
map[s] = map[s+delta];
}
}
/* -- Snapshot access ----------------------------------------------------- */
@ -167,21 +180,24 @@ static RegSP snap_renameref(Trace *T, SnapNo lim, IRRef ref, RegSP rs)
return rs;
}
/* Convert a snapshot into a linear slot -> RegSP map. */
/* Convert a snapshot into a linear slot -> RegSP map.
** Note: unused slots are not initialized!
*/
void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno)
{
SnapShot *snap = &T->snap[snapno];
BCReg s, nslots = snap->nslots;
MSize n, nent = snap->nent;
SnapEntry *map = &T->snapmap[snap->mapofs];
BloomFilter rfilt = snap_renamefilter(T, snapno);
for (s = 0; s < nslots; s++) {
IRRef ref = snap_ref(map[s]);
for (n = 0; n < nent; n++) {
SnapEntry sn = map[n];
IRRef ref = snap_ref(sn);
if (!irref_isk(ref)) {
IRIns *ir = &T->ir[ref];
uint32_t rs = ir->prev;
if (bloomtest(rfilt, ref))
rs = snap_renameref(T, snapno, ref, rs);
rsmap[s] = (uint16_t)rs;
rsmap[snap_slot(sn)] = (uint16_t)rs;
}
}
}
@ -193,89 +209,88 @@ void lj_snap_restore(jit_State *J, void *exptr)
SnapNo snapno = J->exitno; /* For now, snapno == exitno. */
Trace *T = J->trace[J->parent];
SnapShot *snap = &T->snap[snapno];
BCReg s, nslots = snap->nslots;
MSize n, nent = snap->nent;
SnapEntry *map = &T->snapmap[snap->mapofs];
SnapEntry *flinks = map + nslots + snap->nframelinks;
TValue *o, *newbase, *ntop;
SnapEntry *flinks = map + nent + snap->nframelinks;
BCReg nslots = snap->nslots;
TValue *frame;
BloomFilter rfilt = snap_renamefilter(T, snapno);
lua_State *L = J->L;
/* Make sure the stack is big enough for the slots from the snapshot. */
if (L->base + nslots >= L->maxstack) {
if (LJ_UNLIKELY(L->base + nslots > L->maxstack)) {
L->top = curr_topL(L);
lj_state_growstack(L, nslots - curr_proto(L)->framesize);
}
/* Fill stack slots with data from the registers and spill slots. */
newbase = NULL;
ntop = L->base;
for (s = 0, o = L->base-1; s < nslots; s++, o++) {
IRRef ref = snap_ref(map[s]);
if (ref) {
IRIns *ir = &T->ir[ref];
if (irref_isk(ref)) { /* Restore constant slot. */
lj_ir_kvalue(L, o, ir);
} else {
IRType1 t = ir->t;
RegSP rs = ir->prev;
if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
rs = snap_renameref(T, snapno, ref, rs);
if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
int32_t *sps = &ex->spill[regsp_spill(rs)];
if (irt_isinteger(t)) {
setintV(o, *sps);
} else if (irt_isnum(t)) {
o->u64 = *(uint64_t *)sps;
} else {
lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
setgcrefi(o->gcr, *sps);
setitype(o, irt_toitype(t));
}
} else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */
Reg r = regsp_reg(rs);
if (irt_isinteger(t)) {
setintV(o, ex->gpr[r-RID_MIN_GPR]);
} else if (irt_isnum(t)) {
setnumV(o, ex->fpr[r-RID_MIN_FPR]);
} else {
if (!irt_ispri(t))
setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
setitype(o, irt_toitype(t));
}
} else { /* Restore frame slot. */
lua_assert(ir->o == IR_FRAME);
/* This works for both PTR and FUNC IR_FRAME. */
setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void));
if (s != 0) /* Do not overwrite link to previous frame. */
o->fr.tp.ftsz = (int32_t)*--flinks;
if (irt_isfunc(ir->t)) {
GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr));
if (isluafunc(fn)) {
TValue *fs;
fs = o+1 + funcproto(fn)->framesize;
if (fs > ntop) ntop = fs; /* Update top for newly added frames. */
if (s != 0) newbase = o+1;
frame = L->base-1;
for (n = 0; n < nent; n++) {
IRRef ref = snap_ref(map[n]);
BCReg s = snap_slot(map[n]);
TValue *o = &frame[s]; /* Stack slots are relative to start frame. */
IRIns *ir = &T->ir[ref];
if (irref_isk(ref)) { /* Restore constant slot. */
lj_ir_kvalue(L, o, ir);
} else {
IRType1 t = ir->t;
RegSP rs = ir->prev;
if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
rs = snap_renameref(T, snapno, ref, rs);
if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
int32_t *sps = &ex->spill[regsp_spill(rs)];
if (irt_isinteger(t)) {
setintV(o, *sps);
} else if (irt_isnum(t)) {
o->u64 = *(uint64_t *)sps;
} else {
lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
setgcrefi(o->gcr, *sps);
setitype(o, irt_toitype(t));
}
} else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */
Reg r = regsp_reg(rs);
if (irt_isinteger(t)) {
setintV(o, ex->gpr[r-RID_MIN_GPR]);
} else if (irt_isnum(t)) {
setnumV(o, ex->fpr[r-RID_MIN_FPR]);
} else {
if (!irt_ispri(t))
setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
setitype(o, irt_toitype(t));
}
} else { /* Restore frame slot. */
lua_assert(ir->o == IR_FRAME);
/* This works for both PTR and FUNC IR_FRAME. */
setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void));
if (s != 0) /* Do not overwrite link to previous frame. */
o->fr.tp.ftsz = (int32_t)*--flinks;
if (irt_isfunc(ir->t)) {
GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr));
if (isluafunc(fn)) {
MSize framesize = funcproto(fn)->framesize;
TValue *fs;
L->base = ++o;
if (LJ_UNLIKELY(o + framesize > L->maxstack)) { /* Grow again? */
ptrdiff_t fsave = savestack(L, frame);
L->top = o;
lj_state_growstack(L, framesize);
frame = restorestack(L, fsave);
o = L->top;
}
fs = o + framesize;
if (s == 0) /* Only partially clear tail call frame at #0. */
o = &frame[nslots];
while (o < fs) /* Clear slots of newly added frames. */
setnilV(o++);
}
}
}
} else {
lua_assert(!newbase);
}
}
if (newbase) L->base = newbase;
if (ntop >= L->maxstack) { /* Need to grow the stack again. */
MSize need = (MSize)(ntop - o);
L->top = o;
lj_state_growstack(L, need);
o = L->top;
ntop = o + need;
}
L->top = curr_topL(L);
for (; o < ntop; o++) /* Clear remainder of newly added frames. */
setnilV(o);
lua_assert(map + nslots == flinks-1);
J->pc = (const BCIns *)(uintptr_t)(*--flinks);
J->pc = snap_pc(*--flinks);
lua_assert(map + nent == flinks);
}
#undef IR

View File

@ -14,6 +14,19 @@ LJ_FUNC void lj_snap_add(jit_State *J);
LJ_FUNC void lj_snap_shrink(jit_State *J);
LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno);
LJ_FUNC void lj_snap_restore(jit_State *J, void *exptr);
LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need);
LJ_FUNC void lj_snap_grow_map_(jit_State *J, MSize need);
/* Make sure the snapshot buffer has room for 'need' snapshots.
** Only calls the out-of-line lj_snap_grow_buf_ if the buffer is too small.
*/
static LJ_AINLINE void lj_snap_grow_buf(jit_State *J, MSize need)
{
  if (LJ_UNLIKELY(J->sizesnap < need))
    lj_snap_grow_buf_(J, need);
}
/* Make sure the snapshot map has room for 'need' entries.
** Only calls the out-of-line lj_snap_grow_map_ if the map is too small.
*/
static LJ_AINLINE void lj_snap_grow_map(jit_State *J, MSize need)
{
  if (LJ_UNLIKELY(J->sizesnapmap < need))
    lj_snap_grow_map_(J, need);
}
#endif
#endif

View File

@ -161,8 +161,8 @@ void lj_trace_reenableproto(GCproto *pt)
static void trace_unpatch(jit_State *J, Trace *T)
{
BCOp op = bc_op(T->startins);
uint32_t pcofs = T->snap[0].mapofs + T->snap[0].nslots;
BCIns *pc = ((BCIns *)(uintptr_t)T->snapmap[pcofs]) - 1;
MSize pcofs = T->snap[0].mapofs + T->snap[0].nent;
BCIns *pc = ((BCIns *)snap_pc(T->snapmap[pcofs])) - 1;
switch (op) {
case BC_FORL:
lua_assert(bc_op(*pc) == BC_JFORI);
@ -352,7 +352,6 @@ static void trace_start(jit_State *J)
J->cur.ir = J->irbuf;
J->cur.snap = J->snapbuf;
J->cur.snapmap = J->snapmapbuf;
/* J->cur.nsnapmap = 0; */
J->mergesnap = 0;
J->needsnap = 0;
J->guardemit.irt = 0;