From 67ca399a30cec05acacd7ea33d5cb0e361f92755 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 26 Jan 2010 21:49:04 +0100 Subject: [PATCH] Compress snapshots using a simple, extensible 1D-compression. Typically reduces storage overhead for snapshot maps by 60%. The extensible format is a prerequisite for the next redesign steps: Eliminate IR_FRAME and implement return-to-lower-frame. --- src/Makefile.dep | 6 +- src/lib_jit.c | 21 ++-- src/lj_asm.c | 101 +++++++++---------- src/lj_gdbjit.c | 4 +- src/lj_jit.h | 16 ++- src/lj_opt_dce.c | 6 +- src/lj_opt_loop.c | 168 ++++++++++++++++--------------- src/lj_record.c | 99 ++++++++++--------- src/lj_snap.c | 247 ++++++++++++++++++++++++---------------------- src/lj_snap.h | 13 +++ src/lj_trace.c | 5 +- 11 files changed, 366 insertions(+), 320 deletions(-) diff --git a/src/Makefile.dep b/src/Makefile.dep index 779ee545..ffb7d79b 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -11,7 +11,7 @@ buildvm_lib.o: buildvm_lib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ buildvm_peobj.o: buildvm_peobj.c buildvm.h lj_def.h lua.h luaconf.h \ lj_arch.h lj_bc.h lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ - lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_alloc.h + lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_lib.h lj_alloc.h lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \ lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_ctype.h lj_lib.h lj_libdef.h @@ -87,8 +87,8 @@ lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \ lj_traceerr.h lj_vm.h lj_folddef.h lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h \ - lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h + lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ + lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ diff --git a/src/lib_jit.c b/src/lib_jit.c index aa421613..f3425d98 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -332,18 +332,25 @@ LJLIB_CF(jit_util_tracesnap) if (T && sn < T->nsnap) { SnapShot *snap = &T->snap[sn]; SnapEntry *map = &T->snapmap[snap->mapofs]; - BCReg s, nslots = snap->nslots; + MSize n, nent = snap->nent; + BCReg nslots = snap->nslots; GCtab *t; lua_createtable(L, nslots ? (int)nslots : 1, 0); t = tabV(L->top-1); setintV(lj_tab_setint(L, t, 0), (int32_t)snap->ref - REF_BIAS); - for (s = 0; s < nslots; s++) { - TValue *o = lj_tab_setint(L, t, (int32_t)(s+1)); - IRRef ref = snap_ref(map[s]); - if (ref) - setintV(o, (int32_t)ref - REF_BIAS); - else + /* NYI: get rid of this and expose the compressed slot map. */ + { + BCReg s; + for (s = 0; s < nslots; s++) { + TValue *o = lj_tab_setint(L, t, (int32_t)(s+1)); setboolV(o, 0); + } + } + for (n = 0; n < nent; n++) { + BCReg s = snap_slot(map[n]); + IRRef ref = snap_ref(map[n]); + TValue *o = lj_tab_setint(L, t, (int32_t)(s+1)); + setintV(o, (int32_t)ref - REF_BIAS); } return 1; } diff --git a/src/lj_asm.c b/src/lj_asm.c index 9f5ce030..b3656e00 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -926,9 +926,9 @@ static void asm_snap_alloc(ASMState *as) { SnapShot *snap = &as->T->snap[as->snapno]; SnapEntry *map = &as->T->snapmap[snap->mapofs]; - BCReg s, nslots = snap->nslots; - for (s = 0; s < nslots; s++) { - IRRef ref = snap_ref(map[s]); + MSize n, nent = snap->nent; + for (n = 0; n < nent; n++) { + IRRef ref = snap_ref(map[n]); if (!irref_isk(ref)) { IRIns *ir = IR(ref); if (!ra_used(ir) && ir->o != IR_FRAME) { @@ -960,9 +960,9 @@ static int asm_snap_checkrename(ASMState *as, IRRef ren) { SnapShot *snap = &as->T->snap[as->snapno]; SnapEntry *map = &as->T->snapmap[snap->mapofs]; - BCReg s, nslots = snap->nslots; - for (s = 0; s < nslots; s++) { - IRRef ref = snap_ref(map[s]); + MSize n, nent = snap->nent; + for (n = 0; n < nent; n++) { + IRRef ref = snap_ref(map[n]); if (ref == ren) { IRIns *ir = IR(ref); ra_spill(as, ir); /* Register renamed, so force a spill slot. */ @@ -2465,18 +2465,17 @@ static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base) */ RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base); SnapEntry *map = &as->T->snapmap[snap->mapofs]; - BCReg s, nslots = snap->nslots; - for (s = 0; s < nslots; s++) { - IRRef ref = snap_ref(map[s]); + MSize n, nent = snap->nent; + for (n = 0; n < nent; n++) { + IRRef ref = snap_ref(map[n]); if (!irref_isk(ref)) { + int32_t ofs = 8*(int32_t)(snap_slot(map[n])-1); IRIns *ir = IR(ref); if (ir->o == IR_FRAME) { /* NYI: sync the frame, bump base, set topslot, clear new slots. */ lj_trace_err(as->J, LJ_TRERR_NYIGCF); - } else if (irt_isgcv(ir->t) && - !(ir->o == IR_SLOAD && ir->op1 < nslots && map[ir->op1] == 0)) { + } else if (irt_isgcv(ir->t)) { Reg src = ra_alloc1(as, ref, allow); - int32_t ofs = 8*(int32_t)(s-1); emit_movtomro(as, src, base, ofs); emit_movmroi(as, base, ofs+4, irt_toitype(ir->t)); checkmclim(as); @@ -2504,7 +2503,7 @@ static void asm_gc_check(ASMState *as, SnapShot *snap) emit_loadi(as, tmp, (int32_t)as->gcsteps); /* We don't know spadj yet, so get the C frame from L->cframe. */ emit_movmroi(as, tmp, CFRAME_OFS_PC, - (int32_t)as->T->snapmap[snap->mapofs+snap->nslots]); + (int32_t)as->T->snapmap[snap->mapofs+snap->nent]); emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK); lstate = IR(ASMREF_L)->r; emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe)); @@ -2965,19 +2964,19 @@ static void asm_head_side(ASMState *as) static void asm_tail_sync(ASMState *as) { SnapShot *snap = &as->T->snap[as->T->nsnap-1]; /* Last snapshot. */ - BCReg s, nslots = snap->nslots; + MSize n, nent = snap->nent; SnapEntry *map = &as->T->snapmap[snap->mapofs]; - SnapEntry *flinks = map + nslots + snap->nframelinks; + SnapEntry *flinks = map + nent + snap->nframelinks; BCReg newbase = 0; - BCReg secondbase = ~(BCReg)0; - BCReg topslot = 0; + BCReg nslots, topslot = 0; checkmclim(as); ra_allocref(as, REF_BASE, RID2RSET(RID_BASE)); /* Must check all frames to find topslot (outer can be larger than inner). */ - for (s = 0; s < nslots; s++) { - IRRef ref = snap_ref(map[s]); + for (n = 0; n < nent; n++) { + IRRef ref = snap_ref(map[n]); + BCReg s = snap_slot(map[n]); if (!irref_isk(ref)) { IRIns *ir = IR(ref); if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { @@ -2985,10 +2984,7 @@ static void asm_tail_sync(ASMState *as) if (isluafunc(fn)) { BCReg fs = s + funcproto(fn)->framesize; if (fs > topslot) topslot = fs; - if (s != 0) { - newbase = s; - if (secondbase == ~(BCReg)0) secondbase = s; - } + newbase = s; } } } @@ -2998,7 +2994,7 @@ static void asm_tail_sync(ASMState *as) if (as->T->link == TRACE_INTERP) { /* Setup fixed registers for exit to interpreter. */ emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch); - emit_loadi(as, RID_PC, (int32_t)map[nslots]); + emit_loadi(as, RID_PC, (int32_t)map[nent]); } else if (newbase) { /* Save modified BASE for linking to trace with higher start frame. */ emit_setgl(as, RID_BASE, jit_base); @@ -3007,51 +3003,50 @@ static void asm_tail_sync(ASMState *as) emit_addptr(as, RID_BASE, 8*(int32_t)newbase); /* Clear stack slots of newly added frames. */ + nslots = snap->nslots; if (nslots <= topslot) { if (nslots < topslot) { + BCReg s; for (s = nslots; s <= topslot; s++) { - emit_movtomro(as, RID_EAX, RID_BASE, 8*(int32_t)s-4); + emit_movtomro(as, RID_EAX, RID_BASE, 8*((int32_t)s-1)+4); checkmclim(as); } emit_loadi(as, RID_EAX, LJ_TNIL); } else { - emit_movmroi(as, RID_BASE, 8*(int32_t)nslots-4, LJ_TNIL); + emit_movmroi(as, RID_BASE, 8*((int32_t)nslots-1)+4, LJ_TNIL); } } /* Store the value of all modified slots to the Lua stack. */ - for (s = 0; s < nslots; s++) { + for (n = 0; n < nent; n++) { + BCReg s = snap_slot(map[n]); int32_t ofs = 8*((int32_t)s-1); - IRRef ref = snap_ref(map[s]); - if (ref) { - IRIns *ir = IR(ref); - /* No need to restore readonly slots and unmodified non-parent slots. */ - if (ir->o == IR_SLOAD && ir->op1 == s && - (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) - continue; - if (irt_isnum(ir->t)) { - Reg src = ra_alloc1(as, ref, RSET_FPR); - emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); - } else if (ir->o == IR_FRAME) { - emit_movmroi(as, RID_BASE, ofs, ptr2addr(ir_kgc(IR(ir->op2)))); - if (s != 0) /* Do not overwrite link to previous frame. */ - emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks)); - } else { - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t)); - if (!irref_isk(ref)) { - Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); - emit_movtomro(as, src, RID_BASE, ofs); - } else if (!irt_ispri(ir->t)) { - emit_movmroi(as, RID_BASE, ofs, ir->i); - } - emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); - } + IRRef ref = snap_ref(map[n]); + IRIns *ir = IR(ref); + /* No need to restore readonly slots and unmodified non-parent slots. */ + if (ir->o == IR_SLOAD && ir->op1 == s && + (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) + continue; + if (irt_isnum(ir->t)) { + Reg src = ra_alloc1(as, ref, RSET_FPR); + emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); + } else if (ir->o == IR_FRAME) { + emit_movmroi(as, RID_BASE, ofs, ptr2addr(ir_kgc(IR(ir->op2)))); + if (s != 0) /* Do not overwrite link to previous frame. */ + emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks)); } else { - lua_assert(!(s > secondbase)); + lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t)); + if (!irref_isk(ref)) { + Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); + emit_movtomro(as, src, RID_BASE, ofs); + } else if (!irt_ispri(ir->t)) { + emit_movmroi(as, RID_BASE, ofs, ir->i); + } + emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); } checkmclim(as); } - lua_assert(map + nslots == flinks-1); + lua_assert(map + nent == flinks-1); } /* Fixup the tail code. */ diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c index 4fce5eb9..345afb51 100644 --- a/src/lj_gdbjit.c +++ b/src/lj_gdbjit.c @@ -698,8 +698,8 @@ void lj_gdbjit_addtrace(jit_State *J, Trace *T, TraceNo traceno) lua_State *L = J->L; GCproto *pt = &gcref(T->startpt)->pt; TraceNo parent = T->ir[REF_BASE].op1; - uintptr_t pcofs = (uintptr_t)(T->snap[0].mapofs+T->snap[0].nslots); - const BCIns *startpc = (const BCIns *)(uintptr_t)T->snapmap[pcofs]; + uintptr_t pcofs = (uintptr_t)(T->snap[0].mapofs+T->snap[0].nent); + const BCIns *startpc = snap_pc(T->snapmap[pcofs]); ctx.T = T; ctx.mcaddr = (uintptr_t)T->mcode; ctx.szmcode = T->szmcode; diff --git a/src/lj_jit.h b/src/lj_jit.h index 36e60113..1a1e407a 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -112,17 +112,27 @@ typedef uint8_t MCode; typedef struct SnapShot { uint16_t mapofs; /* Offset into snapshot map. */ IRRef1 ref; /* First IR ref for this snapshot. */ - uint8_t nslots; /* Number of stack slots. */ + uint8_t nslots; /* Number of valid slots. */ + uint8_t nent; /* Number of compressed entries. */ uint8_t nframelinks; /* Number of frame links. */ uint8_t count; /* Count of taken exits for this snapshot. */ - uint8_t unused1; } SnapShot; #define SNAPCOUNT_DONE 255 /* Already compiled and linked a side trace. */ -/* Snapshot entry. */ +/* Compressed snapshot entry. */ typedef uint32_t SnapEntry; + +#define SNAP_FRAME 0x010000 /* Slot has frame link. */ + +#define SNAP(slot, flags, ref) ((SnapEntry)((slot) << 24) + (flags) + (ref)) +#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) +#define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) #define snap_ref(sn) ((sn) & 0xffff) +#define snap_slot(sn) ((BCReg)((sn) >> 24)) +#define snap_isframe(sn) ((sn) & SNAP_FRAME) +#define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn)) +#define snap_setref(sn, ref) (((sn) & 0xffff0000) | (ref)) /* Snapshot and exit numbers. */ typedef uint32_t SnapNo; diff --git a/src/lj_opt_dce.c b/src/lj_opt_dce.c index 636d5183..90e81526 100644 --- a/src/lj_opt_dce.c +++ b/src/lj_opt_dce.c @@ -24,9 +24,9 @@ static void dce_marksnap(jit_State *J) for (i = 0; i < nsnap; i++) { SnapShot *snap = &J->cur.snap[i]; SnapEntry *map = &J->cur.snapmap[snap->mapofs]; - BCReg s, nslots = snap->nslots; - for (s = 0; s < nslots; s++) { - IRRef ref = snap_ref(map[s]); + MSize n, nent = snap->nent; + for (n = 0; n < nent; n++) { + IRRef ref = snap_ref(map[n]); if (!irref_isk(ref)) irt_setmark(IR(ref)->t); } diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c index f2950fe9..e5ad5b43 100644 --- a/src/lj_opt_loop.c +++ b/src/lj_opt_loop.c @@ -10,7 +10,6 @@ #if LJ_HASJIT -#include "lj_gc.h" #include "lj_err.h" #include "lj_str.h" #include "lj_ir.h" @@ -163,21 +162,69 @@ static void loop_emit_phi(jit_State *J, IRRef1 *subst, IRRef1 *phi, IRRef nphi) /* -- Loop unrolling using copy-substitution ------------------------------ */ +/* Copy-substitute snapshot. */ +static void loop_subst_snap(jit_State *J, SnapShot *osnap, + SnapEntry *loopmap, IRRef1 *subst) +{ + SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs]; + MSize nmapofs, nframelinks; + MSize on, ln, nn, onent = osnap->nent; + BCReg nslots = osnap->nslots; + SnapShot *snap = &J->cur.snap[J->cur.nsnap]; + if (irt_isguard(J->guardemit)) { /* Guard inbetween? */ + nmapofs = J->cur.nsnapmap; + J->cur.nsnap++; /* Add new snapshot. */ + } else { /* Otherwise overwrite previous snapshot. */ + snap--; + nmapofs = snap->mapofs; + } + J->guardemit.irt = 0; + nframelinks = osnap->nframelinks; + /* Setup new snapshot. */ + snap->mapofs = (uint16_t)nmapofs; + snap->ref = (IRRef1)J->cur.nins; + snap->nframelinks = (uint8_t)nframelinks; + snap->nslots = nslots; + snap->count = 0; + nmap = &J->cur.snapmap[nmapofs]; + /* Substitute snapshot slots. */ + on = ln = nn = 0; + while (on < onent) { + SnapEntry osn = omap[on], lsn = loopmap[ln]; + if (snap_slot(lsn) < snap_slot(osn)) { /* Copy slot from loop map. */ + nmap[nn++] = lsn; + ln++; + } else { /* Copy substituted slot from snapshot map. */ + if (snap_slot(lsn) == snap_slot(osn)) ln++; /* Shadowed loop slot. */ + if (!irref_isk(snap_ref(osn))) + osn = snap_setref(osn, subst[snap_ref(osn)]); + nmap[nn++] = osn; + on++; + } + } + while (snap_slot(loopmap[ln]) < nslots) /* Copy remaining loop slots. */ + nmap[nn++] = loopmap[ln++]; + snap->nent = (uint8_t)nn; + J->cur.nsnapmap = (uint16_t)(nmapofs + nn + nframelinks); + omap += onent; + nmap += nn; + for (nn = 0; nn < nframelinks; nn++) /* Copy frame links. */ + nmap[nn] = omap[nn]; +} + /* Unroll loop. */ static void loop_unroll(jit_State *J) { IRRef1 phi[LJ_MAX_PHI]; uint32_t nphi = 0; IRRef1 *subst; - SnapShot *osnap, *snap; - SnapEntry *loopmap; - BCReg loopslots; - MSize nsnap, nsnapmap; - IRRef ins, invar, osnapref; + SnapShot *osnap; + SnapEntry *loopmap, *psentinel; + IRRef ins, invar; /* Use temp buffer for substitution table. ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. - ** Note: don't call into the VM or run the GC or the buffer may be gone. + ** Caveat: don't call into the VM or run the GC or the buffer may be gone. */ invar = J->cur.nins; subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, @@ -187,80 +234,37 @@ static void loop_unroll(jit_State *J) /* LOOP separates the pre-roll from the loop body. */ emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0); - /* Ensure size for copy-substituted snapshots (minus #0 and loop snapshot). */ - nsnap = J->cur.nsnap; - if (LJ_UNLIKELY(2*nsnap-2 > J->sizesnap)) { - MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; - if (2*nsnap-2 > maxsnap) - lj_trace_err(J, LJ_TRERR_SNAPOV); - lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); - J->cur.snap = J->snapbuf; - } - nsnapmap = J->cur.nsnapmap; /* Use temp. copy to avoid undo. */ - if (LJ_UNLIKELY(nsnapmap*2 > J->sizesnapmap)) { - J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf, - J->sizesnapmap*sizeof(SnapEntry), - 2*J->sizesnapmap*sizeof(SnapEntry)); - J->cur.snapmap = J->snapmapbuf; - J->sizesnapmap *= 2; - } + /* Grow snapshot buffer and map for copy-substituted snapshots. + ** Need up to twice the number of snapshots minus #0 and loop snapshot. + ** Need up to twice the number of entries plus fallback substitutions + ** from the loop snapshot entries for each new snapshot. + ** Caveat: both calls may reallocate J->cur.snap and J->cur.snapmap! + */ + { + MSize nsnap = J->cur.nsnap; + SnapShot *loopsnap; + lj_snap_grow_buf(J, 2*nsnap-2); + lj_snap_grow_map(J, J->cur.nsnapmap*2+(nsnap-2)*J->cur.snap[nsnap-1].nent); - /* The loop snapshot is used for fallback substitutions. */ - snap = &J->cur.snap[nsnap-1]; - loopmap = &J->cur.snapmap[snap->mapofs]; - loopslots = snap->nslots; - /* The PC of snapshot #0 and the loop snapshot must match. */ - lua_assert(loopmap[loopslots] == J->cur.snapmap[J->cur.snap[0].nslots]); + /* The loop snapshot is used for fallback substitutions. */ + loopsnap = &J->cur.snap[nsnap-1]; + loopmap = &J->cur.snapmap[loopsnap->mapofs]; + /* The PC of snapshot #0 and the loop snapshot must match. */ + psentinel = &loopmap[loopsnap->nent]; + lua_assert(*psentinel == J->cur.snapmap[J->cur.snap[0].nent]); + *psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */ + } /* Start substitution with snapshot #1 (#0 is empty for root traces). */ osnap = &J->cur.snap[1]; - osnapref = osnap->ref; /* Copy and substitute all recorded instructions and snapshots. */ for (ins = REF_FIRST; ins < invar; ins++) { IRIns *ir; IRRef op1, op2; - /* Copy-substitute snapshot. */ - if (ins >= osnapref) { - SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs]; - BCReg s, nslots; - uint32_t nmapofs, nframelinks; - if (irt_isguard(J->guardemit)) { /* Guard inbetween? */ - nmapofs = nsnapmap; - snap++; /* Add new snapshot. */ - } else { - nmapofs = snap->mapofs; /* Overwrite previous snapshot. */ - } - J->guardemit.irt = 0; - nslots = osnap->nslots; - nframelinks = osnap->nframelinks; - snap->mapofs = (uint16_t)nmapofs; - snap->ref = (IRRef1)J->cur.nins; - snap->nslots = (uint8_t)nslots; - snap->nframelinks = (uint8_t)nframelinks; - snap->count = 0; - osnap++; - osnapref = osnap->ref; - nsnapmap = nmapofs + nslots + nframelinks; - nmap = &J->cur.snapmap[nmapofs]; - /* Substitute snapshot slots. */ - for (s = 0; s < nslots; s++) { - IRRef ref = snap_ref(omap[s]); - if (ref) { - if (!irref_isk(ref)) - ref = subst[ref]; - } else if (s < loopslots) { - ref = loopmap[s]; - } - nmap[s] = ref; - } - /* Copy frame links. */ - nmap += nslots; - omap += nslots; - for (s = 0; s < nframelinks; s++) - nmap[s] = omap[s]; - } + if (ins >= osnap->ref) /* Instruction belongs to next snapshot? */ + loop_subst_snap(J, osnap++, loopmap, subst); /* Copy-substitute it. */ /* Substitute instruction operands. */ ir = IR(ins); @@ -295,22 +299,24 @@ static void loop_unroll(jit_State *J) } } } - if (irt_isguard(J->guardemit)) { /* Guard inbetween? */ - J->cur.nsnapmap = (uint16_t)nsnapmap; - snap++; - } else { - J->cur.nsnapmap = (uint16_t)snap->mapofs; /* Last snapshot is redundant. */ - } - J->cur.nsnap = (uint16_t)(snap - J->cur.snap); + if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */ + J->cur.nsnapmap = (uint16_t)J->cur.snap[--J->cur.nsnap].mapofs; lua_assert(J->cur.nsnapmap <= J->sizesnapmap); + *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */ loop_emit_phi(J, subst, phi, nphi); } /* Undo any partial changes made by the loop optimization. */ -static void loop_undo(jit_State *J, IRRef ins) +static void loop_undo(jit_State *J, IRRef ins, MSize nsnap) { ptrdiff_t i; + SnapShot *snap = &J->cur.snap[nsnap-1]; + SnapEntry *map = J->cur.snapmap; + map[snap->mapofs + snap->nent] = map[J->cur.snap[0].nent]; /* Restore PC. */ + J->cur.nsnapmap = (uint16_t)(snap->mapofs + snap->nent + snap->nframelinks); + J->cur.nsnap = nsnap; + J->guardemit.irt = 0; lj_ir_rollback(J, ins); for (i = 0; i < BPROP_SLOTS; i++) { /* Remove backprop. cache entries. */ BPropEntry *bp = &J->bpropcache[i]; @@ -336,6 +342,7 @@ static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud) int lj_opt_loop(jit_State *J) { IRRef nins = J->cur.nins; + MSize nsnap = J->cur.nsnap; int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); if (LJ_UNLIKELY(errcode)) { lua_State *L = J->L; @@ -348,8 +355,7 @@ int lj_opt_loop(jit_State *J) if (--J->instunroll < 0) /* But do not unroll forever. */ break; L->top--; /* Remove error object. */ - J->guardemit.irt = 0; - loop_undo(J, nins); + loop_undo(J, nins, nsnap); return 1; /* Loop optimization failed, continue recording. */ default: break; diff --git a/src/lj_record.c b/src/lj_record.c index 6af25ccb..3f442088 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -1696,7 +1696,7 @@ static void optstate_comp(jit_State *J, int cond) const BCIns *npc = J->pc + 2 + (cond ? bc_j(jmpins) : 0); SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; /* Avoid re-recording the comparison in side traces. */ - J->cur.snapmap[snap->mapofs + snap->nslots] = u32ptr(npc); + J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); J->needsnap = 1; /* Shrink last snapshot if possible. */ if (bc_a(jmpins) < J->maxslot) { @@ -2159,61 +2159,62 @@ static void rec_setup_side(jit_State *J, Trace *T) { SnapShot *snap = &T->snap[J->exitno]; SnapEntry *map = &T->snapmap[snap->mapofs]; - BCReg s, nslots = snap->nslots; + MSize n, nent = snap->nent; BloomFilter seen = 0; - for (s = 0; s < nslots; s++) { - IRRef ref = snap_ref(map[s]); - if (ref) { - IRIns *ir = &T->ir[ref]; - TRef tr = 0; - /* The bloom filter avoids O(nslots^2) overhead for de-duping slots. */ - if (bloomtest(seen, ref)) { - BCReg j; - for (j = 0; j < s; j++) - if (snap_ref(map[j]) == ref) { - if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { - lua_assert(s != 0); - J->baseslot = s+1; - J->framedepth++; - } - tr = J->slot[j]; - goto dupslot; - } - } - bloomset(seen, ref); - switch ((IROp)ir->o) { - case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break; - case IR_KINT: tr = lj_ir_kint(J, ir->i); break; - case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break; - case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break; - case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */ - if (irt_isfunc(ir->t)) { - if (s != 0) { + /* Emit IR for slots inherited from parent snapshot. */ + for (n = 0; n < nent; n++) { + IRRef ref = snap_ref(map[n]); + BCReg s = snap_slot(map[n]); + IRIns *ir = &T->ir[ref]; + TRef tr; + /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */ + if (bloomtest(seen, ref)) { + MSize j; + for (j = 0; j < n; j++) + if (snap_ref(map[j]) == ref) { + tr = J->slot[snap_slot(map[j])]; + if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { + lua_assert(s != 0); J->baseslot = s+1; J->framedepth++; } - tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2])); - tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr); - } else { - tr = lj_ir_kptr(J, mref(T->ir[ir->op2].ptr, void)); - tr = emitir_raw(IRT(IR_FRAME, IRT_PTR), tr, tr); + goto dupslot; } - break; - case IR_SLOAD: /* Inherited SLOADs don't need a guard or type check. */ - tr = emitir_raw(ir->ot & ~IRT_GUARD, s, - (ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT); - break; - default: /* Parent refs are already typed and don't need a guard. */ - tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s, - IRSLOAD_INHERIT|IRSLOAD_PARENT); - break; - } - dupslot: - J->slot[s] = tr; } + bloomset(seen, ref); + switch ((IROp)ir->o) { + /* Only have to deal with constants that can occur in stack slots. */ + case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break; + case IR_KINT: tr = lj_ir_kint(J, ir->i); break; + case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break; + case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break; + case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */ + if (irt_isfunc(ir->t)) { + if (s != 0) { + J->baseslot = s+1; + J->framedepth++; + } + tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2])); + tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr); + } else { + tr = lj_ir_kptr(J, mref(T->ir[ir->op2].ptr, void)); + tr = emitir_raw(IRT(IR_FRAME, IRT_PTR), tr, tr); + } + break; + case IR_SLOAD: /* Inherited SLOADs don't need a guard or type check. */ + tr = emitir_raw(ir->ot & ~IRT_GUARD, s, + (ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT); + break; + default: /* Parent refs are already typed and don't need a guard. */ + tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s, + IRSLOAD_INHERIT|IRSLOAD_PARENT); + break; + } + dupslot: + J->slot[s] = tr; } J->base = J->slot + J->baseslot; - J->maxslot = nslots - J->baseslot; + J->maxslot = snap->nslots - J->baseslot; lj_snap_add(J); } @@ -2259,7 +2260,7 @@ void lj_record_setup(jit_State *J) J->cur.root = (uint16_t)root; J->cur.startins = BCINS_AD(BC_JMP, 0, 0); /* Check whether we could at least potentially form an extra loop. */ - if (J->exitno == 0 && T->snap[0].nslots == 1 && T->snapmap[0] == 0) { + if (J->exitno == 0 && T->snap[0].nent == 0) { /* We can narrow a FORL for some side traces, too. */ if (J->pc > J->pt->bc && bc_op(J->pc[-1]) == BC_JFORI && bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) { diff --git a/src/lj_snap.c b/src/lj_snap.c index f262e1c9..d22c90a4 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -23,28 +23,50 @@ /* Some local macros to save typing. Undef'd at the end. */ #define IR(ref) (&J->cur.ir[(ref)]) +/* -- Snapshot buffer allocation ------------------------------------------ */ + +/* Grow snapshot buffer. */ +void lj_snap_grow_buf_(jit_State *J, MSize need) +{ + MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; + if (need > maxsnap) + lj_trace_err(J, LJ_TRERR_SNAPOV); + lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); + J->cur.snap = J->snapbuf; +} + +/* Grow snapshot map buffer. */ +void lj_snap_grow_map_(jit_State *J, MSize need) +{ + if (need < 2*J->sizesnapmap) + need = 2*J->sizesnapmap; + else if (need < 64) + need = 64; + J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf, + J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry)); + J->cur.snapmap = J->snapmapbuf; + J->sizesnapmap = need; +} + /* -- Snapshot generation ------------------------------------------------- */ -/* NYI: Snapshots are in need of a redesign. The current storage model for -** snapshot maps is too wasteful. They could be compressed (1D or 2D) and -** made more flexible at the same time. Iterators should no longer need to -** skip unmodified slots. IR_FRAME should be eliminated, too. -*/ +/* NYI: IR_FRAME should be eliminated, too. */ /* Add all modified slots to the snapshot. */ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) { BCReg s; + MSize n = 0; for (s = 0; s < nslots; s++) { IRRef ref = tref_ref(J->slot[s]); if (ref) { IRIns *ir = IR(ref); - if (ir->o == IR_SLOAD && ir->op1 == s && !(ir->op2 & IRSLOAD_INHERIT)) - ref = 0; + if (!(ir->o == IR_SLOAD && ir->op1 == s && + !(ir->op2 & IRSLOAD_INHERIT))) + map[n++] = SNAP(s, ir->o == IR_FRAME ? SNAP_FRAME : 0, ref); } - map[s] = (SnapEntry)ref; } - return nslots; + return n; } /* Add frame links at the end of the snapshot. */ @@ -53,17 +75,17 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map) cTValue *frame = J->L->base - 1; cTValue *lim = J->L->base - J->baseslot; MSize f = 0; - map[f++] = u32ptr(J->pc); - while (frame > lim) { + map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ + while (frame > lim) { /* Backwards traversal of all frames above base. */ if (frame_islua(frame)) { - map[f++] = u32ptr(frame_pc(frame)); + map[f++] = SNAP_MKPC(frame_pc(frame)); frame = frame_prevl(frame); } else if (frame_ispcall(frame)) { - map[f++] = (uint32_t)frame_ftsz(frame); + map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); frame = frame_prevd(frame); } else if (frame_iscont(frame)) { - map[f++] = (uint32_t)frame_ftsz(frame); - map[f++] = u32ptr(frame_contpc(frame)); + map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); + map[f++] = SNAP_MKPC(frame_contpc(frame)); frame = frame_prevd(frame); } else { lua_assert(0); @@ -76,28 +98,19 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map) static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) { BCReg nslots = J->baseslot + J->maxslot; - MSize nsm, nframelinks; + MSize nent, nframelinks; SnapEntry *p; /* Conservative estimate. Continuation frames need 2 slots. */ - nsm = nsnapmap + nslots + (uint32_t)J->framedepth*2+1; - if (LJ_UNLIKELY(nsm > J->sizesnapmap)) { /* Need to grow snapshot map? */ - if (nsm < 2*J->sizesnapmap) - nsm = 2*J->sizesnapmap; - else if (nsm < 64) - nsm = 64; - J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf, - J->sizesnapmap*sizeof(SnapEntry), nsm*sizeof(SnapEntry)); - J->cur.snapmap = J->snapmapbuf; - J->sizesnapmap = nsm; - } + lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth*2+1); p = &J->cur.snapmap[nsnapmap]; - nslots = snapshot_slots(J, p, nslots); - nframelinks = snapshot_framelinks(J, p + nslots); - J->cur.nsnapmap = (uint16_t)(nsnapmap + nslots + nframelinks); + nent = snapshot_slots(J, p, nslots); + nframelinks = snapshot_framelinks(J, p + nent); + J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + nframelinks); snap->mapofs = (uint16_t)nsnapmap; snap->ref = (IRRef1)J->cur.nins; - snap->nslots = (uint8_t)nslots; + snap->nent = (uint8_t)nent; snap->nframelinks = (uint8_t)nframelinks; + snap->nslots = (uint8_t)nslots; snap->count = 0; } @@ -111,14 +124,7 @@ void lj_snap_add(jit_State *J) (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) { nsnapmap = J->cur.snap[--nsnap].mapofs; } else { - /* Need to grow snapshot buffer? */ - if (LJ_UNLIKELY(nsnap >= J->sizesnap)) { - MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; - if (nsnap >= maxsnap) - lj_trace_err(J, LJ_TRERR_SNAPOV); - lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); - J->cur.snap = J->snapbuf; - } + lj_snap_grow_buf(J, nsnap+1); J->cur.nsnap = (uint16_t)(nsnap+1); } J->mergesnap = 0; @@ -131,14 +137,21 @@ void lj_snap_shrink(jit_State *J) { BCReg nslots = J->baseslot + J->maxslot; SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; - SnapEntry *oflinks = &J->cur.snapmap[snap->mapofs + snap->nslots]; - SnapEntry *nflinks = &J->cur.snapmap[snap->mapofs + nslots]; - uint32_t s, nframelinks = snap->nframelinks; + SnapEntry *map = &J->cur.snapmap[snap->mapofs]; + MSize nent = snap->nent; lua_assert(nslots < snap->nslots); snap->nslots = (uint8_t)nslots; - J->cur.nsnapmap = (uint16_t)(snap->mapofs + nslots + nframelinks); - for (s = 0; s < nframelinks; s++) /* Move frame links down. */ - nflinks[s] = oflinks[s]; + if (nent > 0 && snap_slot(map[nent-1]) >= nslots) { + MSize s, delta, nframelinks = snap->nframelinks; + for (nent--; nent > 0 && snap_slot(map[nent-1]) >= nslots; nent--) + ; + delta = snap->nent - nent; + snap->nent = (uint8_t)nent; + J->cur.nsnapmap = (uint16_t)(snap->mapofs + nent + nframelinks); + map += nent; + for (s = 0; s < nframelinks; s++) /* Move frame links down. */ + map[s] = map[s+delta]; + } } /* -- Snapshot access ----------------------------------------------------- */ @@ -167,21 +180,24 @@ static RegSP snap_renameref(Trace *T, SnapNo lim, IRRef ref, RegSP rs) return rs; } -/* Convert a snapshot into a linear slot -> RegSP map. */ +/* Convert a snapshot into a linear slot -> RegSP map. +** Note: unused slots are not initialized! +*/ void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno) { SnapShot *snap = &T->snap[snapno]; - BCReg s, nslots = snap->nslots; + MSize n, nent = snap->nent; SnapEntry *map = &T->snapmap[snap->mapofs]; BloomFilter rfilt = snap_renamefilter(T, snapno); - for (s = 0; s < nslots; s++) { - IRRef ref = snap_ref(map[s]); + for (n = 0; n < nent; n++) { + SnapEntry sn = map[n]; + IRRef ref = snap_ref(sn); if (!irref_isk(ref)) { IRIns *ir = &T->ir[ref]; uint32_t rs = ir->prev; if (bloomtest(rfilt, ref)) rs = snap_renameref(T, snapno, ref, rs); - rsmap[s] = (uint16_t)rs; + rsmap[snap_slot(sn)] = (uint16_t)rs; } } } @@ -193,89 +209,88 @@ void lj_snap_restore(jit_State *J, void *exptr) SnapNo snapno = J->exitno; /* For now, snapno == exitno. */ Trace *T = J->trace[J->parent]; SnapShot *snap = &T->snap[snapno]; - BCReg s, nslots = snap->nslots; + MSize n, nent = snap->nent; SnapEntry *map = &T->snapmap[snap->mapofs]; - SnapEntry *flinks = map + nslots + snap->nframelinks; - TValue *o, *newbase, *ntop; + SnapEntry *flinks = map + nent + snap->nframelinks; + BCReg nslots = snap->nslots; + TValue *frame; BloomFilter rfilt = snap_renamefilter(T, snapno); lua_State *L = J->L; /* Make sure the stack is big enough for the slots from the snapshot. */ - if (L->base + nslots >= L->maxstack) { + if (LJ_UNLIKELY(L->base + nslots > L->maxstack)) { L->top = curr_topL(L); lj_state_growstack(L, nslots - curr_proto(L)->framesize); } /* Fill stack slots with data from the registers and spill slots. */ - newbase = NULL; - ntop = L->base; - for (s = 0, o = L->base-1; s < nslots; s++, o++) { - IRRef ref = snap_ref(map[s]); - if (ref) { - IRIns *ir = &T->ir[ref]; - if (irref_isk(ref)) { /* Restore constant slot. */ - lj_ir_kvalue(L, o, ir); - } else { - IRType1 t = ir->t; - RegSP rs = ir->prev; - if (LJ_UNLIKELY(bloomtest(rfilt, ref))) - rs = snap_renameref(T, snapno, ref, rs); - if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ - int32_t *sps = &ex->spill[regsp_spill(rs)]; - if (irt_isinteger(t)) { - setintV(o, *sps); - } else if (irt_isnum(t)) { - o->u64 = *(uint64_t *)sps; - } else { - lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ - setgcrefi(o->gcr, *sps); - setitype(o, irt_toitype(t)); - } - } else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */ - Reg r = regsp_reg(rs); - if (irt_isinteger(t)) { - setintV(o, ex->gpr[r-RID_MIN_GPR]); - } else if (irt_isnum(t)) { - setnumV(o, ex->fpr[r-RID_MIN_FPR]); - } else { - if (!irt_ispri(t)) - setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]); - setitype(o, irt_toitype(t)); - } - } else { /* Restore frame slot. */ - lua_assert(ir->o == IR_FRAME); - /* This works for both PTR and FUNC IR_FRAME. */ - setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void)); - if (s != 0) /* Do not overwrite link to previous frame. */ - o->fr.tp.ftsz = (int32_t)*--flinks; - if (irt_isfunc(ir->t)) { - GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr)); - if (isluafunc(fn)) { - TValue *fs; - fs = o+1 + funcproto(fn)->framesize; - if (fs > ntop) ntop = fs; /* Update top for newly added frames. */ - if (s != 0) newbase = o+1; + frame = L->base-1; + for (n = 0; n < nent; n++) { + IRRef ref = snap_ref(map[n]); + BCReg s = snap_slot(map[n]); + TValue *o = &frame[s]; /* Stack slots are relative to start frame. */ + IRIns *ir = &T->ir[ref]; + if (irref_isk(ref)) { /* Restore constant slot. */ + lj_ir_kvalue(L, o, ir); + } else { + IRType1 t = ir->t; + RegSP rs = ir->prev; + if (LJ_UNLIKELY(bloomtest(rfilt, ref))) + rs = snap_renameref(T, snapno, ref, rs); + if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ + int32_t *sps = &ex->spill[regsp_spill(rs)]; + if (irt_isinteger(t)) { + setintV(o, *sps); + } else if (irt_isnum(t)) { + o->u64 = *(uint64_t *)sps; + } else { + lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ + setgcrefi(o->gcr, *sps); + setitype(o, irt_toitype(t)); + } + } else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */ + Reg r = regsp_reg(rs); + if (irt_isinteger(t)) { + setintV(o, ex->gpr[r-RID_MIN_GPR]); + } else if (irt_isnum(t)) { + setnumV(o, ex->fpr[r-RID_MIN_FPR]); + } else { + if (!irt_ispri(t)) + setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]); + setitype(o, irt_toitype(t)); + } + } else { /* Restore frame slot. */ + lua_assert(ir->o == IR_FRAME); + /* This works for both PTR and FUNC IR_FRAME. */ + setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void)); + if (s != 0) /* Do not overwrite link to previous frame. */ + o->fr.tp.ftsz = (int32_t)*--flinks; + if (irt_isfunc(ir->t)) { + GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr)); + if (isluafunc(fn)) { + MSize framesize = funcproto(fn)->framesize; + TValue *fs; + L->base = ++o; + if (LJ_UNLIKELY(o + framesize > L->maxstack)) { /* Grow again? */ + ptrdiff_t fsave = savestack(L, frame); + L->top = o; + lj_state_growstack(L, framesize); + frame = restorestack(L, fsave); + o = L->top; } + fs = o + framesize; + if (s == 0) /* Only partially clear tail call frame at #0. */ + o = &frame[nslots]; + while (o < fs) /* Clear slots of newly added frames. */ + setnilV(o++); } } } - } else { - lua_assert(!newbase); } } - if (newbase) L->base = newbase; - if (ntop >= L->maxstack) { /* Need to grow the stack again. */ - MSize need = (MSize)(ntop - o); - L->top = o; - lj_state_growstack(L, need); - o = L->top; - ntop = o + need; - } L->top = curr_topL(L); - for (; o < ntop; o++) /* Clear remainder of newly added frames. */ - setnilV(o); - lua_assert(map + nslots == flinks-1); - J->pc = (const BCIns *)(uintptr_t)(*--flinks); + J->pc = snap_pc(*--flinks); + lua_assert(map + nent == flinks); } #undef IR diff --git a/src/lj_snap.h b/src/lj_snap.h index 776a0bcf..ed7d98a1 100644 --- a/src/lj_snap.h +++ b/src/lj_snap.h @@ -14,6 +14,19 @@ LJ_FUNC void lj_snap_add(jit_State *J); LJ_FUNC void lj_snap_shrink(jit_State *J); LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno); LJ_FUNC void lj_snap_restore(jit_State *J, void *exptr); +LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need); +LJ_FUNC void lj_snap_grow_map_(jit_State *J, MSize need); + +static LJ_AINLINE void lj_snap_grow_buf(jit_State *J, MSize need) +{ + if (LJ_UNLIKELY(need > J->sizesnap)) lj_snap_grow_buf_(J, need); +} + +static LJ_AINLINE void lj_snap_grow_map(jit_State *J, MSize need) +{ + if (LJ_UNLIKELY(need > J->sizesnapmap)) lj_snap_grow_map_(J, need); +} + #endif #endif diff --git a/src/lj_trace.c b/src/lj_trace.c index a5468655..fb36c7ee 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -161,8 +161,8 @@ void lj_trace_reenableproto(GCproto *pt) static void trace_unpatch(jit_State *J, Trace *T) { BCOp op = bc_op(T->startins); - uint32_t pcofs = T->snap[0].mapofs + T->snap[0].nslots; - BCIns *pc = ((BCIns *)(uintptr_t)T->snapmap[pcofs]) - 1; + MSize pcofs = T->snap[0].mapofs + T->snap[0].nent; + BCIns *pc = ((BCIns *)snap_pc(T->snapmap[pcofs])) - 1; switch (op) { case BC_FORL: lua_assert(bc_op(*pc) == BC_JFORI); @@ -352,7 +352,6 @@ static void trace_start(jit_State *J) J->cur.ir = J->irbuf; J->cur.snap = J->snapbuf; J->cur.snapmap = J->snapmapbuf; - /* J->cur.nsnapmap = 0; */ J->mergesnap = 0; J->needsnap = 0; J->guardemit.irt = 0;