mirror of
https://github.com/LuaJIT/LuaJIT.git
synced 2025-02-07 15:14:08 +00:00
Compress snapshots using a simple, extensible 1D-compression.
Typically reduces storage overhead for snapshot maps by 60%. The extensible format is a prerequisite for the next redesign steps: Eliminate IR_FRAME and implement return-to-lower-frame.
This commit is contained in:
parent
e058714a2e
commit
67ca399a30
@ -11,7 +11,7 @@ buildvm_lib.o: buildvm_lib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
|
||||
buildvm_peobj.o: buildvm_peobj.c buildvm.h lj_def.h lua.h luaconf.h \
|
||||
lj_arch.h lj_bc.h
|
||||
lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
|
||||
lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_alloc.h
|
||||
lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_lib.h lj_alloc.h
|
||||
lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
|
||||
lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \
|
||||
lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_ctype.h lj_lib.h lj_libdef.h
|
||||
@ -87,8 +87,8 @@ lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
|
||||
lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \
|
||||
lj_traceerr.h lj_vm.h lj_folddef.h
|
||||
lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
|
||||
lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h \
|
||||
lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h
|
||||
lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
|
||||
lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h
|
||||
lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
|
||||
lj_tab.h lj_ir.h lj_jit.h lj_iropt.h
|
||||
lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
|
||||
|
@ -332,18 +332,25 @@ LJLIB_CF(jit_util_tracesnap)
|
||||
if (T && sn < T->nsnap) {
|
||||
SnapShot *snap = &T->snap[sn];
|
||||
SnapEntry *map = &T->snapmap[snap->mapofs];
|
||||
BCReg s, nslots = snap->nslots;
|
||||
MSize n, nent = snap->nent;
|
||||
BCReg nslots = snap->nslots;
|
||||
GCtab *t;
|
||||
lua_createtable(L, nslots ? (int)nslots : 1, 0);
|
||||
t = tabV(L->top-1);
|
||||
setintV(lj_tab_setint(L, t, 0), (int32_t)snap->ref - REF_BIAS);
|
||||
for (s = 0; s < nslots; s++) {
|
||||
TValue *o = lj_tab_setint(L, t, (int32_t)(s+1));
|
||||
IRRef ref = snap_ref(map[s]);
|
||||
if (ref)
|
||||
setintV(o, (int32_t)ref - REF_BIAS);
|
||||
else
|
||||
/* NYI: get rid of this and expose the compressed slot map. */
|
||||
{
|
||||
BCReg s;
|
||||
for (s = 0; s < nslots; s++) {
|
||||
TValue *o = lj_tab_setint(L, t, (int32_t)(s+1));
|
||||
setboolV(o, 0);
|
||||
}
|
||||
}
|
||||
for (n = 0; n < nent; n++) {
|
||||
BCReg s = snap_slot(map[n]);
|
||||
IRRef ref = snap_ref(map[n]);
|
||||
TValue *o = lj_tab_setint(L, t, (int32_t)(s+1));
|
||||
setintV(o, (int32_t)ref - REF_BIAS);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
101
src/lj_asm.c
101
src/lj_asm.c
@ -926,9 +926,9 @@ static void asm_snap_alloc(ASMState *as)
|
||||
{
|
||||
SnapShot *snap = &as->T->snap[as->snapno];
|
||||
SnapEntry *map = &as->T->snapmap[snap->mapofs];
|
||||
BCReg s, nslots = snap->nslots;
|
||||
for (s = 0; s < nslots; s++) {
|
||||
IRRef ref = snap_ref(map[s]);
|
||||
MSize n, nent = snap->nent;
|
||||
for (n = 0; n < nent; n++) {
|
||||
IRRef ref = snap_ref(map[n]);
|
||||
if (!irref_isk(ref)) {
|
||||
IRIns *ir = IR(ref);
|
||||
if (!ra_used(ir) && ir->o != IR_FRAME) {
|
||||
@ -960,9 +960,9 @@ static int asm_snap_checkrename(ASMState *as, IRRef ren)
|
||||
{
|
||||
SnapShot *snap = &as->T->snap[as->snapno];
|
||||
SnapEntry *map = &as->T->snapmap[snap->mapofs];
|
||||
BCReg s, nslots = snap->nslots;
|
||||
for (s = 0; s < nslots; s++) {
|
||||
IRRef ref = snap_ref(map[s]);
|
||||
MSize n, nent = snap->nent;
|
||||
for (n = 0; n < nent; n++) {
|
||||
IRRef ref = snap_ref(map[n]);
|
||||
if (ref == ren) {
|
||||
IRIns *ir = IR(ref);
|
||||
ra_spill(as, ir); /* Register renamed, so force a spill slot. */
|
||||
@ -2465,18 +2465,17 @@ static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base)
|
||||
*/
|
||||
RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base);
|
||||
SnapEntry *map = &as->T->snapmap[snap->mapofs];
|
||||
BCReg s, nslots = snap->nslots;
|
||||
for (s = 0; s < nslots; s++) {
|
||||
IRRef ref = snap_ref(map[s]);
|
||||
MSize n, nent = snap->nent;
|
||||
for (n = 0; n < nent; n++) {
|
||||
IRRef ref = snap_ref(map[n]);
|
||||
if (!irref_isk(ref)) {
|
||||
int32_t ofs = 8*(int32_t)(snap_slot(map[n])-1);
|
||||
IRIns *ir = IR(ref);
|
||||
if (ir->o == IR_FRAME) {
|
||||
/* NYI: sync the frame, bump base, set topslot, clear new slots. */
|
||||
lj_trace_err(as->J, LJ_TRERR_NYIGCF);
|
||||
} else if (irt_isgcv(ir->t) &&
|
||||
!(ir->o == IR_SLOAD && ir->op1 < nslots && map[ir->op1] == 0)) {
|
||||
} else if (irt_isgcv(ir->t)) {
|
||||
Reg src = ra_alloc1(as, ref, allow);
|
||||
int32_t ofs = 8*(int32_t)(s-1);
|
||||
emit_movtomro(as, src, base, ofs);
|
||||
emit_movmroi(as, base, ofs+4, irt_toitype(ir->t));
|
||||
checkmclim(as);
|
||||
@ -2504,7 +2503,7 @@ static void asm_gc_check(ASMState *as, SnapShot *snap)
|
||||
emit_loadi(as, tmp, (int32_t)as->gcsteps);
|
||||
/* We don't know spadj yet, so get the C frame from L->cframe. */
|
||||
emit_movmroi(as, tmp, CFRAME_OFS_PC,
|
||||
(int32_t)as->T->snapmap[snap->mapofs+snap->nslots]);
|
||||
(int32_t)as->T->snapmap[snap->mapofs+snap->nent]);
|
||||
emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK);
|
||||
lstate = IR(ASMREF_L)->r;
|
||||
emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe));
|
||||
@ -2965,19 +2964,19 @@ static void asm_head_side(ASMState *as)
|
||||
static void asm_tail_sync(ASMState *as)
|
||||
{
|
||||
SnapShot *snap = &as->T->snap[as->T->nsnap-1]; /* Last snapshot. */
|
||||
BCReg s, nslots = snap->nslots;
|
||||
MSize n, nent = snap->nent;
|
||||
SnapEntry *map = &as->T->snapmap[snap->mapofs];
|
||||
SnapEntry *flinks = map + nslots + snap->nframelinks;
|
||||
SnapEntry *flinks = map + nent + snap->nframelinks;
|
||||
BCReg newbase = 0;
|
||||
BCReg secondbase = ~(BCReg)0;
|
||||
BCReg topslot = 0;
|
||||
BCReg nslots, topslot = 0;
|
||||
|
||||
checkmclim(as);
|
||||
ra_allocref(as, REF_BASE, RID2RSET(RID_BASE));
|
||||
|
||||
/* Must check all frames to find topslot (outer can be larger than inner). */
|
||||
for (s = 0; s < nslots; s++) {
|
||||
IRRef ref = snap_ref(map[s]);
|
||||
for (n = 0; n < nent; n++) {
|
||||
IRRef ref = snap_ref(map[n]);
|
||||
BCReg s = snap_slot(map[n]);
|
||||
if (!irref_isk(ref)) {
|
||||
IRIns *ir = IR(ref);
|
||||
if (ir->o == IR_FRAME && irt_isfunc(ir->t)) {
|
||||
@ -2985,10 +2984,7 @@ static void asm_tail_sync(ASMState *as)
|
||||
if (isluafunc(fn)) {
|
||||
BCReg fs = s + funcproto(fn)->framesize;
|
||||
if (fs > topslot) topslot = fs;
|
||||
if (s != 0) {
|
||||
newbase = s;
|
||||
if (secondbase == ~(BCReg)0) secondbase = s;
|
||||
}
|
||||
newbase = s;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2998,7 +2994,7 @@ static void asm_tail_sync(ASMState *as)
|
||||
if (as->T->link == TRACE_INTERP) {
|
||||
/* Setup fixed registers for exit to interpreter. */
|
||||
emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch);
|
||||
emit_loadi(as, RID_PC, (int32_t)map[nslots]);
|
||||
emit_loadi(as, RID_PC, (int32_t)map[nent]);
|
||||
} else if (newbase) {
|
||||
/* Save modified BASE for linking to trace with higher start frame. */
|
||||
emit_setgl(as, RID_BASE, jit_base);
|
||||
@ -3007,51 +3003,50 @@ static void asm_tail_sync(ASMState *as)
|
||||
emit_addptr(as, RID_BASE, 8*(int32_t)newbase);
|
||||
|
||||
/* Clear stack slots of newly added frames. */
|
||||
nslots = snap->nslots;
|
||||
if (nslots <= topslot) {
|
||||
if (nslots < topslot) {
|
||||
BCReg s;
|
||||
for (s = nslots; s <= topslot; s++) {
|
||||
emit_movtomro(as, RID_EAX, RID_BASE, 8*(int32_t)s-4);
|
||||
emit_movtomro(as, RID_EAX, RID_BASE, 8*((int32_t)s-1)+4);
|
||||
checkmclim(as);
|
||||
}
|
||||
emit_loadi(as, RID_EAX, LJ_TNIL);
|
||||
} else {
|
||||
emit_movmroi(as, RID_BASE, 8*(int32_t)nslots-4, LJ_TNIL);
|
||||
emit_movmroi(as, RID_BASE, 8*((int32_t)nslots-1)+4, LJ_TNIL);
|
||||
}
|
||||
}
|
||||
|
||||
/* Store the value of all modified slots to the Lua stack. */
|
||||
for (s = 0; s < nslots; s++) {
|
||||
for (n = 0; n < nent; n++) {
|
||||
BCReg s = snap_slot(map[n]);
|
||||
int32_t ofs = 8*((int32_t)s-1);
|
||||
IRRef ref = snap_ref(map[s]);
|
||||
if (ref) {
|
||||
IRIns *ir = IR(ref);
|
||||
/* No need to restore readonly slots and unmodified non-parent slots. */
|
||||
if (ir->o == IR_SLOAD && ir->op1 == s &&
|
||||
(ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
|
||||
continue;
|
||||
if (irt_isnum(ir->t)) {
|
||||
Reg src = ra_alloc1(as, ref, RSET_FPR);
|
||||
emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs);
|
||||
} else if (ir->o == IR_FRAME) {
|
||||
emit_movmroi(as, RID_BASE, ofs, ptr2addr(ir_kgc(IR(ir->op2))));
|
||||
if (s != 0) /* Do not overwrite link to previous frame. */
|
||||
emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks));
|
||||
} else {
|
||||
lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t));
|
||||
if (!irref_isk(ref)) {
|
||||
Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
|
||||
emit_movtomro(as, src, RID_BASE, ofs);
|
||||
} else if (!irt_ispri(ir->t)) {
|
||||
emit_movmroi(as, RID_BASE, ofs, ir->i);
|
||||
}
|
||||
emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
|
||||
}
|
||||
IRRef ref = snap_ref(map[n]);
|
||||
IRIns *ir = IR(ref);
|
||||
/* No need to restore readonly slots and unmodified non-parent slots. */
|
||||
if (ir->o == IR_SLOAD && ir->op1 == s &&
|
||||
(ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
|
||||
continue;
|
||||
if (irt_isnum(ir->t)) {
|
||||
Reg src = ra_alloc1(as, ref, RSET_FPR);
|
||||
emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs);
|
||||
} else if (ir->o == IR_FRAME) {
|
||||
emit_movmroi(as, RID_BASE, ofs, ptr2addr(ir_kgc(IR(ir->op2))));
|
||||
if (s != 0) /* Do not overwrite link to previous frame. */
|
||||
emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks));
|
||||
} else {
|
||||
lua_assert(!(s > secondbase));
|
||||
lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t));
|
||||
if (!irref_isk(ref)) {
|
||||
Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
|
||||
emit_movtomro(as, src, RID_BASE, ofs);
|
||||
} else if (!irt_ispri(ir->t)) {
|
||||
emit_movmroi(as, RID_BASE, ofs, ir->i);
|
||||
}
|
||||
emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
|
||||
}
|
||||
checkmclim(as);
|
||||
}
|
||||
lua_assert(map + nslots == flinks-1);
|
||||
lua_assert(map + nent == flinks-1);
|
||||
}
|
||||
|
||||
/* Fixup the tail code. */
|
||||
|
@ -698,8 +698,8 @@ void lj_gdbjit_addtrace(jit_State *J, Trace *T, TraceNo traceno)
|
||||
lua_State *L = J->L;
|
||||
GCproto *pt = &gcref(T->startpt)->pt;
|
||||
TraceNo parent = T->ir[REF_BASE].op1;
|
||||
uintptr_t pcofs = (uintptr_t)(T->snap[0].mapofs+T->snap[0].nslots);
|
||||
const BCIns *startpc = (const BCIns *)(uintptr_t)T->snapmap[pcofs];
|
||||
uintptr_t pcofs = (uintptr_t)(T->snap[0].mapofs+T->snap[0].nent);
|
||||
const BCIns *startpc = snap_pc(T->snapmap[pcofs]);
|
||||
ctx.T = T;
|
||||
ctx.mcaddr = (uintptr_t)T->mcode;
|
||||
ctx.szmcode = T->szmcode;
|
||||
|
16
src/lj_jit.h
16
src/lj_jit.h
@ -112,17 +112,27 @@ typedef uint8_t MCode;
|
||||
typedef struct SnapShot {
|
||||
uint16_t mapofs; /* Offset into snapshot map. */
|
||||
IRRef1 ref; /* First IR ref for this snapshot. */
|
||||
uint8_t nslots; /* Number of stack slots. */
|
||||
uint8_t nslots; /* Number of valid slots. */
|
||||
uint8_t nent; /* Number of compressed entries. */
|
||||
uint8_t nframelinks; /* Number of frame links. */
|
||||
uint8_t count; /* Count of taken exits for this snapshot. */
|
||||
uint8_t unused1;
|
||||
} SnapShot;
|
||||
|
||||
#define SNAPCOUNT_DONE 255 /* Already compiled and linked a side trace. */
|
||||
|
||||
/* Snapshot entry. */
|
||||
/* Compressed snapshot entry. */
|
||||
typedef uint32_t SnapEntry;
|
||||
|
||||
/* Layout of a compressed snapshot entry, as implied by the macros below:
** the slot number lives in the top 8 bits (>> 24), flags such as SNAP_FRAME
** sit in between, and the IR reference occupies the low 16 bits (& 0xffff).
** The same SnapEntry type is also used to hold a PC or a frame type/size
** word (see SNAP_MKPC and SNAP_MKFTSZ), which carry no slot/ref fields.
*/
#define SNAP_FRAME 0x010000 /* Slot has frame link. */
|
||||
|
||||
/* Build an entry from slot, flags and ref by plain addition of the fields.
** NOTE(review): the SnapEntry cast is applied after (slot) << 24, so the
** shift is evaluated in the argument's own type. With an int argument such
** as SNAP(255, 0, 0) this shifts into the sign bit of a 32 bit int --
** consider casting before shifting; confirm against the callers.
*/
#define SNAP(slot, flags, ref) ((SnapEntry)((slot) << 24) + (flags) + (ref))
|
||||
#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) /* Store a PC as an entry. */
|
||||
#define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) /* Store a frame ftsz word. */
|
||||
#define snap_ref(sn) ((sn) & 0xffff) /* Extract the low 16 bits (IR ref). */
|
||||
#define snap_slot(sn) ((BCReg)((sn) >> 24)) /* Extract the slot number. */
|
||||
#define snap_isframe(sn) ((sn) & SNAP_FRAME) /* Non-zero if flag is set. */
|
||||
#define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn)) /* Entry back to a PC. */
|
||||
/* Replace the low 16 bits with ref, keeping slot and flags. ref is not masked. */
#define snap_setref(sn, ref) (((sn) & 0xffff0000) | (ref))
|
||||
|
||||
/* Snapshot and exit numbers. */
|
||||
typedef uint32_t SnapNo;
|
||||
|
@ -24,9 +24,9 @@ static void dce_marksnap(jit_State *J)
|
||||
for (i = 0; i < nsnap; i++) {
|
||||
SnapShot *snap = &J->cur.snap[i];
|
||||
SnapEntry *map = &J->cur.snapmap[snap->mapofs];
|
||||
BCReg s, nslots = snap->nslots;
|
||||
for (s = 0; s < nslots; s++) {
|
||||
IRRef ref = snap_ref(map[s]);
|
||||
MSize n, nent = snap->nent;
|
||||
for (n = 0; n < nent; n++) {
|
||||
IRRef ref = snap_ref(map[n]);
|
||||
if (!irref_isk(ref))
|
||||
irt_setmark(IR(ref)->t);
|
||||
}
|
||||
|
@ -10,7 +10,6 @@
|
||||
|
||||
#if LJ_HASJIT
|
||||
|
||||
#include "lj_gc.h"
|
||||
#include "lj_err.h"
|
||||
#include "lj_str.h"
|
||||
#include "lj_ir.h"
|
||||
@ -163,21 +162,69 @@ static void loop_emit_phi(jit_State *J, IRRef1 *subst, IRRef1 *phi, IRRef nphi)
|
||||
|
||||
/* -- Loop unrolling using copy-substitution ------------------------------ */
|
||||
|
||||
/* Copy-substitute snapshot.
**
** Builds a snapshot for the unrolled loop body from the recorded snapshot
** osnap. The new map is a merge, by slot number, of the entries of osnap
** and the entries of the loop snapshot map (loopmap):
** - entries from osnap are copied with non-constant refs replaced via subst[],
** - loopmap entries are copied for slots that osnap does not mention,
** - a loopmap entry for the same slot as an osnap entry is dropped.
** The frame links stored after the entries of osnap are copied unchanged.
**
** If a guard was emitted since the previous snapshot, a new snapshot is
** appended; otherwise the previous snapshot is overwritten in place.
**
** loopmap is read without a length bound. The caller (loop_unroll) replaces
** the entry following the loop snapshot entries with SNAP(255, 0, 0), which
** presumably serves as the terminator for both loops below -- confirm.
** The merge presumably relies on both maps being in ascending slot order.
*/
|
||||
static void loop_subst_snap(jit_State *J, SnapShot *osnap,
|
||||
SnapEntry *loopmap, IRRef1 *subst)
|
||||
{
|
||||
SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs];
|
||||
MSize nmapofs, nframelinks;
|
||||
MSize on, ln, nn, onent = osnap->nent; /* Old, loop and new map indexes. */
|
||||
BCReg nslots = osnap->nslots;
|
||||
SnapShot *snap = &J->cur.snap[J->cur.nsnap]; /* One past the last snapshot. */
|
||||
if (irt_isguard(J->guardemit)) { /* Guard inbetween? */
|
||||
nmapofs = J->cur.nsnapmap; /* New map starts at the end of the used map. */
|
||||
J->cur.nsnap++; /* Add new snapshot. */
|
||||
} else { /* Otherwise overwrite previous snapshot. */
|
||||
snap--;
|
||||
nmapofs = snap->mapofs; /* Reuse the map area of the previous snapshot. */
|
||||
}
|
||||
J->guardemit.irt = 0; /* Reset the guard marker for the next snapshot. */
|
||||
nframelinks = osnap->nframelinks;
|
||||
/* Setup new snapshot. */
|
||||
snap->mapofs = (uint16_t)nmapofs;
|
||||
snap->ref = (IRRef1)J->cur.nins; /* Snapshot refers to the next IR ins. */
|
||||
snap->nframelinks = (uint8_t)nframelinks;
|
||||
snap->nslots = nslots;
|
||||
snap->count = 0;
|
||||
nmap = &J->cur.snapmap[nmapofs];
|
||||
/* Substitute snapshot slots. */
|
||||
on = ln = nn = 0;
|
||||
while (on < onent) { /* Two-way merge until the old map is exhausted. */
|
||||
SnapEntry osn = omap[on], lsn = loopmap[ln];
|
||||
if (snap_slot(lsn) < snap_slot(osn)) { /* Copy slot from loop map. */
|
||||
nmap[nn++] = lsn;
|
||||
ln++;
|
||||
} else { /* Copy substituted slot from snapshot map. */
|
||||
if (snap_slot(lsn) == snap_slot(osn)) ln++; /* Shadowed loop slot. */
|
||||
if (!irref_isk(snap_ref(osn))) /* Constant refs are kept as-is. */
|
||||
osn = snap_setref(osn, subst[snap_ref(osn)]);
|
||||
nmap[nn++] = osn;
|
||||
on++;
|
||||
}
|
||||
}
|
||||
while (snap_slot(loopmap[ln]) < nslots) /* Copy remaining loop slots. */
|
||||
nmap[nn++] = loopmap[ln++];
|
||||
snap->nent = (uint8_t)nn; /* Number of merged entries written. */
|
||||
J->cur.nsnapmap = (uint16_t)(nmapofs + nn + nframelinks);
|
||||
omap += onent; /* Frame links follow the entries in the old map ... */
|
||||
nmap += nn; /* ... and are placed after the entries in the new map. */
|
||||
for (nn = 0; nn < nframelinks; nn++) /* Copy frame links. */
|
||||
nmap[nn] = omap[nn];
|
||||
}
|
||||
|
||||
/* Unroll loop. */
|
||||
static void loop_unroll(jit_State *J)
|
||||
{
|
||||
IRRef1 phi[LJ_MAX_PHI];
|
||||
uint32_t nphi = 0;
|
||||
IRRef1 *subst;
|
||||
SnapShot *osnap, *snap;
|
||||
SnapEntry *loopmap;
|
||||
BCReg loopslots;
|
||||
MSize nsnap, nsnapmap;
|
||||
IRRef ins, invar, osnapref;
|
||||
SnapShot *osnap;
|
||||
SnapEntry *loopmap, *psentinel;
|
||||
IRRef ins, invar;
|
||||
|
||||
/* Use temp buffer for substitution table.
|
||||
** Only non-constant refs in [REF_BIAS,invar) are valid indexes.
|
||||
** Note: don't call into the VM or run the GC or the buffer may be gone.
|
||||
** Caveat: don't call into the VM or run the GC or the buffer may be gone.
|
||||
*/
|
||||
invar = J->cur.nins;
|
||||
subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf,
|
||||
@ -187,80 +234,37 @@ static void loop_unroll(jit_State *J)
|
||||
/* LOOP separates the pre-roll from the loop body. */
|
||||
emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0);
|
||||
|
||||
/* Ensure size for copy-substituted snapshots (minus #0 and loop snapshot). */
|
||||
nsnap = J->cur.nsnap;
|
||||
if (LJ_UNLIKELY(2*nsnap-2 > J->sizesnap)) {
|
||||
MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
|
||||
if (2*nsnap-2 > maxsnap)
|
||||
lj_trace_err(J, LJ_TRERR_SNAPOV);
|
||||
lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
|
||||
J->cur.snap = J->snapbuf;
|
||||
}
|
||||
nsnapmap = J->cur.nsnapmap; /* Use temp. copy to avoid undo. */
|
||||
if (LJ_UNLIKELY(nsnapmap*2 > J->sizesnapmap)) {
|
||||
J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
|
||||
J->sizesnapmap*sizeof(SnapEntry),
|
||||
2*J->sizesnapmap*sizeof(SnapEntry));
|
||||
J->cur.snapmap = J->snapmapbuf;
|
||||
J->sizesnapmap *= 2;
|
||||
}
|
||||
/* Grow snapshot buffer and map for copy-substituted snapshots.
|
||||
** Need up to twice the number of snapshots minus #0 and loop snapshot.
|
||||
** Need up to twice the number of entries plus fallback substitutions
|
||||
** from the loop snapshot entries for each new snapshot.
|
||||
** Caveat: both calls may reallocate J->cur.snap and J->cur.snapmap!
|
||||
*/
|
||||
{
|
||||
MSize nsnap = J->cur.nsnap;
|
||||
SnapShot *loopsnap;
|
||||
lj_snap_grow_buf(J, 2*nsnap-2);
|
||||
lj_snap_grow_map(J, J->cur.nsnapmap*2+(nsnap-2)*J->cur.snap[nsnap-1].nent);
|
||||
|
||||
/* The loop snapshot is used for fallback substitutions. */
|
||||
snap = &J->cur.snap[nsnap-1];
|
||||
loopmap = &J->cur.snapmap[snap->mapofs];
|
||||
loopslots = snap->nslots;
|
||||
/* The PC of snapshot #0 and the loop snapshot must match. */
|
||||
lua_assert(loopmap[loopslots] == J->cur.snapmap[J->cur.snap[0].nslots]);
|
||||
/* The loop snapshot is used for fallback substitutions. */
|
||||
loopsnap = &J->cur.snap[nsnap-1];
|
||||
loopmap = &J->cur.snapmap[loopsnap->mapofs];
|
||||
/* The PC of snapshot #0 and the loop snapshot must match. */
|
||||
psentinel = &loopmap[loopsnap->nent];
|
||||
lua_assert(*psentinel == J->cur.snapmap[J->cur.snap[0].nent]);
|
||||
*psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */
|
||||
}
|
||||
|
||||
/* Start substitution with snapshot #1 (#0 is empty for root traces). */
|
||||
osnap = &J->cur.snap[1];
|
||||
osnapref = osnap->ref;
|
||||
|
||||
/* Copy and substitute all recorded instructions and snapshots. */
|
||||
for (ins = REF_FIRST; ins < invar; ins++) {
|
||||
IRIns *ir;
|
||||
IRRef op1, op2;
|
||||
|
||||
/* Copy-substitute snapshot. */
|
||||
if (ins >= osnapref) {
|
||||
SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs];
|
||||
BCReg s, nslots;
|
||||
uint32_t nmapofs, nframelinks;
|
||||
if (irt_isguard(J->guardemit)) { /* Guard inbetween? */
|
||||
nmapofs = nsnapmap;
|
||||
snap++; /* Add new snapshot. */
|
||||
} else {
|
||||
nmapofs = snap->mapofs; /* Overwrite previous snapshot. */
|
||||
}
|
||||
J->guardemit.irt = 0;
|
||||
nslots = osnap->nslots;
|
||||
nframelinks = osnap->nframelinks;
|
||||
snap->mapofs = (uint16_t)nmapofs;
|
||||
snap->ref = (IRRef1)J->cur.nins;
|
||||
snap->nslots = (uint8_t)nslots;
|
||||
snap->nframelinks = (uint8_t)nframelinks;
|
||||
snap->count = 0;
|
||||
osnap++;
|
||||
osnapref = osnap->ref;
|
||||
nsnapmap = nmapofs + nslots + nframelinks;
|
||||
nmap = &J->cur.snapmap[nmapofs];
|
||||
/* Substitute snapshot slots. */
|
||||
for (s = 0; s < nslots; s++) {
|
||||
IRRef ref = snap_ref(omap[s]);
|
||||
if (ref) {
|
||||
if (!irref_isk(ref))
|
||||
ref = subst[ref];
|
||||
} else if (s < loopslots) {
|
||||
ref = loopmap[s];
|
||||
}
|
||||
nmap[s] = ref;
|
||||
}
|
||||
/* Copy frame links. */
|
||||
nmap += nslots;
|
||||
omap += nslots;
|
||||
for (s = 0; s < nframelinks; s++)
|
||||
nmap[s] = omap[s];
|
||||
}
|
||||
if (ins >= osnap->ref) /* Instruction belongs to next snapshot? */
|
||||
loop_subst_snap(J, osnap++, loopmap, subst); /* Copy-substitute it. */
|
||||
|
||||
/* Substitute instruction operands. */
|
||||
ir = IR(ins);
|
||||
@ -295,22 +299,24 @@ static void loop_unroll(jit_State *J)
|
||||
}
|
||||
}
|
||||
}
|
||||
if (irt_isguard(J->guardemit)) { /* Guard inbetween? */
|
||||
J->cur.nsnapmap = (uint16_t)nsnapmap;
|
||||
snap++;
|
||||
} else {
|
||||
J->cur.nsnapmap = (uint16_t)snap->mapofs; /* Last snapshot is redundant. */
|
||||
}
|
||||
J->cur.nsnap = (uint16_t)(snap - J->cur.snap);
|
||||
if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */
|
||||
J->cur.nsnapmap = (uint16_t)J->cur.snap[--J->cur.nsnap].mapofs;
|
||||
lua_assert(J->cur.nsnapmap <= J->sizesnapmap);
|
||||
*psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */
|
||||
|
||||
loop_emit_phi(J, subst, phi, nphi);
|
||||
}
|
||||
|
||||
/* Undo any partial changes made by the loop optimization. */
|
||||
static void loop_undo(jit_State *J, IRRef ins)
|
||||
static void loop_undo(jit_State *J, IRRef ins, MSize nsnap)
|
||||
{
|
||||
ptrdiff_t i;
|
||||
SnapShot *snap = &J->cur.snap[nsnap-1];
|
||||
SnapEntry *map = J->cur.snapmap;
|
||||
map[snap->mapofs + snap->nent] = map[J->cur.snap[0].nent]; /* Restore PC. */
|
||||
J->cur.nsnapmap = (uint16_t)(snap->mapofs + snap->nent + snap->nframelinks);
|
||||
J->cur.nsnap = nsnap;
|
||||
J->guardemit.irt = 0;
|
||||
lj_ir_rollback(J, ins);
|
||||
for (i = 0; i < BPROP_SLOTS; i++) { /* Remove backprop. cache entries. */
|
||||
BPropEntry *bp = &J->bpropcache[i];
|
||||
@ -336,6 +342,7 @@ static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud)
|
||||
int lj_opt_loop(jit_State *J)
|
||||
{
|
||||
IRRef nins = J->cur.nins;
|
||||
MSize nsnap = J->cur.nsnap;
|
||||
int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt);
|
||||
if (LJ_UNLIKELY(errcode)) {
|
||||
lua_State *L = J->L;
|
||||
@ -348,8 +355,7 @@ int lj_opt_loop(jit_State *J)
|
||||
if (--J->instunroll < 0) /* But do not unroll forever. */
|
||||
break;
|
||||
L->top--; /* Remove error object. */
|
||||
J->guardemit.irt = 0;
|
||||
loop_undo(J, nins);
|
||||
loop_undo(J, nins, nsnap);
|
||||
return 1; /* Loop optimization failed, continue recording. */
|
||||
default:
|
||||
break;
|
||||
|
@ -1696,7 +1696,7 @@ static void optstate_comp(jit_State *J, int cond)
|
||||
const BCIns *npc = J->pc + 2 + (cond ? bc_j(jmpins) : 0);
|
||||
SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
|
||||
/* Avoid re-recording the comparison in side traces. */
|
||||
J->cur.snapmap[snap->mapofs + snap->nslots] = u32ptr(npc);
|
||||
J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc);
|
||||
J->needsnap = 1;
|
||||
/* Shrink last snapshot if possible. */
|
||||
if (bc_a(jmpins) < J->maxslot) {
|
||||
@ -2159,61 +2159,62 @@ static void rec_setup_side(jit_State *J, Trace *T)
|
||||
{
|
||||
SnapShot *snap = &T->snap[J->exitno];
|
||||
SnapEntry *map = &T->snapmap[snap->mapofs];
|
||||
BCReg s, nslots = snap->nslots;
|
||||
MSize n, nent = snap->nent;
|
||||
BloomFilter seen = 0;
|
||||
for (s = 0; s < nslots; s++) {
|
||||
IRRef ref = snap_ref(map[s]);
|
||||
if (ref) {
|
||||
IRIns *ir = &T->ir[ref];
|
||||
TRef tr = 0;
|
||||
/* The bloom filter avoids O(nslots^2) overhead for de-duping slots. */
|
||||
if (bloomtest(seen, ref)) {
|
||||
BCReg j;
|
||||
for (j = 0; j < s; j++)
|
||||
if (snap_ref(map[j]) == ref) {
|
||||
if (ir->o == IR_FRAME && irt_isfunc(ir->t)) {
|
||||
lua_assert(s != 0);
|
||||
J->baseslot = s+1;
|
||||
J->framedepth++;
|
||||
}
|
||||
tr = J->slot[j];
|
||||
goto dupslot;
|
||||
}
|
||||
}
|
||||
bloomset(seen, ref);
|
||||
switch ((IROp)ir->o) {
|
||||
case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break;
|
||||
case IR_KINT: tr = lj_ir_kint(J, ir->i); break;
|
||||
case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break;
|
||||
case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break;
|
||||
case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */
|
||||
if (irt_isfunc(ir->t)) {
|
||||
if (s != 0) {
|
||||
/* Emit IR for slots inherited from parent snapshot. */
|
||||
for (n = 0; n < nent; n++) {
|
||||
IRRef ref = snap_ref(map[n]);
|
||||
BCReg s = snap_slot(map[n]);
|
||||
IRIns *ir = &T->ir[ref];
|
||||
TRef tr;
|
||||
/* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
|
||||
if (bloomtest(seen, ref)) {
|
||||
MSize j;
|
||||
for (j = 0; j < n; j++)
|
||||
if (snap_ref(map[j]) == ref) {
|
||||
tr = J->slot[snap_slot(map[j])];
|
||||
if (ir->o == IR_FRAME && irt_isfunc(ir->t)) {
|
||||
lua_assert(s != 0);
|
||||
J->baseslot = s+1;
|
||||
J->framedepth++;
|
||||
}
|
||||
tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2]));
|
||||
tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr);
|
||||
} else {
|
||||
tr = lj_ir_kptr(J, mref(T->ir[ir->op2].ptr, void));
|
||||
tr = emitir_raw(IRT(IR_FRAME, IRT_PTR), tr, tr);
|
||||
goto dupslot;
|
||||
}
|
||||
break;
|
||||
case IR_SLOAD: /* Inherited SLOADs don't need a guard or type check. */
|
||||
tr = emitir_raw(ir->ot & ~IRT_GUARD, s,
|
||||
(ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT);
|
||||
break;
|
||||
default: /* Parent refs are already typed and don't need a guard. */
|
||||
tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s,
|
||||
IRSLOAD_INHERIT|IRSLOAD_PARENT);
|
||||
break;
|
||||
}
|
||||
dupslot:
|
||||
J->slot[s] = tr;
|
||||
}
|
||||
bloomset(seen, ref);
|
||||
switch ((IROp)ir->o) {
|
||||
/* Only have to deal with constants that can occur in stack slots. */
|
||||
case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break;
|
||||
case IR_KINT: tr = lj_ir_kint(J, ir->i); break;
|
||||
case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break;
|
||||
case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break;
|
||||
case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */
|
||||
if (irt_isfunc(ir->t)) {
|
||||
if (s != 0) {
|
||||
J->baseslot = s+1;
|
||||
J->framedepth++;
|
||||
}
|
||||
tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2]));
|
||||
tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr);
|
||||
} else {
|
||||
tr = lj_ir_kptr(J, mref(T->ir[ir->op2].ptr, void));
|
||||
tr = emitir_raw(IRT(IR_FRAME, IRT_PTR), tr, tr);
|
||||
}
|
||||
break;
|
||||
case IR_SLOAD: /* Inherited SLOADs don't need a guard or type check. */
|
||||
tr = emitir_raw(ir->ot & ~IRT_GUARD, s,
|
||||
(ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT);
|
||||
break;
|
||||
default: /* Parent refs are already typed and don't need a guard. */
|
||||
tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s,
|
||||
IRSLOAD_INHERIT|IRSLOAD_PARENT);
|
||||
break;
|
||||
}
|
||||
dupslot:
|
||||
J->slot[s] = tr;
|
||||
}
|
||||
J->base = J->slot + J->baseslot;
|
||||
J->maxslot = nslots - J->baseslot;
|
||||
J->maxslot = snap->nslots - J->baseslot;
|
||||
lj_snap_add(J);
|
||||
}
|
||||
|
||||
@ -2259,7 +2260,7 @@ void lj_record_setup(jit_State *J)
|
||||
J->cur.root = (uint16_t)root;
|
||||
J->cur.startins = BCINS_AD(BC_JMP, 0, 0);
|
||||
/* Check whether we could at least potentially form an extra loop. */
|
||||
if (J->exitno == 0 && T->snap[0].nslots == 1 && T->snapmap[0] == 0) {
|
||||
if (J->exitno == 0 && T->snap[0].nent == 0) {
|
||||
/* We can narrow a FORL for some side traces, too. */
|
||||
if (J->pc > J->pt->bc && bc_op(J->pc[-1]) == BC_JFORI &&
|
||||
bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) {
|
||||
|
247
src/lj_snap.c
247
src/lj_snap.c
@ -23,28 +23,50 @@
|
||||
/* Some local macros to save typing. Undef'd at the end. */
|
||||
#define IR(ref) (&J->cur.ir[(ref)])
|
||||
|
||||
/* -- Snapshot buffer allocation ------------------------------------------ */
|
||||
|
||||
/* Grow snapshot buffer.
**
** Raises the trace error LJ_TRERR_SNAPOV if the requested number of
** snapshots (need) exceeds the maxsnap JIT parameter. Otherwise grows
** J->snapbuf via lj_mem_growvec with maxsnap as the limit and re-points
** J->cur.snap at the (possibly moved) buffer.
** Note that need is only used for the limit check here; the new size is
** chosen by lj_mem_growvec.
** NOTE(review): callers in this file use lj_snap_grow_buf() without the
** trailing underscore -- presumably a wrapper that checks the current
** size before calling this function; confirm in the header.
*/
|
||||
void lj_snap_grow_buf_(jit_State *J, MSize need)
|
||||
{
|
||||
MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
|
||||
if (need > maxsnap) /* Request exceeds the configured snapshot limit. */
|
||||
lj_trace_err(J, LJ_TRERR_SNAPOV);
|
||||
lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
|
||||
J->cur.snap = J->snapbuf; /* Buffer may have moved; refresh the alias. */
|
||||
}
|
||||
|
||||
/* Grow snapshot map buffer.
**
** Resizes J->snapmapbuf to hold at least need entries. The new size is
** twice the current size if that is larger than need; otherwise it is
** need itself, raised to a minimum of 64 entries. Afterwards J->cur.snapmap
** and J->sizesnapmap are updated to match the reallocated buffer.
** NOTE(review): callers in this file use lj_snap_grow_map() without the
** trailing underscore -- presumably a wrapper that checks the current
** size before calling this function; confirm in the header.
*/
|
||||
void lj_snap_grow_map_(jit_State *J, MSize need)
|
||||
{
|
||||
if (need < 2*J->sizesnapmap) /* Grow by at least a factor of two. */
|
||||
need = 2*J->sizesnapmap;
|
||||
else if (need < 64) /* Minimum size for a small or empty map. */
|
||||
need = 64;
|
||||
J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
|
||||
J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
|
||||
J->cur.snapmap = J->snapmapbuf; /* Buffer may have moved; refresh the alias. */
|
||||
J->sizesnapmap = need;
|
||||
}
|
||||
|
||||
/* -- Snapshot generation ------------------------------------------------- */
|
||||
|
||||
/* NYI: Snapshots are in need of a redesign. The current storage model for
|
||||
** snapshot maps is too wasteful. They could be compressed (1D or 2D) and
|
||||
** made more flexible at the same time. Iterators should no longer need to
|
||||
** skip unmodified slots. IR_FRAME should be eliminated, too.
|
||||
*/
|
||||
/* NYI: IR_FRAME should be eliminated, too. */
|
||||
|
||||
/* Add all modified slots to the snapshot. */
|
||||
static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
|
||||
{
|
||||
BCReg s;
|
||||
MSize n = 0;
|
||||
for (s = 0; s < nslots; s++) {
|
||||
IRRef ref = tref_ref(J->slot[s]);
|
||||
if (ref) {
|
||||
IRIns *ir = IR(ref);
|
||||
if (ir->o == IR_SLOAD && ir->op1 == s && !(ir->op2 & IRSLOAD_INHERIT))
|
||||
ref = 0;
|
||||
if (!(ir->o == IR_SLOAD && ir->op1 == s &&
|
||||
!(ir->op2 & IRSLOAD_INHERIT)))
|
||||
map[n++] = SNAP(s, ir->o == IR_FRAME ? SNAP_FRAME : 0, ref);
|
||||
}
|
||||
map[s] = (SnapEntry)ref;
|
||||
}
|
||||
return nslots;
|
||||
return n;
|
||||
}
|
||||
|
||||
/* Add frame links at the end of the snapshot. */
|
||||
@ -53,17 +75,17 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map)
|
||||
cTValue *frame = J->L->base - 1;
|
||||
cTValue *lim = J->L->base - J->baseslot;
|
||||
MSize f = 0;
|
||||
map[f++] = u32ptr(J->pc);
|
||||
while (frame > lim) {
|
||||
map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
|
||||
while (frame > lim) { /* Backwards traversal of all frames above base. */
|
||||
if (frame_islua(frame)) {
|
||||
map[f++] = u32ptr(frame_pc(frame));
|
||||
map[f++] = SNAP_MKPC(frame_pc(frame));
|
||||
frame = frame_prevl(frame);
|
||||
} else if (frame_ispcall(frame)) {
|
||||
map[f++] = (uint32_t)frame_ftsz(frame);
|
||||
map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
|
||||
frame = frame_prevd(frame);
|
||||
} else if (frame_iscont(frame)) {
|
||||
map[f++] = (uint32_t)frame_ftsz(frame);
|
||||
map[f++] = u32ptr(frame_contpc(frame));
|
||||
map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
|
||||
map[f++] = SNAP_MKPC(frame_contpc(frame));
|
||||
frame = frame_prevd(frame);
|
||||
} else {
|
||||
lua_assert(0);
|
||||
@ -76,28 +98,19 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map)
|
||||
static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
|
||||
{
|
||||
BCReg nslots = J->baseslot + J->maxslot;
|
||||
MSize nsm, nframelinks;
|
||||
MSize nent, nframelinks;
|
||||
SnapEntry *p;
|
||||
/* Conservative estimate. Continuation frames need 2 slots. */
|
||||
nsm = nsnapmap + nslots + (uint32_t)J->framedepth*2+1;
|
||||
if (LJ_UNLIKELY(nsm > J->sizesnapmap)) { /* Need to grow snapshot map? */
|
||||
if (nsm < 2*J->sizesnapmap)
|
||||
nsm = 2*J->sizesnapmap;
|
||||
else if (nsm < 64)
|
||||
nsm = 64;
|
||||
J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
|
||||
J->sizesnapmap*sizeof(SnapEntry), nsm*sizeof(SnapEntry));
|
||||
J->cur.snapmap = J->snapmapbuf;
|
||||
J->sizesnapmap = nsm;
|
||||
}
|
||||
lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth*2+1);
|
||||
p = &J->cur.snapmap[nsnapmap];
|
||||
nslots = snapshot_slots(J, p, nslots);
|
||||
nframelinks = snapshot_framelinks(J, p + nslots);
|
||||
J->cur.nsnapmap = (uint16_t)(nsnapmap + nslots + nframelinks);
|
||||
nent = snapshot_slots(J, p, nslots);
|
||||
nframelinks = snapshot_framelinks(J, p + nent);
|
||||
J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + nframelinks);
|
||||
snap->mapofs = (uint16_t)nsnapmap;
|
||||
snap->ref = (IRRef1)J->cur.nins;
|
||||
snap->nslots = (uint8_t)nslots;
|
||||
snap->nent = (uint8_t)nent;
|
||||
snap->nframelinks = (uint8_t)nframelinks;
|
||||
snap->nslots = (uint8_t)nslots;
|
||||
snap->count = 0;
|
||||
}
|
||||
|
||||
@ -111,14 +124,7 @@ void lj_snap_add(jit_State *J)
|
||||
(nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) {
|
||||
nsnapmap = J->cur.snap[--nsnap].mapofs;
|
||||
} else {
|
||||
/* Need to grow snapshot buffer? */
|
||||
if (LJ_UNLIKELY(nsnap >= J->sizesnap)) {
|
||||
MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
|
||||
if (nsnap >= maxsnap)
|
||||
lj_trace_err(J, LJ_TRERR_SNAPOV);
|
||||
lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
|
||||
J->cur.snap = J->snapbuf;
|
||||
}
|
||||
lj_snap_grow_buf(J, nsnap+1);
|
||||
J->cur.nsnap = (uint16_t)(nsnap+1);
|
||||
}
|
||||
J->mergesnap = 0;
|
||||
@ -131,14 +137,21 @@ void lj_snap_shrink(jit_State *J)
|
||||
{
|
||||
BCReg nslots = J->baseslot + J->maxslot;
|
||||
SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
|
||||
SnapEntry *oflinks = &J->cur.snapmap[snap->mapofs + snap->nslots];
|
||||
SnapEntry *nflinks = &J->cur.snapmap[snap->mapofs + nslots];
|
||||
uint32_t s, nframelinks = snap->nframelinks;
|
||||
SnapEntry *map = &J->cur.snapmap[snap->mapofs];
|
||||
MSize nent = snap->nent;
|
||||
lua_assert(nslots < snap->nslots);
|
||||
snap->nslots = (uint8_t)nslots;
|
||||
J->cur.nsnapmap = (uint16_t)(snap->mapofs + nslots + nframelinks);
|
||||
for (s = 0; s < nframelinks; s++) /* Move frame links down. */
|
||||
nflinks[s] = oflinks[s];
|
||||
if (nent > 0 && snap_slot(map[nent-1]) >= nslots) {
|
||||
MSize s, delta, nframelinks = snap->nframelinks;
|
||||
for (nent--; nent > 0 && snap_slot(map[nent-1]) >= nslots; nent--)
|
||||
;
|
||||
delta = snap->nent - nent;
|
||||
snap->nent = (uint8_t)nent;
|
||||
J->cur.nsnapmap = (uint16_t)(snap->mapofs + nent + nframelinks);
|
||||
map += nent;
|
||||
for (s = 0; s < nframelinks; s++) /* Move frame links down. */
|
||||
map[s] = map[s+delta];
|
||||
}
|
||||
}
|
||||
|
||||
/* -- Snapshot access ----------------------------------------------------- */
|
||||
@ -167,21 +180,24 @@ static RegSP snap_renameref(Trace *T, SnapNo lim, IRRef ref, RegSP rs)
|
||||
return rs;
|
||||
}
|
||||
|
||||
/* Convert a snapshot into a linear slot -> RegSP map. */
|
||||
/* Convert a snapshot into a linear slot -> RegSP map.
|
||||
** Note: unused slots are not initialized!
|
||||
*/
|
||||
void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno)
|
||||
{
|
||||
SnapShot *snap = &T->snap[snapno];
|
||||
BCReg s, nslots = snap->nslots;
|
||||
MSize n, nent = snap->nent;
|
||||
SnapEntry *map = &T->snapmap[snap->mapofs];
|
||||
BloomFilter rfilt = snap_renamefilter(T, snapno);
|
||||
for (s = 0; s < nslots; s++) {
|
||||
IRRef ref = snap_ref(map[s]);
|
||||
for (n = 0; n < nent; n++) {
|
||||
SnapEntry sn = map[n];
|
||||
IRRef ref = snap_ref(sn);
|
||||
if (!irref_isk(ref)) {
|
||||
IRIns *ir = &T->ir[ref];
|
||||
uint32_t rs = ir->prev;
|
||||
if (bloomtest(rfilt, ref))
|
||||
rs = snap_renameref(T, snapno, ref, rs);
|
||||
rsmap[s] = (uint16_t)rs;
|
||||
rsmap[snap_slot(sn)] = (uint16_t)rs;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -193,89 +209,88 @@ void lj_snap_restore(jit_State *J, void *exptr)
|
||||
SnapNo snapno = J->exitno; /* For now, snapno == exitno. */
|
||||
Trace *T = J->trace[J->parent];
|
||||
SnapShot *snap = &T->snap[snapno];
|
||||
BCReg s, nslots = snap->nslots;
|
||||
MSize n, nent = snap->nent;
|
||||
SnapEntry *map = &T->snapmap[snap->mapofs];
|
||||
SnapEntry *flinks = map + nslots + snap->nframelinks;
|
||||
TValue *o, *newbase, *ntop;
|
||||
SnapEntry *flinks = map + nent + snap->nframelinks;
|
||||
BCReg nslots = snap->nslots;
|
||||
TValue *frame;
|
||||
BloomFilter rfilt = snap_renamefilter(T, snapno);
|
||||
lua_State *L = J->L;
|
||||
|
||||
/* Make sure the stack is big enough for the slots from the snapshot. */
|
||||
if (L->base + nslots >= L->maxstack) {
|
||||
if (LJ_UNLIKELY(L->base + nslots > L->maxstack)) {
|
||||
L->top = curr_topL(L);
|
||||
lj_state_growstack(L, nslots - curr_proto(L)->framesize);
|
||||
}
|
||||
|
||||
/* Fill stack slots with data from the registers and spill slots. */
|
||||
newbase = NULL;
|
||||
ntop = L->base;
|
||||
for (s = 0, o = L->base-1; s < nslots; s++, o++) {
|
||||
IRRef ref = snap_ref(map[s]);
|
||||
if (ref) {
|
||||
IRIns *ir = &T->ir[ref];
|
||||
if (irref_isk(ref)) { /* Restore constant slot. */
|
||||
lj_ir_kvalue(L, o, ir);
|
||||
} else {
|
||||
IRType1 t = ir->t;
|
||||
RegSP rs = ir->prev;
|
||||
if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
|
||||
rs = snap_renameref(T, snapno, ref, rs);
|
||||
if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
|
||||
int32_t *sps = &ex->spill[regsp_spill(rs)];
|
||||
if (irt_isinteger(t)) {
|
||||
setintV(o, *sps);
|
||||
} else if (irt_isnum(t)) {
|
||||
o->u64 = *(uint64_t *)sps;
|
||||
} else {
|
||||
lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
|
||||
setgcrefi(o->gcr, *sps);
|
||||
setitype(o, irt_toitype(t));
|
||||
}
|
||||
} else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */
|
||||
Reg r = regsp_reg(rs);
|
||||
if (irt_isinteger(t)) {
|
||||
setintV(o, ex->gpr[r-RID_MIN_GPR]);
|
||||
} else if (irt_isnum(t)) {
|
||||
setnumV(o, ex->fpr[r-RID_MIN_FPR]);
|
||||
} else {
|
||||
if (!irt_ispri(t))
|
||||
setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
|
||||
setitype(o, irt_toitype(t));
|
||||
}
|
||||
} else { /* Restore frame slot. */
|
||||
lua_assert(ir->o == IR_FRAME);
|
||||
/* This works for both PTR and FUNC IR_FRAME. */
|
||||
setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void));
|
||||
if (s != 0) /* Do not overwrite link to previous frame. */
|
||||
o->fr.tp.ftsz = (int32_t)*--flinks;
|
||||
if (irt_isfunc(ir->t)) {
|
||||
GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr));
|
||||
if (isluafunc(fn)) {
|
||||
TValue *fs;
|
||||
fs = o+1 + funcproto(fn)->framesize;
|
||||
if (fs > ntop) ntop = fs; /* Update top for newly added frames. */
|
||||
if (s != 0) newbase = o+1;
|
||||
frame = L->base-1;
|
||||
for (n = 0; n < nent; n++) {
|
||||
IRRef ref = snap_ref(map[n]);
|
||||
BCReg s = snap_slot(map[n]);
|
||||
TValue *o = &frame[s]; /* Stack slots are relative to start frame. */
|
||||
IRIns *ir = &T->ir[ref];
|
||||
if (irref_isk(ref)) { /* Restore constant slot. */
|
||||
lj_ir_kvalue(L, o, ir);
|
||||
} else {
|
||||
IRType1 t = ir->t;
|
||||
RegSP rs = ir->prev;
|
||||
if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
|
||||
rs = snap_renameref(T, snapno, ref, rs);
|
||||
if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
|
||||
int32_t *sps = &ex->spill[regsp_spill(rs)];
|
||||
if (irt_isinteger(t)) {
|
||||
setintV(o, *sps);
|
||||
} else if (irt_isnum(t)) {
|
||||
o->u64 = *(uint64_t *)sps;
|
||||
} else {
|
||||
lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
|
||||
setgcrefi(o->gcr, *sps);
|
||||
setitype(o, irt_toitype(t));
|
||||
}
|
||||
} else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */
|
||||
Reg r = regsp_reg(rs);
|
||||
if (irt_isinteger(t)) {
|
||||
setintV(o, ex->gpr[r-RID_MIN_GPR]);
|
||||
} else if (irt_isnum(t)) {
|
||||
setnumV(o, ex->fpr[r-RID_MIN_FPR]);
|
||||
} else {
|
||||
if (!irt_ispri(t))
|
||||
setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
|
||||
setitype(o, irt_toitype(t));
|
||||
}
|
||||
} else { /* Restore frame slot. */
|
||||
lua_assert(ir->o == IR_FRAME);
|
||||
/* This works for both PTR and FUNC IR_FRAME. */
|
||||
setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void));
|
||||
if (s != 0) /* Do not overwrite link to previous frame. */
|
||||
o->fr.tp.ftsz = (int32_t)*--flinks;
|
||||
if (irt_isfunc(ir->t)) {
|
||||
GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr));
|
||||
if (isluafunc(fn)) {
|
||||
MSize framesize = funcproto(fn)->framesize;
|
||||
TValue *fs;
|
||||
L->base = ++o;
|
||||
if (LJ_UNLIKELY(o + framesize > L->maxstack)) { /* Grow again? */
|
||||
ptrdiff_t fsave = savestack(L, frame);
|
||||
L->top = o;
|
||||
lj_state_growstack(L, framesize);
|
||||
frame = restorestack(L, fsave);
|
||||
o = L->top;
|
||||
}
|
||||
fs = o + framesize;
|
||||
if (s == 0) /* Only partially clear tail call frame at #0. */
|
||||
o = &frame[nslots];
|
||||
while (o < fs) /* Clear slots of newly added frames. */
|
||||
setnilV(o++);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
lua_assert(!newbase);
|
||||
}
|
||||
}
|
||||
if (newbase) L->base = newbase;
|
||||
if (ntop >= L->maxstack) { /* Need to grow the stack again. */
|
||||
MSize need = (MSize)(ntop - o);
|
||||
L->top = o;
|
||||
lj_state_growstack(L, need);
|
||||
o = L->top;
|
||||
ntop = o + need;
|
||||
}
|
||||
L->top = curr_topL(L);
|
||||
for (; o < ntop; o++) /* Clear remainder of newly added frames. */
|
||||
setnilV(o);
|
||||
lua_assert(map + nslots == flinks-1);
|
||||
J->pc = (const BCIns *)(uintptr_t)(*--flinks);
|
||||
J->pc = snap_pc(*--flinks);
|
||||
lua_assert(map + nent == flinks);
|
||||
}
|
||||
|
||||
#undef IR
|
||||
|
@ -14,6 +14,19 @@ LJ_FUNC void lj_snap_add(jit_State *J);
|
||||
LJ_FUNC void lj_snap_shrink(jit_State *J);
|
||||
LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno);
|
||||
LJ_FUNC void lj_snap_restore(jit_State *J, void *exptr);
|
||||
LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need);
|
||||
LJ_FUNC void lj_snap_grow_map_(jit_State *J, MSize need);
|
||||
|
||||
/* Inline fast-path check for the snapshot buffer capacity.
** Does nothing when `need` fits within J->sizesnap; otherwise defers to
** the out-of-line lj_snap_grow_buf_(J, need).
** NOTE(review): the grow routine itself is not visible here -- presumably it
** enlarges the buffer or raises a trace error when the limit is hit; confirm.
*/
static LJ_AINLINE void lj_snap_grow_buf(jit_State *J, MSize need)
|
||||
{
|
||||
if (LJ_UNLIKELY(need > J->sizesnap)) lj_snap_grow_buf_(J, need);
|
||||
}
|
||||
|
||||
/* Inline fast-path check for the snapshot map capacity.
** Does nothing when `need` fits within J->sizesnapmap; otherwise defers to
** the out-of-line lj_snap_grow_map_(J, need).
** NOTE(review): the grow routine itself is not visible here -- presumably it
** reallocates the map to hold at least `need` entries; confirm.
*/
static LJ_AINLINE void lj_snap_grow_map(jit_State *J, MSize need)
|
||||
{
|
||||
if (LJ_UNLIKELY(need > J->sizesnapmap)) lj_snap_grow_map_(J, need);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -161,8 +161,8 @@ void lj_trace_reenableproto(GCproto *pt)
|
||||
static void trace_unpatch(jit_State *J, Trace *T)
|
||||
{
|
||||
BCOp op = bc_op(T->startins);
|
||||
uint32_t pcofs = T->snap[0].mapofs + T->snap[0].nslots;
|
||||
BCIns *pc = ((BCIns *)(uintptr_t)T->snapmap[pcofs]) - 1;
|
||||
MSize pcofs = T->snap[0].mapofs + T->snap[0].nent;
|
||||
BCIns *pc = ((BCIns *)snap_pc(T->snapmap[pcofs])) - 1;
|
||||
switch (op) {
|
||||
case BC_FORL:
|
||||
lua_assert(bc_op(*pc) == BC_JFORI);
|
||||
@ -352,7 +352,6 @@ static void trace_start(jit_State *J)
|
||||
J->cur.ir = J->irbuf;
|
||||
J->cur.snap = J->snapbuf;
|
||||
J->cur.snapmap = J->snapmapbuf;
|
||||
/* J->cur.nsnapmap = 0; */
|
||||
J->mergesnap = 0;
|
||||
J->needsnap = 0;
|
||||
J->guardemit.irt = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user