/* ** Snapshot handling. ** Copyright (C) 2005-2010 Mike Pall. See Copyright Notice in luajit.h */ #define lj_snap_c #define LUA_CORE #include "lj_obj.h" #if LJ_HASJIT #include "lj_gc.h" #include "lj_state.h" #include "lj_frame.h" #include "lj_ir.h" #include "lj_jit.h" #include "lj_iropt.h" #include "lj_trace.h" #include "lj_snap.h" #include "lj_target.h" /* Some local macros to save typing. Undef'd at the end. */ #define IR(ref) (&J->cur.ir[(ref)]) /* -- Snapshot generation ------------------------------------------------- */ /* NYI: Snapshots are in need of a redesign. The current storage model for ** snapshot maps is too wasteful. They could be compressed (1D or 2D) and ** made more flexible at the same time. Iterators should no longer need to ** skip unmodified slots. IR_FRAME should be eliminated, too. */ /* Add all modified slots to the snapshot. */ static void snapshot_slots(jit_State *J, IRRef2 *map, BCReg nslots) { BCReg s; for (s = 0; s < nslots; s++) { IRRef ref = tref_ref(J->slot[s]); if (ref) { IRIns *ir = IR(ref); if (ir->o == IR_SLOAD && ir->op1 == s && !(ir->op2 & IRSLOAD_INHERIT)) ref = 0; } map[s] = (IRRef2)ref; } } /* Add frame links at the end of the snapshot. */ static MSize snapshot_framelinks(jit_State *J, IRRef2 *map) { cTValue *frame = J->L->base - 1; cTValue *lim = J->L->base - J->baseslot; MSize f = 0; map[f++] = u32ptr(J->pc); while (frame > lim) { if (frame_islua(frame)) { map[f++] = u32ptr(frame_pc(frame)); frame = frame_prevl(frame); } else if (frame_ispcall(frame)) { map[f++] = (uint32_t)frame_ftsz(frame); frame = frame_prevd(frame); } else if (frame_iscont(frame)) { map[f++] = (uint32_t)frame_ftsz(frame); map[f++] = u32ptr(frame_contpc(frame)); frame = frame_prevd(frame); } else { lua_assert(0); } } return f; } /* Take a snapshot of the current stack. */ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) { BCReg nslots = J->baseslot + J->maxslot; MSize nsm, nframelinks; IRRef2 *p; /* Conservative estimate. Continuation frames need 2 slots. */ nsm = nsnapmap + nslots + (uint32_t)J->framedepth*2+1; if (LJ_UNLIKELY(nsm > J->sizesnapmap)) { /* Need to grow snapshot map? */ if (nsm < 2*J->sizesnapmap) nsm = 2*J->sizesnapmap; else if (nsm < 64) nsm = 64; J->snapmapbuf = (IRRef2 *)lj_mem_realloc(J->L, J->snapmapbuf, J->sizesnapmap*sizeof(IRRef2), nsm*sizeof(IRRef2)); J->cur.snapmap = J->snapmapbuf; J->sizesnapmap = nsm; } p = &J->cur.snapmap[nsnapmap]; snapshot_slots(J, p, nslots); nframelinks = snapshot_framelinks(J, p + nslots); J->cur.nsnapmap = (uint16_t)(nsnapmap + nslots + nframelinks); snap->mapofs = (uint16_t)nsnapmap; snap->ref = (IRRef1)J->cur.nins; snap->nslots = (uint8_t)nslots; snap->nframelinks = (uint8_t)nframelinks; snap->count = 0; } /* Add or merge a snapshot. */ void lj_snap_add(jit_State *J) { MSize nsnap = J->cur.nsnap; MSize nsnapmap = J->cur.nsnapmap; /* Merge if no ins. inbetween or if requested and no guard inbetween. */ if (J->mergesnap ? !irt_isguard(J->guardemit) : (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) { nsnapmap = J->cur.snap[--nsnap].mapofs; } else { /* Need to grow snapshot buffer? */ if (LJ_UNLIKELY(nsnap >= J->sizesnap)) { MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; if (nsnap >= maxsnap) lj_trace_err(J, LJ_TRERR_SNAPOV); lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); J->cur.snap = J->snapbuf; } J->cur.nsnap = (uint16_t)(nsnap+1); } J->mergesnap = 0; J->guardemit.irt = 0; snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap); } /* Shrink last snapshot. */ void lj_snap_shrink(jit_State *J) { BCReg nslots = J->baseslot + J->maxslot; SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; IRRef2 *oflinks = &J->cur.snapmap[snap->mapofs + snap->nslots]; IRRef2 *nflinks = &J->cur.snapmap[snap->mapofs + nslots]; uint32_t s, nframelinks = snap->nframelinks; lua_assert(nslots < snap->nslots); snap->nslots = (uint8_t)nslots; J->cur.nsnapmap = (uint16_t)(snap->mapofs + nslots + nframelinks); for (s = 0; s < nframelinks; s++) /* Move frame links down. */ nflinks[s] = oflinks[s]; } /* -- Snapshot access ----------------------------------------------------- */ /* Initialize a Bloom Filter with all renamed refs. ** There are very few renames (often none), so the filter has ** very few bits set. This makes it suitable for negative filtering. */ static BloomFilter snap_renamefilter(Trace *T, SnapNo lim) { BloomFilter rfilt = 0; IRIns *ir; for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--) if (ir->op2 <= lim) bloomset(rfilt, ir->op1); return rfilt; } /* Process matching renames to find the original RegSP. */ static RegSP snap_renameref(Trace *T, SnapNo lim, IRRef ref, RegSP rs) { IRIns *ir; for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--) if (ir->op1 == ref && ir->op2 <= lim) rs = ir->prev; return rs; } /* Convert a snapshot into a linear slot -> RegSP map. */ void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno) { SnapShot *snap = &T->snap[snapno]; BCReg s, nslots = snap->nslots; IRRef2 *map = &T->snapmap[snap->mapofs]; BloomFilter rfilt = snap_renamefilter(T, snapno); for (s = 0; s < nslots; s++) { IRRef ref = snap_ref(map[s]); if (!irref_isk(ref)) { IRIns *ir = &T->ir[ref]; uint32_t rs = ir->prev; if (bloomtest(rfilt, ref)) rs = snap_renameref(T, snapno, ref, rs); rsmap[s] = (uint16_t)rs; } } } /* Restore interpreter state from exit state with the help of a snapshot. */ void lj_snap_restore(jit_State *J, void *exptr) { ExitState *ex = (ExitState *)exptr; SnapNo snapno = J->exitno; /* For now, snapno == exitno. */ Trace *T = J->trace[J->parent]; SnapShot *snap = &T->snap[snapno]; BCReg s, nslots = snap->nslots; IRRef2 *map = &T->snapmap[snap->mapofs]; IRRef2 *flinks = map + nslots + snap->nframelinks; TValue *o, *newbase, *ntop; BloomFilter rfilt = snap_renamefilter(T, snapno); lua_State *L = J->L; /* Make sure the stack is big enough for the slots from the snapshot. */ if (L->base + nslots >= L->maxstack) { L->top = curr_topL(L); lj_state_growstack(L, nslots - curr_proto(L)->framesize); } /* Fill stack slots with data from the registers and spill slots. */ newbase = NULL; ntop = L->base; for (s = 0, o = L->base-1; s < nslots; s++, o++) { IRRef ref = snap_ref(map[s]); if (ref) { IRIns *ir = &T->ir[ref]; if (irref_isk(ref)) { /* Restore constant slot. */ lj_ir_kvalue(L, o, ir); } else { IRType1 t = ir->t; RegSP rs = ir->prev; if (LJ_UNLIKELY(bloomtest(rfilt, ref))) rs = snap_renameref(T, snapno, ref, rs); if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ int32_t *sps = &ex->spill[regsp_spill(rs)]; if (irt_isinteger(t)) { setintV(o, *sps); } else if (irt_isnum(t)) { o->u64 = *(uint64_t *)sps; } else { lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ setgcrefi(o->gcr, *sps); setitype(o, irt_toitype(t)); } } else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */ Reg r = regsp_reg(rs); if (irt_isinteger(t)) { setintV(o, ex->gpr[r-RID_MIN_GPR]); } else if (irt_isnum(t)) { setnumV(o, ex->fpr[r-RID_MIN_FPR]); } else { if (!irt_ispri(t)) setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]); setitype(o, irt_toitype(t)); } } else { /* Restore frame slot. */ lua_assert(ir->o == IR_FRAME); /* This works for both PTR and FUNC IR_FRAME. */ setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void)); if (s != 0) /* Do not overwrite link to previous frame. */ o->fr.tp.ftsz = (int32_t)*--flinks; if (irt_isfunc(ir->t)) { GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr)); if (isluafunc(fn)) { TValue *fs; fs = o+1 + funcproto(fn)->framesize; if (fs > ntop) ntop = fs; /* Update top for newly added frames. */ if (s != 0) newbase = o+1; } } } } } else if (newbase) { setnilV(o); /* Clear unreferenced slots of newly added frames. */ } } if (newbase) L->base = newbase; if (ntop >= L->maxstack) { /* Need to grow the stack again. */ MSize need = (MSize)(ntop - o); L->top = o; lj_state_growstack(L, need); o = L->top; ntop = o + need; } L->top = curr_topL(L); for (; o < ntop; o++) /* Clear remainder of newly added frames. */ setnilV(o); lua_assert(map + nslots == flinks-1); J->pc = (const BCIns *)(uintptr_t)(*--flinks); } #undef IR #endif