diff --git a/src/lj_asm.c b/src/lj_asm.c index b3656e00..3912bbeb 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -2467,14 +2467,15 @@ static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base) SnapEntry *map = &as->T->snapmap[snap->mapofs]; MSize n, nent = snap->nent; for (n = 0; n < nent; n++) { - IRRef ref = snap_ref(map[n]); + SnapEntry sn = map[n]; + IRRef ref = snap_ref(sn); + /* NYI: sync the frame, bump base, set topslot, clear new slots. */ + if ((sn & (SNAP_CONT|SNAP_FRAME))) + lj_trace_err(as->J, LJ_TRERR_NYIGCF); if (!irref_isk(ref)) { - int32_t ofs = 8*(int32_t)(snap_slot(map[n])-1); IRIns *ir = IR(ref); - if (ir->o == IR_FRAME) { - /* NYI: sync the frame, bump base, set topslot, clear new slots. */ - lj_trace_err(as->J, LJ_TRERR_NYIGCF); - } else if (irt_isgcv(ir->t)) { + if (irt_isgcv(ir->t)) { + int32_t ofs = 8*(int32_t)(snap_slot(sn)-1); Reg src = ra_alloc1(as, ref, allow); emit_movtomro(as, src, base, ofs); emit_movmroi(as, base, ofs+4, irt_toitype(ir->t)); @@ -2975,17 +2976,16 @@ static void asm_tail_sync(ASMState *as) /* Must check all frames to find topslot (outer can be larger than inner). */ for (n = 0; n < nent; n++) { - IRRef ref = snap_ref(map[n]); - BCReg s = snap_slot(map[n]); - if (!irref_isk(ref)) { - IRIns *ir = IR(ref); - if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { - GCfunc *fn = ir_kfunc(IR(ir->op2)); - if (isluafunc(fn)) { - BCReg fs = s + funcproto(fn)->framesize; - if (fs > topslot) topslot = fs; - newbase = s; - } + SnapEntry sn = map[n]; + if ((sn & SNAP_FRAME)) { + IRIns *ir = IR(snap_ref(sn)); + GCfunc *fn = ir_kfunc(IR(ir->op2)); + lua_assert(ir->o == IR_FRAME && irt_isfunc(ir->t)); + if (isluafunc(fn)) { + BCReg s = snap_slot(sn); + BCReg fs = s + funcproto(fn)->framesize; + if (fs > topslot) topslot = fs; + newbase = s; } } } diff --git a/src/lj_ir.h b/src/lj_ir.h index efc8205e..672aca4a 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -422,18 +422,29 @@ enum { #define irref_isk(ref) ((ref) < REF_BIAS) -/* Tagged IR references. */ +/* Tagged IR references (32 bit). +** +** +-------+-------+---------------+ +** | irt | flags | ref | +** +-------+-------+---------------+ +** +** The tag holds a copy of the IRType and speeds up IR type checks. +*/ typedef uint32_t TRef; -#define TREF(ref, t) (cast(TRef, (ref) + ((t)<<16))) +#define TREF_REFMASK 0x0000ffff +#define TREF_FRAME 0x00010000 +#define TREF_CONT 0x00020000 -#define tref_ref(tr) (cast(IRRef1, (tr))) -#define tref_t(tr) (cast(IRType, (tr)>>16)) -#define tref_type(tr) (cast(IRType, ((tr)>>16) & IRT_TYPE)) +#define TREF(ref, t) ((TRef)((ref) + ((t)<<24))) + +#define tref_ref(tr) ((IRRef1)(tr)) +#define tref_t(tr) ((IRType)((tr)>>24)) +#define tref_type(tr) ((IRType)(((tr)>>24) & IRT_TYPE)) #define tref_typerange(tr, first, last) \ - ((((tr)>>16) & IRT_TYPE) - (TRef)(first) <= (TRef)(last-first)) + ((((tr)>>24) & IRT_TYPE) - (TRef)(first) <= (TRef)(last-first)) -#define tref_istype(tr, t) (((tr) & (IRT_TYPE<<16)) == ((t)<<16)) +#define tref_istype(tr, t) (((tr) & (IRT_TYPE<<24)) == ((t)<<24)) #define tref_isnil(tr) (tref_istype((tr), IRT_NIL)) #define tref_isfalse(tr) (tref_istype((tr), IRT_FALSE)) #define tref_istrue(tr) (tref_istype((tr), IRT_TRUE)) diff --git a/src/lj_jit.h b/src/lj_jit.h index 1a1e407a..35595cd5 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -123,9 +123,14 @@ typedef struct SnapShot { /* Compressed snapshot entry. */ typedef uint32_t SnapEntry; -#define SNAP_FRAME 0x010000 /* Slot has frame link. */ +#define SNAP_FRAME 0x010000 /* Frame slot. */ +#define SNAP_CONT 0x020000 /* Continuation slot. */ +LJ_STATIC_ASSERT(SNAP_FRAME == TREF_FRAME); +LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); -#define SNAP(slot, flags, ref) ((SnapEntry)((slot) << 24) + (flags) + (ref)) +#define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) +#define SNAP_TR(slot, tr) \ + (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) #define snap_ref(sn) ((sn) & 0xffff) diff --git a/src/lj_record.c b/src/lj_record.c index c14a9e86..94ea42ed 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -424,7 +424,7 @@ static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) #else trcont = lj_ir_kptr(J, (void *)cont); #endif - J->base[top] = emitir(IRTG(IR_FRAME, IRT_PTR), trcont, trcont); + J->base[top] = emitir(IRTG(IR_FRAME, IRT_PTR), trcont, trcont) | TREF_CONT; for (s = J->maxslot; s < top; s++) J->base[s] = TREF_NIL; return top+1; @@ -1608,7 +1608,7 @@ static int rec_call(jit_State *J, BCReg func, int cres, int nargs) } /* Specialize to the runtime value of the called function. */ - res[0] = emitir(IRTG(IR_FRAME, IRT_FUNC), res[0], lj_ir_kfunc(J, rd.fn)); + res[0] = emitir(IRTG(IR_FRAME, IRT_FUNC), res[0], lj_ir_kfunc(J, rd.fn)) | TREF_FRAME; if (isluafunc(rd.fn)) { /* Record call to Lua function. */ GCproto *pt = funcproto(rd.fn); @@ -2164,8 +2164,9 @@ static void rec_setup_side(jit_State *J, Trace *T) BloomFilter seen = 0; /* Emit IR for slots inherited from parent snapshot. */ for (n = 0; n < nent; n++) { - IRRef ref = snap_ref(map[n]); - BCReg s = snap_slot(map[n]); + SnapEntry sn = map[n]; + IRRef ref = snap_ref(sn); + BCReg s = snap_slot(sn); IRIns *ir = &T->ir[ref]; TRef tr; /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */ @@ -2196,10 +2197,10 @@ static void rec_setup_side(jit_State *J, Trace *T) J->framedepth++; } tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2])); - tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr); + tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr) | TREF_FRAME; } else { tr = lj_ir_kptr(J, mref(T->ir[ir->op2].ptr, void)); - tr = emitir_raw(IRT(IR_FRAME, IRT_PTR), tr, tr); + tr = emitir_raw(IRT(IR_FRAME, IRT_PTR), tr, tr) | TREF_CONT; } break; case IR_SLOAD: /* Inherited SLOADs don't need a guard or type check. */ diff --git a/src/lj_snap.c b/src/lj_snap.c index d22c90a4..731b8f92 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -50,20 +50,19 @@ void lj_snap_grow_map_(jit_State *J, MSize need) /* -- Snapshot generation ------------------------------------------------- */ -/* NYI: IR_FRAME should be eliminated, too. */ - /* Add all modified slots to the snapshot. */ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) { BCReg s; MSize n = 0; for (s = 0; s < nslots; s++) { - IRRef ref = tref_ref(J->slot[s]); + TRef tr = J->slot[s]; + IRRef ref = tref_ref(tr); if (ref) { IRIns *ir = IR(ref); if (!(ir->o == IR_SLOAD && ir->op1 == s && !(ir->op2 & IRSLOAD_INHERIT))) - map[n++] = SNAP(s, ir->o == IR_FRAME ? SNAP_FRAME : 0, ref); + map[n++] = SNAP_TR(s, tr); } } return n; @@ -226,8 +225,9 @@ void lj_snap_restore(jit_State *J, void *exptr) /* Fill stack slots with data from the registers and spill slots. */ frame = L->base-1; for (n = 0; n < nent; n++) { - IRRef ref = snap_ref(map[n]); - BCReg s = snap_slot(map[n]); + SnapEntry sn = map[n]; + IRRef ref = snap_ref(sn); + BCReg s = snap_slot(sn); TValue *o = &frame[s]; /* Stack slots are relative to start frame. */ IRIns *ir = &T->ir[ref]; if (irref_isk(ref)) { /* Restore constant slot. */ @@ -260,6 +260,7 @@ void lj_snap_restore(jit_State *J, void *exptr) setitype(o, irt_toitype(t)); } } else { /* Restore frame slot. */ + lua_assert((sn & (SNAP_CONT|SNAP_FRAME))); lua_assert(ir->o == IR_FRAME); /* This works for both PTR and FUNC IR_FRAME. */ setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void));