From 79021951e5b2d80119d86cc850a876eb3faf89cf Mon Sep 17 00:00:00 2001 From: Peter Cawley Date: Sat, 26 Mar 2016 15:37:38 +0000 Subject: [PATCH] LJ_FR2: Improve trace recording and snapshots. The interesting changes here revolve around slots marked as TREF_FRAME / TREF_CONT. Under !LJ_FR2, these slots contain two 32-bit values, and the TRef for the slot primarily relates to the low 32 bits. In a snapshot, the main SnapEntry relates to the low 32 bits, and the framelink from the snapshot is used to restore the high 32 bits. Under LJ_FR2, TREF_FRAME / TREF_CONT slots contain a single 64-bit value. The TRef relates to all 64 bits, the SnapEntry is used to restore all 64 bits, and no framelinks are required to restore the slot. Restoration is done via IR_KNUM constants, as the 64-bit values in question can be happily interpreted as denormal numbers. These constants are created lazily: the slots in question are initially set to just TREF_FRAME / TREF_CONT, and then, if required for a snapshot, the ref part of the TRef is changed from zero to the index of a KNUM. Slot 1 is always zero: although it is technically a frame link, it never needs to be changed, saved, or restored. Though the framelink part of a snapshot isn't required for slot restoration under LJ_FR2, it is still used for restoring the PC. As such, every snapshot has exactly two framelink entries, which together store a single 64-bit value: the PC shifted left by 8 bits, with (baseslot - 2) in the low 8 bits. Manipulations of J->maxslot are more interesting under LJ_FR2. For example, the BC_MOV of a method call can introduce a three-slot gap under LJ_FR2, whereas it could only introduce a one-slot gap under !LJ_FR2. Other instructions can now introduce a one-slot gap where previously they would never introduce a gap. 
--- src/jit/dump.lua | 10 ++- src/lj_arch.h | 2 +- src/lj_asm.c | 13 ++- src/lj_asm_x86.h | 22 ++++- src/lj_crecord.c | 26 ++++-- src/lj_def.h | 2 + src/lj_ffrecord.c | 48 ++++++---- src/lj_ir.h | 2 +- src/lj_jit.h | 4 + src/lj_record.c | 225 ++++++++++++++++++++++++++++++---------------- src/lj_snap.c | 75 ++++++++++++---- 11 files changed, 298 insertions(+), 131 deletions(-) diff --git a/src/jit/dump.lua b/src/jit/dump.lua index 9a722f73..a635af10 100644 --- a/src/jit/dump.lua +++ b/src/jit/dump.lua @@ -310,15 +310,17 @@ local function fmtfunc(func, pc) end end -local function formatk(tr, idx) +local function formatk(tr, idx, sn) local k, t, slot = tracek(tr, idx) local tn = type(k) local s if tn == "number" then - if k == 2^52+2^51 then + if band(sn or 0, 0x30000) ~= 0 then + s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz" + elseif k == 2^52+2^51 then s = "bias" else - s = format("%+.14g", k) + s = format(0 < k and k < 0x1p-1026 and "%+a" or "%+.14g", k) end elseif tn == "string" then s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub)) @@ -354,7 +356,7 @@ local function printsnap(tr, snap) n = n + 1 local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS if ref < 0 then - out:write(formatk(tr, ref)) + out:write(formatk(tr, ref, sn)) elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) else diff --git a/src/lj_arch.h b/src/lj_arch.h index 612c7303..72622a21 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -453,7 +453,7 @@ #endif /* Disable or enable the JIT compiler. 
*/ -#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_FR2 || LJ_GC64 +#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_GC64 #define LJ_HASJIT 0 #else #define LJ_HASJIT 1 diff --git a/src/lj_asm.c b/src/lj_asm.c index 0f723479..9e6f6576 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -1893,7 +1893,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe) SnapEntry sn = map[n-1]; if ((sn & SNAP_FRAME)) { *gotframe = 1; - return snap_slot(sn); + return snap_slot(sn) - LJ_FR2; } } return 0; @@ -1913,8 +1913,15 @@ static void asm_tail_link(ASMState *as) if (as->T->link == 0) { /* Setup fixed registers for exit to interpreter. */ - const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]); int32_t mres; +#if LJ_FR2 + uint64_t pcbase; + const BCIns *pc; + memcpy(&pcbase, &as->T->snapmap[snap->mapofs + snap->nent], 8); + pc = (const BCIns *)(pcbase >> 8); +#else + const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]); +#endif if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. 
*/ BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins; if (bc_isret(bc_op(*retpc))) @@ -1922,7 +1929,7 @@ static void asm_tail_link(ASMState *as) } ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); ra_allockreg(as, i32ptr(pc), RID_LPC); - mres = (int32_t)(snap->nslots - baseslot); + mres = (int32_t)(snap->nslots - baseslot - LJ_FR2); switch (bc_op(*pc)) { case BC_CALLM: case BC_CALLMT: mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break; diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index a7e8a79c..1f1b9d9d 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -348,7 +348,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && noconflict(as, ref, IR_RETF, 0)) { as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); - as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0); + as->mrm.ofs = 8*((int32_t)ir->op1-1) + (!LJ_FR2&&(ir->op2&IRSLOAD_FRAME)?4:0); as->mrm.idx = RID_NONE; return RID_MRM; } @@ -655,6 +655,9 @@ static void asm_callx(ASMState *as, IRIns *ir) static void asm_retf(ASMState *as, IRIns *ir) { Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); +#if LJ_FR2 + Reg rpc = ra_scratch(as, rset_exclude(RSET_GPR, base)); +#endif void *pc = ir_kptr(IR(ir->op2)); int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); as->topslot -= (BCReg)delta; @@ -663,7 +666,12 @@ static void asm_retf(ASMState *as, IRIns *ir) emit_setgl(as, base, jit_base); emit_addptr(as, base, -8*delta); asm_guardcc(as, CC_NE); +#if LJ_FR2 + emit_rmro(as, XO_CMP, rpc, base, -8); + emit_loadu64(as, rpc, u64ptr(pc)); +#else emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc)); +#endif } /* -- Type conversions ---------------------------------------------------- */ @@ -1400,7 +1408,11 @@ static void asm_ahustore(ASMState *as, IRIns *ir) static void asm_sload(ASMState *as, IRIns *ir) { - int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 
4 : 0); +#if LJ_FR2 + int32_t ofs = 8*((int32_t)ir->op1-2); +#else + int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0); +#endif IRType1 t = ir->t; Reg base; lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ @@ -2386,13 +2398,13 @@ static void asm_stack_check(ASMState *as, BCReg topslot, static void asm_stack_restore(ASMState *as, SnapShot *snap) { SnapEntry *map = &as->T->snapmap[snap->mapofs]; - SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; + SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; MSize n, nent = snap->nent; /* Store the value of all modified slots to the Lua stack. */ for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; BCReg s = snap_slot(sn); - int32_t ofs = 8*((int32_t)s-1); + int32_t ofs = 8*((int32_t)s-1-LJ_FR2); IRRef ref = snap_ref(sn); IRIns *ir = IR(ref); if ((sn & SNAP_NORESTORE)) @@ -2410,8 +2422,10 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) emit_movmroi(as, RID_BASE, ofs, ir->i); } if ((sn & (SNAP_CONT|SNAP_FRAME))) { +#if !LJ_FR2 if (s != 0) /* Do not overwrite link to previous frame. */ emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--)); +#endif } else { if (!(LJ_64 && irt_islightud(ir->t))) emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); diff --git a/src/lj_crecord.c b/src/lj_crecord.c index c0f7e3d7..61dbc3f0 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -721,7 +721,11 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, if (!tv) lj_trace_err(J, LJ_TRERR_BADTYPE); if (tvisfunc(tv)) { - J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; + J->base[-1-LJ_FR2] = lj_ir_kfunc(J, funcV(tv)); +#if LJ_FR2 + J->base[-1] = 0; +#endif + J->base[-1] |= TREF_FRAME; rd->nres = -1; /* Pending tailcall. */ } else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) { /* Specialize to result of __index lookup. 
*/ @@ -1119,20 +1123,20 @@ static void crec_snap_caller(jit_State *J) lua_State *L = J->L; TValue *base = L->base, *top = L->top; const BCIns *pc = J->pc; - TRef ftr = J->base[-1]; + TRef ftr = J->base[-1-LJ_FR2]; ptrdiff_t delta; if (!frame_islua(base-1) || J->framedepth <= 0) lj_trace_err(J, LJ_TRERR_NYICALL); J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]); L->top = base; L->base = base - delta; - J->base[-1] = TREF_FALSE; + J->base[-1-LJ_FR2] = TREF_FALSE; J->base -= delta; J->baseslot -= (BCReg)delta; - J->maxslot = (BCReg)delta; J->framedepth--; + J->maxslot = (BCReg)delta-LJ_FR2; J->framedepth--; lj_snap_add(J); L->base = base; L->top = top; J->framedepth++; J->maxslot = 1; J->base += delta; J->baseslot += (BCReg)delta; - J->base[-1] = ftr; J->pc = pc; + J->base[-1-LJ_FR2] = ftr; J->pc = pc; } /* Record function call. */ @@ -1224,7 +1228,11 @@ void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd) tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm); if (tv) { if (tvisfunc(tv)) { - J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; + J->base[-1-LJ_FR2] = lj_ir_kfunc(J, funcV(tv)); +#if LJ_FR2 + J->base[-1] = 0; +#endif + J->base[-1] |= TREF_FRAME; rd->nres = -1; /* Pending tailcall. */ return; } @@ -1373,7 +1381,11 @@ static TRef crec_arith_meta(jit_State *J, TRef *sp, CType **s, CTState *cts, } if (tv) { if (tvisfunc(tv)) { - J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; + J->base[-1-LJ_FR2] = lj_ir_kfunc(J, funcV(tv)); +#if LJ_FR2 + J->base[-1] = 0; +#endif + J->base[-1] |= TREF_FRAME; rd->nres = -1; /* Pending tailcall. */ return 0; } /* NYI: non-function metamethods. 
*/ diff --git a/src/lj_def.h b/src/lj_def.h index 29d3fdda..9413399d 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -95,6 +95,8 @@ typedef unsigned int uintptr_t; #define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo) #define i32ptr(p) ((int32_t)(intptr_t)(void *)(p)) #define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p)) +#define i64ptr(p) ((int64_t)(intptr_t)(void *)(p)) +#define u64ptr(p) ((uint64_t)(intptr_t)(void *)(p)) #define checki8(x) ((x) == (int32_t)(int8_t)(x)) #define checku8(x) ((x) == (int32_t)(uint8_t)(x)) diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 2dc41790..9d27e9f5 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -102,34 +102,41 @@ static void recff_stitch(jit_State *J) ASMFunction cont = lj_cont_stitch; lua_State *L = J->L; TValue *base = L->base; + BCReg nslot = J->maxslot + 1 + LJ_FR2; + TValue *nframe = base + 1 + LJ_FR2; const BCIns *pc = frame_pc(base-1); TValue *pframe = frame_prevl(base-1); - lua_assert(!LJ_FR2); /* TODO_FR2: handle frame shift. */ /* Move func + args up in Lua stack and insert continuation. */ - memmove(&base[1], &base[-1], sizeof(TValue)*(J->maxslot+1)); - setframe_ftsz(base+1, ((char *)(base+1) - (char *)pframe) + FRAME_CONT); - setcont(base, cont); + memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot); + setframe_ftsz(nframe, ((char *)nframe - (char *)pframe) + FRAME_CONT); + setcont(base-LJ_FR2, cont); setframe_pc(base, pc); - setnilV(base-1); /* Incorrect, but rec_check_slots() won't run anymore. */ - L->base += 2; - L->top += 2; + setnilV(base-1-LJ_FR2); /* Incorrect, but rec_check_slots() won't run. */ + L->base += 2 + LJ_FR2; + L->top += 2 + LJ_FR2; /* Ditto for the IR. 
*/ - memmove(&J->base[1], &J->base[-1], sizeof(TRef)*(J->maxslot+1)); + memmove(&J->base[1], &J->base[-1-LJ_FR2], sizeof(TRef)*nslot); +#if LJ_FR2 + J->base[2] = TREF_FRAME; + J->base[-1] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont))); + J->base[0] = lj_ir_k64(J, IR_KNUM, u64ptr(pc)) | TREF_CONT; +#else J->base[0] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; - J->ktrace = tref_ref((J->base[-1] = lj_ir_ktrace(J))); - J->base += 2; - J->baseslot += 2; +#endif + J->ktrace = tref_ref((J->base[-1-LJ_FR2] = lj_ir_ktrace(J))); + J->base += 2 + LJ_FR2; + J->baseslot += 2 + LJ_FR2; J->framedepth++; lj_record_stop(J, LJ_TRLINK_STITCH, 0); /* Undo Lua stack changes. */ - memmove(&base[-1], &base[1], sizeof(TValue)*(J->maxslot+1)); + memmove(&base[-1-LJ_FR2], &base[1], sizeof(TValue)*nslot); setframe_pc(base-1, pc); - L->base -= 2; - L->top -= 2; + L->base -= 2 + LJ_FR2; + L->top -= 2 + LJ_FR2; } /* Fallback handler for fast functions that are not recorded (yet). */ @@ -372,10 +379,10 @@ static int recff_metacall(jit_State *J, RecordFFData *rd, MMS mm) int errcode; TValue argv0; /* Temporarily insert metamethod below object. */ - J->base[1] = J->base[0]; + J->base[1+LJ_FR2] = J->base[0]; J->base[0] = ix.mobj; copyTV(J->L, &argv0, &rd->argv[0]); - copyTV(J->L, &rd->argv[1], &rd->argv[0]); + copyTV(J->L, &rd->argv[1+LJ_FR2], &rd->argv[0]); copyTV(J->L, &rd->argv[0], &ix.mobjv); /* Need to protect lj_record_tailcall because it may throw. */ errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp); @@ -442,6 +449,10 @@ static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd) static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd) { if (J->maxslot >= 1) { +#if LJ_FR2 + /* Shift function arguments up. */ + memmove(J->base + 1, J->base, sizeof(TRef) * J->maxslot); +#endif lj_record_call(J, 0, J->maxslot - 1); rd->nres = -1; /* Pending call. */ } /* else: Interpreter will throw. 
*/ @@ -461,13 +472,16 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd) TValue argv0, argv1; TRef tmp; int errcode; - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ /* Swap function and traceback. */ tmp = J->base[0]; J->base[0] = J->base[1]; J->base[1] = tmp; copyTV(J->L, &argv0, &rd->argv[0]); copyTV(J->L, &argv1, &rd->argv[1]); copyTV(J->L, &rd->argv[0], &argv1); copyTV(J->L, &rd->argv[1], &argv0); +#if LJ_FR2 + /* Shift function arguments up. */ + memmove(J->base + 2, J->base + 1, sizeof(TRef) * (J->maxslot-1)); +#endif /* Need to protect lj_record_call because it may throw. */ errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp); /* Always undo Lua stack swap to avoid confusing the interpreter. */ diff --git a/src/lj_ir.h b/src/lj_ir.h index 68811a21..da365cee 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -220,7 +220,7 @@ IRFLDEF(FLENUM) /* SLOAD mode bits, stored in op2. */ #define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */ -#define IRSLOAD_FRAME 0x02 /* Load hiword of frame. */ +#define IRSLOAD_FRAME 0x02 /* Load 32 bits of ftsz. */ #define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */ #define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */ #define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. 
*/ diff --git a/src/lj_jit.h b/src/lj_jit.h index 2c00bfa2..5797e131 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -179,12 +179,16 @@ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) #define SNAP_TR(slot, tr) \ (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) +#if !LJ_FR2 #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) +#endif #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) #define snap_ref(sn) ((sn) & 0xffff) #define snap_slot(sn) ((BCReg)((sn) >> 24)) #define snap_isframe(sn) ((sn) & SNAP_FRAME) +#if !LJ_FR2 #define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn)) +#endif #define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) /* Snapshot and exit numbers. */ diff --git a/src/lj_record.c b/src/lj_record.c index 4d69c669..1710b52d 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -87,8 +87,8 @@ static void rec_check_slots(jit_State *J) BCReg s, nslots = J->baseslot + J->maxslot; int32_t depth = 0; cTValue *base = J->L->base - J->baseslot; - lua_assert(J->baseslot >= 1 && J->baseslot < LJ_MAX_JSLOTS); - lua_assert(J->baseslot == 1 || (J->slot[J->baseslot-1] & TREF_FRAME)); + lua_assert(J->baseslot >= 1+LJ_FR2 && J->baseslot < LJ_MAX_JSLOTS); + lua_assert(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME)); lua_assert(nslots < LJ_MAX_JSLOTS); for (s = 0; s < nslots; s++) { TRef tr = J->slot[s]; @@ -96,21 +96,39 @@ static void rec_check_slots(jit_State *J) cTValue *tv = &base[s]; IRRef ref = tref_ref(tr); IRIns *ir; - lua_assert(ref >= J->cur.nk && ref < J->cur.nins); - ir = IR(ref); - lua_assert(irt_t(ir->t) == tref_t(tr)); + if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) { + lua_assert(ref >= J->cur.nk && ref < J->cur.nins); + ir = IR(ref); + lua_assert(irt_t(ir->t) == tref_t(tr)); + } if (s == 0) { lua_assert(tref_isfunc(tr)); +#if LJ_FR2 + } else if (s == 1) { + lua_assert(0); +#endif } else if ((tr & TREF_FRAME)) { GCfunc *fn 
= gco2func(frame_gc(tv)); BCReg delta = (BCReg)(tv - frame_prev(tv)); +#if LJ_FR2 + if (ref) + lua_assert(ir_knum(ir)->u64 == tv->u64); + tr = J->slot[s-1]; + ir = IR(tref_ref(tr)); +#endif lua_assert(tref_isfunc(tr)); if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir)); - lua_assert(s > delta ? (J->slot[s-delta] & TREF_FRAME) : (s == delta)); + lua_assert(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME) + : (s == delta + LJ_FR2)); depth++; } else if ((tr & TREF_CONT)) { +#if LJ_FR2 + if (ref) + lua_assert(ir_knum(ir)->u64 == tv->u64); +#else lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void)); - lua_assert((J->slot[s+1] & TREF_FRAME)); +#endif + lua_assert((J->slot[s+1+LJ_FR2] & TREF_FRAME)); depth++; } else { if (tvisnumber(tv)) @@ -162,10 +180,10 @@ static TRef sload(jit_State *J, int32_t slot) /* Get TRef for current function. */ static TRef getcurrf(jit_State *J) { - if (J->base[-1]) - return J->base[-1]; - lua_assert(J->baseslot == 1); - return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY); + if (J->base[-1-LJ_FR2]) + return J->base[-1-LJ_FR2]; + lua_assert(J->baseslot == 1+LJ_FR2); + return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY); } /* Compare for raw object equality. @@ -509,7 +527,6 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) { BCReg ra = bc_a(iterins); - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ if (!tref_isnil(getslot(J, ra))) { /* Looping back? */ J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ J->maxslot = ra-1+bc_b(J->pc[-1]); @@ -680,20 +697,27 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs) TValue *functv = &J->L->base[func]; TRef *fbase = &J->base[func]; ptrdiff_t i; - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ - for (i = 0; i <= nargs; i++) - (void)getslot(J, func+i); /* Ensure func and all args have a reference. 
*/ + (void)getslot(J, func); /* Ensure func has a reference. */ + for (i = 1; i <= nargs; i++) + (void)getslot(J, func+LJ_FR2+i); /* Ensure all args have a reference. */ if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */ ix.tab = fbase[0]; copyTV(J->L, &ix.tabv, functv); if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj)) lj_trace_err(J, LJ_TRERR_NOMM); - for (i = ++nargs; i > 0; i--) /* Shift arguments up. */ - fbase[i] = fbase[i-1]; + for (i = ++nargs; i > LJ_FR2; i--) /* Shift arguments up. */ + fbase[i+LJ_FR2] = fbase[i+LJ_FR2-1]; +#if LJ_FR2 + fbase[2] = fbase[0]; +#endif fbase[0] = ix.mobj; /* Replace function. */ functv = &ix.mobjv; } - fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]); + fbase[0] = rec_call_specialize(J, funcV(functv), fbase[0]); +#if LJ_FR2 + fbase[1] = 0; +#endif + fbase[LJ_FR2] |= TREF_FRAME; J->maxslot = (BCReg)nargs; } @@ -703,8 +727,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs) rec_call_setup(J, func, nargs); /* Bump frame. */ J->framedepth++; - J->base += func+1; - J->baseslot += func+1; + J->base += func+1+LJ_FR2; + J->baseslot += func+1+LJ_FR2; } /* Record tail call. */ @@ -720,7 +744,11 @@ void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs) func += cbase; } /* Move func + args down. */ - memmove(&J->base[-1], &J->base[func], sizeof(TRef)*(J->maxslot+1)); +#if LJ_FR2 + if (J->baseslot == 2) + J->base[func+1] = 0; +#endif + memmove(&J->base[-1-LJ_FR2], &J->base[func], sizeof(TRef)*(J->maxslot+1+LJ_FR2)); /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */ /* Tailcalls can form a loop, so count towards the loop unroll limit. 
*/ if (++J->tailcalled > J->loopunroll) @@ -763,7 +791,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) BCReg cbase = (BCReg)frame_delta(frame); if (--J->framedepth < 0) lj_trace_err(J, LJ_TRERR_NYIRETL); - lua_assert(J->baseslot > 1); + lua_assert(J->baseslot > 1+LJ_FR2); gotresults++; rbase += cbase; J->baseslot -= (BCReg)cbase; @@ -787,7 +815,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) BCReg cbase = (BCReg)frame_delta(frame); if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */ lj_trace_err(J, LJ_TRERR_NYIRETL); - lua_assert(J->baseslot > 1); + lua_assert(J->baseslot > 1+LJ_FR2); rbase += cbase; J->baseslot -= (BCReg)cbase; J->base -= cbase; @@ -797,8 +825,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) BCIns callins = *(frame_pc(frame)-1); ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults; BCReg cbase = bc_a(callins); - GCproto *pt = funcproto(frame_func(frame - (cbase+1-LJ_FR2))); - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame teardown. */ + GCproto *pt = funcproto(frame_func(frame - (cbase+1+LJ_FR2))); if ((pt->flags & PROTO_NOJIT)) lj_trace_err(J, LJ_TRERR_CJITOFF); if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) { @@ -811,13 +838,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) lj_snap_add(J); } for (i = 0; i < nresults; i++) /* Adjust results. */ - J->base[i-1] = i < gotresults ? J->base[rbase+i] : TREF_NIL; + J->base[i-1-LJ_FR2] = i < gotresults ? J->base[rbase+i] : TREF_NIL; J->maxslot = cbase+(BCReg)nresults; if (J->framedepth > 0) { /* Return to a frame that is part of the trace. 
*/ J->framedepth--; - lua_assert(J->baseslot > cbase+1); - J->baseslot -= cbase+1; - J->base -= cbase+1; + lua_assert(J->baseslot > cbase+1+LJ_FR2); + J->baseslot -= cbase+1+LJ_FR2; + J->base -= cbase+1+LJ_FR2; } else if (J->parent == 0 && J->exitno == 0 && !bc_isret(bc_op(J->cur.startins))) { /* Return to lower frame would leave the loop in a root trace. */ @@ -827,13 +854,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) } else { /* Return to lower frame. Guard for the target we return to. */ TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame)); - emitir(IRTG(IR_RETF, IRT_P32), trpt, trpc); + emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc); J->retdepth++; J->needsnap = 1; - lua_assert(J->baseslot == 1); + lua_assert(J->baseslot == 1+LJ_FR2); /* Shift result slots up and clear the slots of the new frame below. */ - memmove(J->base + cbase, J->base-1, sizeof(TRef)*nresults); - memset(J->base-1, 0, sizeof(TRef)*(cbase+1)); + memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults); + memset(J->base-1-LJ_FR2, 0, sizeof(TRef)*(cbase+1+LJ_FR2)); } } else if (frame_iscont(frame)) { /* Return to continuation frame. */ ASMFunction cont = frame_contf(frame); @@ -842,32 +869,39 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) lj_trace_err(J, LJ_TRERR_NYIRETL); J->baseslot -= (BCReg)cbase; J->base -= cbase; - J->maxslot = cbase-2; + J->maxslot = cbase-(2<base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL; - if (dst >= J->maxslot) J->maxslot = dst+1; + if (dst >= J->maxslot) { + J->maxslot = dst+1; + } } else if (cont == lj_cont_nop) { /* Nothing to do here. */ } else if (cont == lj_cont_cat) { BCReg bslot = bc_b(*(frame_contpc(frame)-1)); TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL; - if (bslot != cbase-2) { /* Concatenate the remainder. */ + if (bslot != J->maxslot) { /* Concatenate the remainder. 
*/ TValue *b = J->L->base, save; /* Simulate lower frame and result. */ - J->base[cbase-2] = tr; - copyTV(J->L, &save, b-2); - if (gotresults) copyTV(J->L, b-2, b+rbase); else setnilV(b-2); + J->base[J->maxslot] = tr; + copyTV(J->L, &save, b-(2<L, b-(2<L->base = b - cbase; - tr = rec_cat(J, bslot, cbase-2); + tr = rec_cat(J, bslot, cbase-(2<L->base + cbase; /* Undo. */ J->L->base = b; - copyTV(J->L, b-2, &save); + copyTV(J->L, b-(2<base[dst] = tr; - if (dst >= J->maxslot) J->maxslot = dst+1; + if (dst >= J->maxslot) { + J->maxslot = dst+1; + } } /* Otherwise continue with another __concat call. */ } else { /* Result type already specialized. */ @@ -876,7 +910,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) } else { lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */ } - lua_assert(J->baseslot >= 1); + lua_assert(J->baseslot >= 1+LJ_FR2); } /* -- Metamethod handling ------------------------------------------------- */ @@ -885,11 +919,16 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) { BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize; +#if LJ_FR2 + J->base[top] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont))); + J->base[top+1] = TREF_CONT; +#else J->base[top] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; +#endif J->framedepth++; for (s = J->maxslot; s < top; s++) J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */ - return top+1; + return top+1+LJ_FR2; } /* Record metamethod lookup. */ @@ -967,9 +1006,9 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) BCReg func = rec_mm_prep(J, mm == MM_concat ? 
lj_cont_cat : lj_cont_ra); TRef *base = J->base + func; TValue *basev = J->L->base + func; - base[1] = ix->tab; base[2] = ix->key; - copyTV(J->L, basev+1, &ix->tabv); - copyTV(J->L, basev+2, &ix->keyv); + base[1+LJ_FR2] = ix->tab; base[2+LJ_FR2] = ix->key; + copyTV(J->L, basev+1+LJ_FR2, &ix->tabv); + copyTV(J->L, basev+2+LJ_FR2, &ix->keyv); if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */ if (mm != MM_unm) { ix->tab = ix->key; @@ -980,8 +1019,10 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) lj_trace_err(J, LJ_TRERR_NOMM); } ok: - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ base[0] = ix->mobj; +#if LJ_FR2 + base[1] = 0; +#endif copyTV(J->L, basev+0, &ix->mobjv); lj_record_call(J, func, 2); return 0; /* No result yet. */ @@ -997,8 +1038,9 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) BCReg func = rec_mm_prep(J, lj_cont_ra); TRef *base = J->base + func; TValue *basev = J->L->base + func; - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv); + base += LJ_FR2; + basev += LJ_FR2; base[1] = tr; copyTV(J->L, basev+1, tv); #if LJ_52 base[2] = tr; copyTV(J->L, basev+2, tv); @@ -1018,11 +1060,10 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op) { BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt); - TRef *base = J->base + func; - TValue *tv = J->L->base + func; - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. 
*/ - base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key; - copyTV(J->L, tv+0, &ix->mobjv); + TRef *base = J->base + func + LJ_FR2; + TValue *tv = J->L->base + func + LJ_FR2; + base[-LJ_FR2] = ix->mobj; base[1] = ix->val; base[2] = ix->key; + copyTV(J->L, tv-LJ_FR2, &ix->mobjv); copyTV(J->L, tv+1, &ix->valv); copyTV(J->L, tv+2, &ix->keyv); lj_record_call(J, func, 2); @@ -1339,11 +1380,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) handlemm: if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */ BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra); - TRef *base = J->base + func; - TValue *tv = J->L->base + func; - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ - base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; - setfuncV(J->L, tv+0, funcV(&ix->mobjv)); + TRef *base = J->base + func + LJ_FR2; + TValue *tv = J->L->base + func + LJ_FR2; + base[-LJ_FR2] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; + setfuncV(J->L, tv-LJ_FR2, funcV(&ix->mobjv)); copyTV(J->L, tv+1, &ix->tabv); copyTV(J->L, tv+2, &ix->keyv); if (ix->val) { @@ -1533,7 +1573,11 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val) goto noconstify; kfunc = lj_ir_kfunc(J, J->fn); emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc); +#if LJ_FR2 + J->base[-2] = kfunc; +#else J->base[-1] = TREF_FRAME | kfunc; +#endif fn = kfunc; } tr = lj_record_constify(J, uvval(uvp)); @@ -1638,11 +1682,14 @@ static void rec_func_setup(jit_State *J) static void rec_func_vararg(jit_State *J) { GCproto *pt = J->pt; - BCReg s, fixargs, vframe = J->maxslot+1; + BCReg s, fixargs, vframe = J->maxslot+1+LJ_FR2; lua_assert((pt->flags & PROTO_VARARG)); if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) lj_trace_err(J, LJ_TRERR_STACKOV); - J->base[vframe-1] = J->base[-1]; /* Copy function up. */ + J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. 
*/ +#if LJ_FR2 + J->base[vframe-1] = TREF_FRAME; +#endif /* Copy fixarg slots up and set their original slots to nil. */ fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot; for (s = 0; s < fixargs; s++) { @@ -1704,7 +1751,7 @@ static int select_detect(jit_State *J) static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) { int32_t numparams = J->pt->numparams; - ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1; + ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2; lua_assert(frame_isvarg(J->L->base-1)); if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ ptrdiff_t i; @@ -1716,10 +1763,10 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) J->maxslot = dst + (BCReg)nresults; } for (i = 0; i < nresults; i++) - J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1) : TREF_NIL; + J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1 - LJ_FR2) : TREF_NIL; } else { /* Unknown number of varargs passed to trace. */ - TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME); - int32_t frofs = 8*(1+numparams)+FRAME_VARG; + TRef fr = emitir(IRTI(IR_SLOAD), LJ_FR2, IRSLOAD_READONLY|IRSLOAD_FRAME); + int32_t frofs = 8*(1+LJ_FR2+numparams)+FRAME_VARG; if (nresults >= 0) { /* Known fixed number of results. 
*/ ptrdiff_t i; if (nvararg > 0) { @@ -1733,7 +1780,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8)); for (i = 0; i < nload; i++) { - IRType t = itype2irt(&J->L->base[i-1-nvararg]); + IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]); TRef aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, lj_ir_kint(J, (int32_t)i)); TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0); @@ -1781,14 +1828,15 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) if (idx != 0 && idx <= nvararg) { IRType t; TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); - vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8)); - t = itype2irt(&J->L->base[idx-2-nvararg]); + vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, + lj_ir_kint(J, frofs-(8<L->base[idx-2-LJ_FR2-nvararg]); aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx); tr = emitir(IRTG(IR_VLOAD, t), aref, 0); if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ } - J->base[dst-2] = tr; - J->maxslot = dst-1; + J->base[dst-2-LJ_FR2] = tr; + J->maxslot = dst-1-LJ_FR2; J->bcskip = 2; /* Skip CALLM + select. */ } else { nyivarg: @@ -1881,7 +1929,15 @@ static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond) const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0); SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; /* Set PC to opposite target to avoid re-recording the comp. in side trace. */ +#if LJ_FR2 + SnapEntry *flink = &J->cur.snapmap[snap->mapofs + snap->nent]; + uint64_t pcbase; + memcpy(&pcbase, flink, sizeof(uint64_t)); + pcbase = (pcbase & 0xff) | (u64ptr(npc) << 8); + memcpy(flink, &pcbase, sizeof(uint64_t)); +#else J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); +#endif J->needsnap = 1; if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins); lj_snap_shrink(J); /* Shrink last snapshot if possible. 
*/ @@ -2179,7 +2235,13 @@ void lj_record_ins(jit_State *J) case BC_MOV: /* Clear gap of method call to avoid resurrecting previous refs. */ - if (ra > J->maxslot) J->base[ra-1] = 0; + if (ra > J->maxslot) { +#if LJ_FR2 + memset(J->base + J->maxslot, 0, (ra - J->maxslot) * sizeof(TRef)); +#else + J->base[ra-1] = 0; +#endif + } break; case BC_KSTR: case BC_KNUM: case BC_KPRI: break; @@ -2248,14 +2310,14 @@ void lj_record_ins(jit_State *J) /* -- Calls and vararg handling ----------------------------------------- */ case BC_ITERC: - J->base[ra] = getslot(J, ra-3-LJ_FR2); - J->base[ra+1] = getslot(J, ra-2-LJ_FR2); - J->base[ra+2] = getslot(J, ra-1-LJ_FR2); + J->base[ra] = getslot(J, ra-3); + J->base[ra+1+LJ_FR2] = getslot(J, ra-2); + J->base[ra+2+LJ_FR2] = getslot(J, ra-1); { /* Do the actual copy now because lj_record_call needs the values. */ TValue *b = &J->L->base[ra]; - copyTV(J->L, b, b-3-LJ_FR2); - copyTV(J->L, b+1, b-2-LJ_FR2); - copyTV(J->L, b+2, b-1-LJ_FR2); + copyTV(J->L, b, b-3); + copyTV(J->L, b+1+LJ_FR2, b-2); + copyTV(J->L, b+2+LJ_FR2, b-1); } lj_record_call(J, ra, (ptrdiff_t)rc-1); break; @@ -2378,7 +2440,12 @@ void lj_record_ins(jit_State *J) /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */ if (bcmode_a(op) == BCMdst && rc) { J->base[ra] = rc; - if (ra >= J->maxslot) J->maxslot = ra+1; + if (ra >= J->maxslot) { +#if LJ_FR2 + if (ra > J->maxslot) J->base[ra-1] = 0; +#endif + J->maxslot = ra+1; + } } #undef rav @@ -2463,7 +2530,7 @@ void lj_record_setup(jit_State *J) J->scev.idx = REF_NIL; setmref(J->scev.pc, NULL); - J->baseslot = 1; /* Invoking function is at base[-1]. */ + J->baseslot = 1+LJ_FR2; /* Invoking function is at base[-1-LJ_FR2]. 
*/ J->base = J->slot + J->baseslot; J->maxslot = 0; J->framedepth = 0; diff --git a/src/lj_snap.c b/src/lj_snap.c index 6d490a5d..91180ec4 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -68,10 +68,18 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) for (s = 0; s < nslots; s++) { TRef tr = J->slot[s]; IRRef ref = tref_ref(tr); +#if LJ_FR2 + if (s == 1) continue; + if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) { + TValue *base = J->L->base - J->baseslot; + tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64); + ref = tref_ref(tr); + } +#endif if (ref) { SnapEntry sn = SNAP_TR(s, tr); IRIns *ir = &J->cur.ir[ref]; - if (!(sn & (SNAP_CONT|SNAP_FRAME)) && + if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) && ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { /* No need to snapshot unmodified non-inherited slots. */ if (!(ir->op2 & IRSLOAD_INHERIT)) @@ -90,34 +98,51 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) } /* Add frame links at the end of the snapshot. */ -static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map) +static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot) { cTValue *frame = J->L->base - 1; - cTValue *lim = J->L->base - J->baseslot; + cTValue *lim = J->L->base - J->baseslot + LJ_FR2; GCfunc *fn = frame_func(frame); cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top; +#if LJ_FR2 + uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2); + lua_assert(2 <= J->baseslot && J->baseslot <= 257); + memcpy(map, &pcbase, sizeof(uint64_t)); +#else MSize f = 0; - lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */ map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ +#endif while (frame > lim) { /* Backwards traversal of all frames above base. 
*/ if (frame_islua(frame)) { +#if !LJ_FR2 map[f++] = SNAP_MKPC(frame_pc(frame)); +#endif frame = frame_prevl(frame); } else if (frame_iscont(frame)) { +#if !LJ_FR2 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); map[f++] = SNAP_MKPC(frame_contpc(frame)); +#endif frame = frame_prevd(frame); } else { lua_assert(!frame_isc(frame)); +#if !LJ_FR2 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); +#endif frame = frame_prevd(frame); continue; } if (frame + funcproto(frame_func(frame))->framesize > ftop) ftop = frame + funcproto(frame_func(frame))->framesize; } + *topslot = (uint8_t)(ftop - lim); +#if LJ_FR2 + lua_assert(sizeof(SnapEntry) * 2 == sizeof(uint64_t)); + return 2; +#else lua_assert(f == (MSize)(1 + J->framedepth)); - return (BCReg)(ftop - lim); + return f; +#endif } /* Take a snapshot of the current stack. */ @@ -127,16 +152,16 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) MSize nent; SnapEntry *p; /* Conservative estimate. */ - lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1); + lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1)); p = &J->cur.snapmap[nsnapmap]; nent = snapshot_slots(J, p, nslots); - snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent); + snap->nent = (uint8_t)nent; + nent += snapshot_framelinks(J, p + nent, &snap->topslot); snap->mapofs = (uint16_t)nsnapmap; snap->ref = (IRRef1)J->cur.nins; - snap->nent = (uint8_t)nent; snap->nslots = (uint8_t)nslots; snap->count = 0; - J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth); + J->cur.nsnapmap = (uint16_t)(nsnapmap + nent); } /* Add or merge a snapshot. 
*/ @@ -284,8 +309,15 @@ void lj_snap_shrink(jit_State *J) MSize n, m, nlim, nent = snap->nent; uint8_t udf[SNAP_USEDEF_SLOTS]; BCReg maxslot = J->maxslot; - BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot); BCReg baseslot = J->baseslot; +#if LJ_FR2 + BCReg minslot; + uint64_t pcbase; + memcpy(&pcbase, &map[nent], sizeof(uint64_t)); + minslot = snap_usedef(J, udf, (const BCIns *)(pcbase >> 8), maxslot); +#else + BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot); +#endif maxslot += baseslot; minslot += baseslot; snap->nslots = (uint8_t)maxslot; @@ -794,12 +826,19 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) SnapShot *snap = &T->snap[snapno]; MSize n, nent = snap->nent; SnapEntry *map = &T->snapmap[snap->mapofs]; - SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1]; - ptrdiff_t ftsz0; + SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2]; TValue *frame; BloomFilter rfilt = snap_renamefilter(T, snapno); - const BCIns *pc = snap_pc(map[nent]); lua_State *L = J->L; +#if LJ_FR2 + const BCIns *pc; + uint64_t pcbase; + memcpy(&pcbase, flinks, sizeof(uint64_t)); + pc = (const BCIns *)(pcbase >> 8); +#else + ptrdiff_t ftsz0; + const BCIns *pc = snap_pc(map[nent]); +#endif /* Set interpreter PC to the next PC to get correct error messages. */ setcframe_pc(cframe_raw(L->cframe), pc+1); @@ -811,8 +850,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) } /* Fill stack slots with data from the registers and spill slots. */ - frame = L->base-1; + frame = L->base-1-LJ_FR2; +#if !LJ_FR2 ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. 
*/ +#endif for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; if (!(sn & SNAP_NORESTORE)) { @@ -835,14 +876,18 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) TValue tmp; snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); o->u32.hi = tmp.u32.lo; +#if !LJ_FR2 } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { - lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */ /* Overwrite tag with frame link. */ setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0); L->base = o+1; +#endif } } } +#if LJ_FR2 + L->base += (pcbase & 0xff); +#endif lua_assert(map + nent == flinks); /* Compute current stack top. */