diff --git a/src/jit/dump.lua b/src/jit/dump.lua index 9a722f73..a635af10 100644 --- a/src/jit/dump.lua +++ b/src/jit/dump.lua @@ -310,15 +310,17 @@ local function fmtfunc(func, pc) end end -local function formatk(tr, idx) +local function formatk(tr, idx, sn) local k, t, slot = tracek(tr, idx) local tn = type(k) local s if tn == "number" then - if k == 2^52+2^51 then + if band(sn or 0, 0x30000) ~= 0 then + s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz" + elseif k == 2^52+2^51 then s = "bias" else - s = format("%+.14g", k) + s = format(0 < k and k < 0x1p-1026 and "%+a" or "%+.14g", k) end elseif tn == "string" then s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub)) @@ -354,7 +356,7 @@ local function printsnap(tr, snap) n = n + 1 local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS if ref < 0 then - out:write(formatk(tr, ref)) + out:write(formatk(tr, ref, sn)) elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) else diff --git a/src/lj_arch.h b/src/lj_arch.h index 612c7303..72622a21 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -453,7 +453,7 @@ #endif /* Disable or enable the JIT compiler. */ -#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_FR2 || LJ_GC64 +#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_GC64 #define LJ_HASJIT 0 #else #define LJ_HASJIT 1 diff --git a/src/lj_asm.c b/src/lj_asm.c index 9f784cc8..5dd7ca3a 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -1893,7 +1893,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe) SnapEntry sn = map[n-1]; if ((sn & SNAP_FRAME)) { *gotframe = 1; - return snap_slot(sn); + return snap_slot(sn) - LJ_FR2; } } return 0; @@ -1913,7 +1913,7 @@ static void asm_tail_link(ASMState *as) if (as->T->link == 0) { /* Setup fixed registers for exit to interpreter. 
*/ - const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]); + const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]); int32_t mres; if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */ BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins; @@ -1922,7 +1922,7 @@ static void asm_tail_link(ASMState *as) } ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); ra_allockreg(as, i32ptr(pc), RID_LPC); - mres = (int32_t)(snap->nslots - baseslot); + mres = (int32_t)(snap->nslots - baseslot - LJ_FR2); switch (bc_op(*pc)) { case BC_CALLM: case BC_CALLMT: mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break; diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 0361a965..83fe22b2 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -348,7 +348,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && noconflict(as, ref, IR_RETF, 0)) { as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); - as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0); + as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) + + (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 
4 : 0); as->mrm.idx = RID_NONE; return RID_MRM; } @@ -655,6 +656,9 @@ static void asm_callx(ASMState *as, IRIns *ir) static void asm_retf(ASMState *as, IRIns *ir) { Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); +#if LJ_FR2 + Reg rpc = ra_scratch(as, rset_exclude(RSET_GPR, base)); +#endif void *pc = ir_kptr(IR(ir->op2)); int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); as->topslot -= (BCReg)delta; @@ -663,7 +667,12 @@ static void asm_retf(ASMState *as, IRIns *ir) emit_setgl(as, base, jit_base); emit_addptr(as, base, -8*delta); asm_guardcc(as, CC_NE); +#if LJ_FR2 + emit_rmro(as, XO_CMP, rpc, base, -8); + emit_loadu64(as, rpc, u64ptr(pc)); +#else emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc)); +#endif } /* -- Type conversions ---------------------------------------------------- */ @@ -1397,7 +1406,8 @@ static void asm_ahustore(ASMState *as, IRIns *ir) static void asm_sload(ASMState *as, IRIns *ir) { - int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); + int32_t ofs = 8*((int32_t)ir->op1-1-LJ_FR2) + + (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0); IRType1 t = ir->t; Reg base; lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ @@ -2383,13 +2393,15 @@ static void asm_stack_check(ASMState *as, BCReg topslot, static void asm_stack_restore(ASMState *as, SnapShot *snap) { SnapEntry *map = &as->T->snapmap[snap->mapofs]; - SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; +#if !LJ_FR2 || defined(LUA_USE_ASSERT) + SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; +#endif MSize n, nent = snap->nent; /* Store the value of all modified slots to the Lua stack. 
*/ for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; BCReg s = snap_slot(sn); - int32_t ofs = 8*((int32_t)s-1); + int32_t ofs = 8*((int32_t)s-1-LJ_FR2); IRRef ref = snap_ref(sn); IRIns *ir = IR(ref); if ((sn & SNAP_NORESTORE)) @@ -2407,8 +2419,10 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) emit_movmroi(as, RID_BASE, ofs, ir->i); } if ((sn & (SNAP_CONT|SNAP_FRAME))) { +#if !LJ_FR2 if (s != 0) /* Do not overwrite link to previous frame. */ emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--)); +#endif } else { if (!(LJ_64 && irt_islightud(ir->t))) emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); diff --git a/src/lj_crecord.c b/src/lj_crecord.c index c0f7e3d7..d568b20a 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -712,6 +712,19 @@ static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz) return tr; } +/* Tailcall to function. */ +static void crec_tailcall(jit_State *J, RecordFFData *rd, cTValue *tv) +{ + TRef kfunc = lj_ir_kfunc(J, funcV(tv)); +#if LJ_FR2 + J->base[-2] = kfunc; + J->base[-1] = TREF_FRAME; +#else + J->base[-1] = kfunc | TREF_FRAME; +#endif + rd->nres = -1; /* Pending tailcall. */ +} + /* Record ctype __index/__newindex metamethods. */ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, RecordFFData *rd) @@ -721,8 +734,7 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, if (!tv) lj_trace_err(J, LJ_TRERR_BADTYPE); if (tvisfunc(tv)) { - J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; - rd->nres = -1; /* Pending tailcall. */ + crec_tailcall(J, rd, tv); } else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) { /* Specialize to result of __index lookup. 
*/ cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]); @@ -1119,20 +1131,20 @@ static void crec_snap_caller(jit_State *J) lua_State *L = J->L; TValue *base = L->base, *top = L->top; const BCIns *pc = J->pc; - TRef ftr = J->base[-1]; + TRef ftr = J->base[-1-LJ_FR2]; ptrdiff_t delta; if (!frame_islua(base-1) || J->framedepth <= 0) lj_trace_err(J, LJ_TRERR_NYICALL); J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]); L->top = base; L->base = base - delta; - J->base[-1] = TREF_FALSE; + J->base[-1-LJ_FR2] = TREF_FALSE; J->base -= delta; J->baseslot -= (BCReg)delta; - J->maxslot = (BCReg)delta; J->framedepth--; + J->maxslot = (BCReg)delta-LJ_FR2; J->framedepth--; lj_snap_add(J); L->base = base; L->top = top; J->framedepth++; J->maxslot = 1; J->base += delta; J->baseslot += (BCReg)delta; - J->base[-1] = ftr; J->pc = pc; + J->base[-1-LJ_FR2] = ftr; J->pc = pc; } /* Record function call. */ @@ -1224,8 +1236,7 @@ void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd) tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm); if (tv) { if (tvisfunc(tv)) { - J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; - rd->nres = -1; /* Pending tailcall. */ + crec_tailcall(J, rd, tv); return; } } else if (mm == MM_new) { @@ -1373,8 +1384,7 @@ static TRef crec_arith_meta(jit_State *J, TRef *sp, CType **s, CTState *cts, } if (tv) { if (tvisfunc(tv)) { - J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; - rd->nres = -1; /* Pending tailcall. */ + crec_tailcall(J, rd, tv); return 0; } /* NYI: non-function metamethods. */ } else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. 
*/ diff --git a/src/lj_def.h b/src/lj_def.h index 29d3fdda..9413399d 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -95,6 +95,8 @@ typedef unsigned int uintptr_t; #define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo) #define i32ptr(p) ((int32_t)(intptr_t)(void *)(p)) #define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p)) +#define i64ptr(p) ((int64_t)(intptr_t)(void *)(p)) +#define u64ptr(p) ((uint64_t)(intptr_t)(void *)(p)) #define checki8(x) ((x) == (int32_t)(int8_t)(x)) #define checku8(x) ((x) == (int32_t)(uint8_t)(x)) diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index ae567622..64a9a65d 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -102,35 +102,41 @@ static void recff_stitch(jit_State *J) ASMFunction cont = lj_cont_stitch; lua_State *L = J->L; TValue *base = L->base; + BCReg nslot = J->maxslot + 1 + LJ_FR2; + TValue *nframe = base + 1 + LJ_FR2; const BCIns *pc = frame_pc(base-1); TValue *pframe = frame_prevl(base-1); - lua_assert(!LJ_FR2); /* TODO_FR2: handle frame shift. */ /* Move func + args up in Lua stack and insert continuation. */ - memmove(&base[1], &base[-1], sizeof(TValue)*(J->maxslot+1)); - setframe_ftsz(base+1, ((char *)(base+1) - (char *)pframe) + FRAME_CONT); - setcont(base, cont); + memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot); + setframe_ftsz(nframe, ((char *)nframe - (char *)pframe) + FRAME_CONT); + setcont(base-LJ_FR2, cont); setframe_pc(base, pc); - setnilV(base-1); /* Incorrect, but rec_check_slots() won't run anymore. */ - L->base += 2; - L->top += 2; + setnilV(base-1-LJ_FR2); /* Incorrect, but rec_check_slots() won't run anymore. */ + L->base += 2 + LJ_FR2; + L->top += 2 + LJ_FR2; /* Ditto for the IR. 
*/ - memmove(&J->base[1], &J->base[-1], sizeof(TRef)*(J->maxslot+1)); + memmove(&J->base[1], &J->base[-1-LJ_FR2], sizeof(TRef)*nslot); +#if LJ_FR2 + J->base[2] = TREF_FRAME; + J->base[-1] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont))); + J->base[0] = lj_ir_k64(J, IR_KNUM, u64ptr(pc)) | TREF_CONT; +#else J->base[0] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; - J->base[-1] = lj_ir_ktrace(J); - J->ktrace = tref_ref(J->base[-1]); - J->base += 2; - J->baseslot += 2; +#endif + J->ktrace = tref_ref((J->base[-1-LJ_FR2] = lj_ir_ktrace(J))); + J->base += 2 + LJ_FR2; + J->baseslot += 2 + LJ_FR2; J->framedepth++; lj_record_stop(J, LJ_TRLINK_STITCH, 0); /* Undo Lua stack changes. */ - memmove(&base[-1], &base[1], sizeof(TValue)*(J->maxslot+1)); + memmove(&base[-1-LJ_FR2], &base[1], sizeof(TValue)*nslot); setframe_pc(base-1, pc); - L->base -= 2; - L->top -= 2; + L->base -= 2 + LJ_FR2; + L->top -= 2 + LJ_FR2; } /* Fallback handler for fast functions that are not recorded (yet). */ @@ -373,10 +379,10 @@ static int recff_metacall(jit_State *J, RecordFFData *rd, MMS mm) int errcode; TValue argv0; /* Temporarily insert metamethod below object. */ - J->base[1] = J->base[0]; + J->base[1+LJ_FR2] = J->base[0]; J->base[0] = ix.mobj; copyTV(J->L, &argv0, &rd->argv[0]); - copyTV(J->L, &rd->argv[1], &rd->argv[0]); + copyTV(J->L, &rd->argv[1+LJ_FR2], &rd->argv[0]); copyTV(J->L, &rd->argv[0], &ix.mobjv); /* Need to protect lj_record_tailcall because it may throw. */ errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp); @@ -443,6 +449,10 @@ static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd) static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd) { if (J->maxslot >= 1) { +#if LJ_FR2 + /* Shift function arguments up. */ + memmove(J->base + 1, J->base, sizeof(TRef) * J->maxslot); +#endif lj_record_call(J, 0, J->maxslot - 1); rd->nres = -1; /* Pending call. */ } /* else: Interpreter will throw. 
*/ @@ -462,13 +472,16 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd) TValue argv0, argv1; TRef tmp; int errcode; - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ /* Swap function and traceback. */ tmp = J->base[0]; J->base[0] = J->base[1]; J->base[1] = tmp; copyTV(J->L, &argv0, &rd->argv[0]); copyTV(J->L, &argv1, &rd->argv[1]); copyTV(J->L, &rd->argv[0], &argv1); copyTV(J->L, &rd->argv[1], &argv0); +#if LJ_FR2 + /* Shift function arguments up. */ + memmove(J->base + 2, J->base + 1, sizeof(TRef) * (J->maxslot-1)); +#endif /* Need to protect lj_record_call because it may throw. */ errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp); /* Always undo Lua stack swap to avoid confusing the interpreter. */ diff --git a/src/lj_ir.h b/src/lj_ir.h index 3de57046..4e9c85c7 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -220,7 +220,7 @@ IRFLDEF(FLENUM) /* SLOAD mode bits, stored in op2. */ #define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */ -#define IRSLOAD_FRAME 0x02 /* Load hiword of frame. */ +#define IRSLOAD_FRAME 0x02 /* Load 32 bits of ftsz. */ #define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */ #define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */ #define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. 
*/ diff --git a/src/lj_jit.h b/src/lj_jit.h index 55fbea8b..f460a0ab 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -179,14 +179,26 @@ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) #define SNAP_TR(slot, tr) \ (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) +#if !LJ_FR2 #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) +#endif #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) #define snap_ref(sn) ((sn) & 0xffff) #define snap_slot(sn) ((BCReg)((sn) >> 24)) #define snap_isframe(sn) ((sn) & SNAP_FRAME) -#define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn)) #define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) +static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn) +{ +#if LJ_FR2 + uint64_t pcbase; + memcpy(&pcbase, sn, sizeof(uint64_t)); + return (const BCIns *)(pcbase >> 8); +#else + return (const BCIns *)(uintptr_t)*sn; +#endif +} + /* Snapshot and exit numbers. */ typedef uint32_t SnapNo; typedef uint32_t ExitNo; diff --git a/src/lj_record.c b/src/lj_record.c index 3b754897..f0481050 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -87,30 +87,48 @@ static void rec_check_slots(jit_State *J) BCReg s, nslots = J->baseslot + J->maxslot; int32_t depth = 0; cTValue *base = J->L->base - J->baseslot; - lua_assert(J->baseslot >= 1 && J->baseslot < LJ_MAX_JSLOTS); - lua_assert(J->baseslot == 1 || (J->slot[J->baseslot-1] & TREF_FRAME)); + lua_assert(J->baseslot >= 1+LJ_FR2 && J->baseslot < LJ_MAX_JSLOTS); + lua_assert(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME)); lua_assert(nslots < LJ_MAX_JSLOTS); for (s = 0; s < nslots; s++) { TRef tr = J->slot[s]; if (tr) { cTValue *tv = &base[s]; IRRef ref = tref_ref(tr); - IRIns *ir; - lua_assert(ref >= J->cur.nk && ref < J->cur.nins); - ir = IR(ref); - lua_assert(irt_t(ir->t) == tref_t(tr)); + IRIns *ir = NULL; /* Silence compiler. 
*/ + if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) { + lua_assert(ref >= J->cur.nk && ref < J->cur.nins); + ir = IR(ref); + lua_assert(irt_t(ir->t) == tref_t(tr)); + } if (s == 0) { lua_assert(tref_isfunc(tr)); +#if LJ_FR2 + } else if (s == 1) { + lua_assert(0); +#endif } else if ((tr & TREF_FRAME)) { GCfunc *fn = gco2func(frame_gc(tv)); BCReg delta = (BCReg)(tv - frame_prev(tv)); +#if LJ_FR2 + if (ref) + lua_assert(ir_knum(ir)->u64 == tv->u64); + tr = J->slot[s-1]; + ir = IR(tref_ref(tr)); +#endif lua_assert(tref_isfunc(tr)); if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir)); - lua_assert(s > delta ? (J->slot[s-delta] & TREF_FRAME) : (s == delta)); + lua_assert(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME) + : (s == delta + LJ_FR2)); depth++; } else if ((tr & TREF_CONT)) { +#if LJ_FR2 + if (ref) + lua_assert(ir_knum(ir)->u64 == tv->u64); +#else lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void)); - lua_assert((J->slot[s+1] & TREF_FRAME)); +#endif + lua_assert((J->slot[s+1+LJ_FR2] & TREF_FRAME)); depth++; } else { if (tvisnumber(tv)) @@ -162,10 +180,10 @@ static TRef sload(jit_State *J, int32_t slot) /* Get TRef for current function. */ static TRef getcurrf(jit_State *J) { - if (J->base[-1]) - return J->base[-1]; - lua_assert(J->baseslot == 1); - return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY); + if (J->base[-1-LJ_FR2]) + return J->base[-1-LJ_FR2]; + lua_assert(J->baseslot == 1+LJ_FR2); + return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY); } /* Compare for raw object equality. @@ -509,7 +527,6 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) { BCReg ra = bc_a(iterins); - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ if (!tref_isnil(getslot(J, ra))) { /* Looping back? */ J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. 
*/ J->maxslot = ra-1+bc_b(J->pc[-1]); @@ -678,22 +695,31 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs) { RecordIndex ix; TValue *functv = &J->L->base[func]; - TRef *fbase = &J->base[func]; + TRef kfunc, *fbase = &J->base[func]; ptrdiff_t i; - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ - for (i = 0; i <= nargs; i++) - (void)getslot(J, func+i); /* Ensure func and all args have a reference. */ + (void)getslot(J, func); /* Ensure func has a reference. */ + for (i = 1; i <= nargs; i++) + (void)getslot(J, func+LJ_FR2+i); /* Ensure all args have a reference. */ if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */ ix.tab = fbase[0]; copyTV(J->L, &ix.tabv, functv); if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj)) lj_trace_err(J, LJ_TRERR_NOMM); - for (i = ++nargs; i > 0; i--) /* Shift arguments up. */ - fbase[i] = fbase[i-1]; + for (i = ++nargs; i > LJ_FR2; i--) /* Shift arguments up. */ + fbase[i+LJ_FR2] = fbase[i+LJ_FR2-1]; +#if LJ_FR2 + fbase[2] = fbase[0]; +#endif fbase[0] = ix.mobj; /* Replace function. */ functv = &ix.mobjv; } - fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]); + kfunc = rec_call_specialize(J, funcV(functv), fbase[0]); +#if LJ_FR2 + fbase[0] = kfunc; + fbase[1] = TREF_FRAME; +#else + fbase[0] = kfunc | TREF_FRAME; +#endif J->maxslot = (BCReg)nargs; } @@ -703,8 +729,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs) rec_call_setup(J, func, nargs); /* Bump frame. */ J->framedepth++; - J->base += func+1; - J->baseslot += func+1; + J->base += func+1+LJ_FR2; + J->baseslot += func+1+LJ_FR2; } /* Record tail call. */ @@ -720,7 +746,9 @@ void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs) func += cbase; } /* Move func + args down. 
*/ - memmove(&J->base[-1], &J->base[func], sizeof(TRef)*(J->maxslot+1)); + if (LJ_FR2 && J->baseslot == 2) + J->base[func+1] = 0; + memmove(&J->base[-1-LJ_FR2], &J->base[func], sizeof(TRef)*(J->maxslot+1+LJ_FR2)); /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */ /* Tailcalls can form a loop, so count towards the loop unroll limit. */ if (++J->tailcalled > J->loopunroll) @@ -763,7 +791,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) BCReg cbase = (BCReg)frame_delta(frame); if (--J->framedepth < 0) lj_trace_err(J, LJ_TRERR_NYIRETL); - lua_assert(J->baseslot > 1); + lua_assert(J->baseslot > 1+LJ_FR2); gotresults++; rbase += cbase; J->baseslot -= (BCReg)cbase; @@ -787,7 +815,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) BCReg cbase = (BCReg)frame_delta(frame); if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */ lj_trace_err(J, LJ_TRERR_NYIRETL); - lua_assert(J->baseslot > 1); + lua_assert(J->baseslot > 1+LJ_FR2); rbase += cbase; J->baseslot -= (BCReg)cbase; J->base -= cbase; @@ -797,8 +825,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) BCIns callins = *(frame_pc(frame)-1); ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults; BCReg cbase = bc_a(callins); - GCproto *pt = funcproto(frame_func(frame - (cbase+1-LJ_FR2))); - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame teardown. */ + GCproto *pt = funcproto(frame_func(frame - (cbase+1+LJ_FR2))); if ((pt->flags & PROTO_NOJIT)) lj_trace_err(J, LJ_TRERR_CJITOFF); if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) { @@ -811,13 +838,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) lj_snap_add(J); } for (i = 0; i < nresults; i++) /* Adjust results. */ - J->base[i-1] = i < gotresults ? J->base[rbase+i] : TREF_NIL; + J->base[i-1-LJ_FR2] = i < gotresults ? 
J->base[rbase+i] : TREF_NIL; J->maxslot = cbase+(BCReg)nresults; if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */ J->framedepth--; - lua_assert(J->baseslot > cbase+1); - J->baseslot -= cbase+1; - J->base -= cbase+1; + lua_assert(J->baseslot > cbase+1+LJ_FR2); + J->baseslot -= cbase+1+LJ_FR2; + J->base -= cbase+1+LJ_FR2; } else if (J->parent == 0 && J->exitno == 0 && !bc_isret(bc_op(J->cur.startins))) { /* Return to lower frame would leave the loop in a root trace. */ @@ -827,13 +854,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) } else { /* Return to lower frame. Guard for the target we return to. */ TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame)); - emitir(IRTG(IR_RETF, IRT_P32), trpt, trpc); + emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc); J->retdepth++; J->needsnap = 1; - lua_assert(J->baseslot == 1); + lua_assert(J->baseslot == 1+LJ_FR2); /* Shift result slots up and clear the slots of the new frame below. */ - memmove(J->base + cbase, J->base-1, sizeof(TRef)*nresults); - memset(J->base-1, 0, sizeof(TRef)*(cbase+1)); + memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults); + memset(J->base-1-LJ_FR2, 0, sizeof(TRef)*(cbase+1+LJ_FR2)); } } else if (frame_iscont(frame)) { /* Return to continuation frame. */ ASMFunction cont = frame_contf(frame); @@ -842,32 +869,39 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) lj_trace_err(J, LJ_TRERR_NYIRETL); J->baseslot -= (BCReg)cbase; J->base -= cbase; - J->maxslot = cbase-2; + J->maxslot = cbase-(2<<LJ_FR2); if (cont == lj_cont_ra) { /* Copy result to destination slot. */ BCReg dst = bc_a(*(frame_contpc(frame)-1)); J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL; - if (dst >= J->maxslot) J->maxslot = dst+1; + if (dst >= J->maxslot) { + J->maxslot = dst+1; + } } else if (cont == lj_cont_nop) { /* Nothing to do here. */ } else if (cont == lj_cont_cat) { BCReg bslot = bc_b(*(frame_contpc(frame)-1)); TRef tr = gotresults ? 
J->base[cbase+rbase] : TREF_NIL; - if (bslot != cbase-2) { /* Concatenate the remainder. */ + if (bslot != J->maxslot) { /* Concatenate the remainder. */ TValue *b = J->L->base, save; /* Simulate lower frame and result. */ - J->base[cbase-2] = tr; - copyTV(J->L, &save, b-2); - if (gotresults) copyTV(J->L, b-2, b+rbase); else setnilV(b-2); + J->base[J->maxslot] = tr; + copyTV(J->L, &save, b-(2<<LJ_FR2)); + if (gotresults) + copyTV(J->L, b-(2<<LJ_FR2), b+rbase); + else + setnilV(b-(2<<LJ_FR2)); J->L->base = b - cbase; - tr = rec_cat(J, bslot, cbase-2); + tr = rec_cat(J, bslot, cbase-(2<<LJ_FR2)); b = J->L->base + cbase; /* Undo. */ J->L->base = b; - copyTV(J->L, b-2, &save); + copyTV(J->L, b-(2<<LJ_FR2), &save); } if (tr) { /* Store final result. */ BCReg dst = bc_a(*(frame_contpc(frame)-1)); J->base[dst] = tr; - if (dst >= J->maxslot) J->maxslot = dst+1; + if (dst >= J->maxslot) { + J->maxslot = dst+1; + } } /* Otherwise continue with another __concat call. */ } else { /* Result type already specialized. */ @@ -876,7 +910,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) } else { lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */ } - lua_assert(J->baseslot >= 1); + lua_assert(J->baseslot >= 1+LJ_FR2); } /* -- Metamethod handling ------------------------------------------------- */ @@ -885,11 +919,16 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) { BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize; +#if LJ_FR2 + J->base[top] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont))); + J->base[top+1] = TREF_CONT; +#else J->base[top] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; +#endif J->framedepth++; for (s = J->maxslot; s < top; s++) J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */ - return top+1; + return top+1+LJ_FR2; } /* Record metamethod lookup. */ @@ -967,9 +1006,9 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) BCReg func = rec_mm_prep(J, mm == MM_concat ? 
lj_cont_cat : lj_cont_ra); TRef *base = J->base + func; TValue *basev = J->L->base + func; - base[1] = ix->tab; base[2] = ix->key; - copyTV(J->L, basev+1, &ix->tabv); - copyTV(J->L, basev+2, &ix->keyv); + base[1+LJ_FR2] = ix->tab; base[2+LJ_FR2] = ix->key; + copyTV(J->L, basev+1+LJ_FR2, &ix->tabv); + copyTV(J->L, basev+2+LJ_FR2, &ix->keyv); if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */ if (mm != MM_unm) { ix->tab = ix->key; @@ -980,8 +1019,10 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) lj_trace_err(J, LJ_TRERR_NOMM); } ok: - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ base[0] = ix->mobj; +#if LJ_FR2 + base[1] = 0; +#endif copyTV(J->L, basev+0, &ix->mobjv); lj_record_call(J, func, 2); return 0; /* No result yet. */ @@ -997,8 +1038,9 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) BCReg func = rec_mm_prep(J, lj_cont_ra); TRef *base = J->base + func; TValue *basev = J->L->base + func; - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv); + base += LJ_FR2; + basev += LJ_FR2; base[1] = tr; copyTV(J->L, basev+1, tv); #if LJ_52 base[2] = tr; copyTV(J->L, basev+2, tv); @@ -1018,11 +1060,10 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op) { BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt); - TRef *base = J->base + func; - TValue *tv = J->L->base + func; - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. 
*/ - base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key; - copyTV(J->L, tv+0, &ix->mobjv); + TRef *base = J->base + func + LJ_FR2; + TValue *tv = J->L->base + func + LJ_FR2; + base[-LJ_FR2] = ix->mobj; base[1] = ix->val; base[2] = ix->key; + copyTV(J->L, tv-LJ_FR2, &ix->mobjv); copyTV(J->L, tv+1, &ix->valv); copyTV(J->L, tv+2, &ix->keyv); lj_record_call(J, func, 2); @@ -1339,11 +1380,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) handlemm: if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */ BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra); - TRef *base = J->base + func; - TValue *tv = J->L->base + func; - lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ - base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; - setfuncV(J->L, tv+0, funcV(&ix->mobjv)); + TRef *base = J->base + func + LJ_FR2; + TValue *tv = J->L->base + func + LJ_FR2; + base[-LJ_FR2] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; + setfuncV(J->L, tv-LJ_FR2, funcV(&ix->mobjv)); copyTV(J->L, tv+1, &ix->tabv); copyTV(J->L, tv+2, &ix->keyv); if (ix->val) { @@ -1533,7 +1573,11 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val) goto noconstify; kfunc = lj_ir_kfunc(J, J->fn); emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc); - J->base[-1] = TREF_FRAME | kfunc; +#if LJ_FR2 + J->base[-2] = kfunc; +#else + J->base[-1] = kfunc | TREF_FRAME; +#endif fn = kfunc; } tr = lj_record_constify(J, uvval(uvp)); @@ -1644,11 +1688,14 @@ static void rec_func_setup(jit_State *J) static void rec_func_vararg(jit_State *J) { GCproto *pt = J->pt; - BCReg s, fixargs, vframe = J->maxslot+1; + BCReg s, fixargs, vframe = J->maxslot+1+LJ_FR2; lua_assert((pt->flags & PROTO_VARARG)); if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) lj_trace_err(J, LJ_TRERR_STACKOV); - J->base[vframe-1] = J->base[-1]; /* Copy function up. */ + J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. 
*/ +#if LJ_FR2 + J->base[vframe-1] = TREF_FRAME; +#endif /* Copy fixarg slots up and set their original slots to nil. */ fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot; for (s = 0; s < fixargs; s++) { @@ -1710,7 +1757,7 @@ static int select_detect(jit_State *J) static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) { int32_t numparams = J->pt->numparams; - ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1; + ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2; lua_assert(frame_isvarg(J->L->base-1)); if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ ptrdiff_t i; @@ -1722,10 +1769,10 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) J->maxslot = dst + (BCReg)nresults; } for (i = 0; i < nresults; i++) - J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1) : TREF_NIL; + J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1 - LJ_FR2) : TREF_NIL; } else { /* Unknown number of varargs passed to trace. */ - TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME); - int32_t frofs = 8*(1+numparams)+FRAME_VARG; + TRef fr = emitir(IRTI(IR_SLOAD), LJ_FR2, IRSLOAD_READONLY|IRSLOAD_FRAME); + int32_t frofs = 8*(1+LJ_FR2+numparams)+FRAME_VARG; if (nresults >= 0) { /* Known fixed number of results. 
*/ ptrdiff_t i; if (nvararg > 0) { @@ -1739,7 +1786,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8)); for (i = 0; i < nload; i++) { - IRType t = itype2irt(&J->L->base[i-1-nvararg]); + IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]); TRef aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, lj_ir_kint(J, (int32_t)i)); TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0); @@ -1787,14 +1834,15 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) if (idx != 0 && idx <= nvararg) { IRType t; TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); - vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8)); - t = itype2irt(&J->L->base[idx-2-nvararg]); + vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, + lj_ir_kint(J, frofs-(8<<LJ_FR2))); + t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]); aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx); tr = emitir(IRTG(IR_VLOAD, t), aref, 0); if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ } - J->base[dst-2] = tr; - J->maxslot = dst-1; + J->base[dst-2-LJ_FR2] = tr; + J->maxslot = dst-1-LJ_FR2; J->bcskip = 2; /* Skip CALLM + select. */ } else { nyivarg: @@ -1887,7 +1935,15 @@ static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond) const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0); SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; /* Set PC to opposite target to avoid re-recording the comp. in side trace. */ +#if LJ_FR2 + SnapEntry *flink = &J->cur.snapmap[snap->mapofs + snap->nent]; + uint64_t pcbase; + memcpy(&pcbase, flink, sizeof(uint64_t)); + pcbase = (pcbase & 0xff) | (u64ptr(npc) << 8); + memcpy(flink, &pcbase, sizeof(uint64_t)); +#else J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); +#endif J->needsnap = 1; if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins); lj_snap_shrink(J); /* Shrink last snapshot if possible. 
*/ @@ -2185,7 +2241,13 @@ void lj_record_ins(jit_State *J) case BC_MOV: /* Clear gap of method call to avoid resurrecting previous refs. */ - if (ra > J->maxslot) J->base[ra-1] = 0; + if (ra > J->maxslot) { +#if LJ_FR2 + memset(J->base + J->maxslot, 0, (ra - J->maxslot) * sizeof(TRef)); +#else + J->base[ra-1] = 0; +#endif + } break; case BC_KSTR: case BC_KNUM: case BC_KPRI: break; @@ -2254,14 +2316,14 @@ void lj_record_ins(jit_State *J) /* -- Calls and vararg handling ----------------------------------------- */ case BC_ITERC: - J->base[ra] = getslot(J, ra-3-LJ_FR2); - J->base[ra+1] = getslot(J, ra-2-LJ_FR2); - J->base[ra+2] = getslot(J, ra-1-LJ_FR2); + J->base[ra] = getslot(J, ra-3); + J->base[ra+1+LJ_FR2] = getslot(J, ra-2); + J->base[ra+2+LJ_FR2] = getslot(J, ra-1); { /* Do the actual copy now because lj_record_call needs the values. */ TValue *b = &J->L->base[ra]; - copyTV(J->L, b, b-3-LJ_FR2); - copyTV(J->L, b+1, b-2-LJ_FR2); - copyTV(J->L, b+2, b-1-LJ_FR2); + copyTV(J->L, b, b-3); + copyTV(J->L, b+1+LJ_FR2, b-2); + copyTV(J->L, b+2+LJ_FR2, b-1); } lj_record_call(J, ra, (ptrdiff_t)rc-1); break; @@ -2384,7 +2446,12 @@ void lj_record_ins(jit_State *J) /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */ if (bcmode_a(op) == BCMdst && rc) { J->base[ra] = rc; - if (ra >= J->maxslot) J->maxslot = ra+1; + if (ra >= J->maxslot) { +#if LJ_FR2 + if (ra > J->maxslot) J->base[ra-1] = 0; +#endif + J->maxslot = ra+1; + } } #undef rav @@ -2469,7 +2536,7 @@ void lj_record_setup(jit_State *J) J->scev.idx = REF_NIL; setmref(J->scev.pc, NULL); - J->baseslot = 1; /* Invoking function is at base[-1]. */ + J->baseslot = 1+LJ_FR2; /* Invoking function is at base[-1-LJ_FR2]. 
*/ J->base = J->slot + J->baseslot; J->maxslot = 0; J->framedepth = 0; diff --git a/src/lj_snap.c b/src/lj_snap.c index 6199b1f0..33c058be 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -68,10 +68,18 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) for (s = 0; s < nslots; s++) { TRef tr = J->slot[s]; IRRef ref = tref_ref(tr); +#if LJ_FR2 + if (s == 1) continue; + if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) { + TValue *base = J->L->base - J->baseslot; + tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64); + ref = tref_ref(tr); + } +#endif if (ref) { SnapEntry sn = SNAP_TR(s, tr); IRIns *ir = &J->cur.ir[ref]; - if (!(sn & (SNAP_CONT|SNAP_FRAME)) && + if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) && ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { /* No need to snapshot unmodified non-inherited slots. */ if (!(ir->op2 & IRSLOAD_INHERIT)) @@ -90,34 +98,51 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) } /* Add frame links at the end of the snapshot. */ -static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map) +static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot) { cTValue *frame = J->L->base - 1; - cTValue *lim = J->L->base - J->baseslot; + cTValue *lim = J->L->base - J->baseslot + LJ_FR2; GCfunc *fn = frame_func(frame); cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top; +#if LJ_FR2 + uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2); + lua_assert(2 <= J->baseslot && J->baseslot <= 257); + memcpy(map, &pcbase, sizeof(uint64_t)); +#else MSize f = 0; - lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */ map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ +#endif while (frame > lim) { /* Backwards traversal of all frames above base. 
*/ if (frame_islua(frame)) { +#if !LJ_FR2 map[f++] = SNAP_MKPC(frame_pc(frame)); +#endif frame = frame_prevl(frame); } else if (frame_iscont(frame)) { +#if !LJ_FR2 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); map[f++] = SNAP_MKPC(frame_contpc(frame)); +#endif frame = frame_prevd(frame); } else { lua_assert(!frame_isc(frame)); +#if !LJ_FR2 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); +#endif frame = frame_prevd(frame); continue; } if (frame + funcproto(frame_func(frame))->framesize > ftop) ftop = frame + funcproto(frame_func(frame))->framesize; } + *topslot = (uint8_t)(ftop - lim); +#if LJ_FR2 + lua_assert(sizeof(SnapEntry) * 2 == sizeof(uint64_t)); + return 2; +#else lua_assert(f == (MSize)(1 + J->framedepth)); - return (BCReg)(ftop - lim); + return f; +#endif } /* Take a snapshot of the current stack. */ @@ -127,16 +152,16 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) MSize nent; SnapEntry *p; /* Conservative estimate. */ - lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1); + lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1)); p = &J->cur.snapmap[nsnapmap]; nent = snapshot_slots(J, p, nslots); - snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent); + snap->nent = (uint8_t)nent; + nent += snapshot_framelinks(J, p + nent, &snap->topslot); snap->mapofs = (uint16_t)nsnapmap; snap->ref = (IRRef1)J->cur.nins; - snap->nent = (uint8_t)nent; snap->nslots = (uint8_t)nslots; snap->count = 0; - J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth); + J->cur.nsnapmap = (uint16_t)(nsnapmap + nent); } /* Add or merge a snapshot. 
*/ @@ -284,8 +309,8 @@ void lj_snap_shrink(jit_State *J) MSize n, m, nlim, nent = snap->nent; uint8_t udf[SNAP_USEDEF_SLOTS]; BCReg maxslot = J->maxslot; - BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot); BCReg baseslot = J->baseslot; + BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot); maxslot += baseslot; minslot += baseslot; snap->nslots = (uint8_t)maxslot; @@ -794,11 +819,13 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) SnapShot *snap = &T->snap[snapno]; MSize n, nent = snap->nent; SnapEntry *map = &T->snapmap[snap->mapofs]; - SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1]; + SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2]; +#if !LJ_FR2 ptrdiff_t ftsz0; +#endif TValue *frame; BloomFilter rfilt = snap_renamefilter(T, snapno); - const BCIns *pc = snap_pc(map[nent]); + const BCIns *pc = snap_pc(&map[nent]); lua_State *L = J->L; /* Set interpreter PC to the next PC to get correct error messages. */ @@ -811,8 +838,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) } /* Fill stack slots with data from the registers and spill slots. */ - frame = L->base-1; + frame = L->base-1-LJ_FR2; +#if !LJ_FR2 ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ +#endif for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; if (!(sn & SNAP_NORESTORE)) { @@ -835,14 +864,18 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) TValue tmp; snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); o->u32.hi = tmp.u32.lo; +#if !LJ_FR2 } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { - lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */ /* Overwrite tag with frame link. */ setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0); L->base = o+1; +#endif } } } +#if LJ_FR2 + L->base += (map[nent+LJ_BE] & 0xff); +#endif lua_assert(map + nent == flinks); /* Compute current stack top. */