From b5d741fa7e11a2a58df65f3c71489c58f8758f75 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 25 Dec 2013 02:55:25 +0100 Subject: [PATCH] Add trace stitching. --- src/lib_base.c | 4 +- src/lib_jit.c | 2 +- src/lj_dispatch.c | 23 ++++++++ src/lj_dispatch.h | 6 ++- src/lj_ffrecord.c | 130 ++++++++++++++++++++++++++++++++++------------ src/lj_jit.h | 4 +- src/lj_record.c | 57 +++++++++++++------- src/lj_record.h | 1 + src/lj_snap.c | 3 +- src/lj_trace.c | 30 +++++++++-- src/lj_trace.h | 1 + src/lj_traceerr.h | 3 +- src/lj_vm.h | 1 + src/vm_arm.dasc | 50 ++++++++++++++++++ src/vm_mips.dasc | 58 ++++++++++++++++++++- src/vm_ppc.dasc | 55 ++++++++++++++++++-- src/vm_x86.dasc | 63 +++++++++++++++++++++- 17 files changed, 422 insertions(+), 69 deletions(-) diff --git a/src/lib_base.c b/src/lib_base.c index a19926a7..495e1ab1 100644 --- a/src/lib_base.c +++ b/src/lib_base.c @@ -101,7 +101,7 @@ static int ffh_pairs(lua_State *L, MMS mm) #endif LJLIB_PUSH(lastcl) -LJLIB_ASM(pairs) +LJLIB_ASM(pairs) LJLIB_REC(xpairs 0) { return ffh_pairs(L, MM_pairs); } @@ -114,7 +114,7 @@ LJLIB_NOREGUV LJLIB_ASM(ipairs_aux) LJLIB_REC(.) } LJLIB_PUSH(lastcl) -LJLIB_ASM(ipairs) LJLIB_REC(.) +LJLIB_ASM(ipairs) LJLIB_REC(xpairs 1) { return ffh_pairs(L, MM_ipairs); } diff --git a/src/lib_jit.c b/src/lib_jit.c index a87e1833..1ee04b7d 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -284,7 +284,7 @@ static GCtrace *jit_checktrace(lua_State *L) /* Names of link types. 
ORDER LJ_TRLINK */ static const char *const jit_trlinkname[] = { "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion", - "interpreter", "return" + "interpreter", "return", "stitch" }; /* local info = jit.util.traceinfo(tr) */ diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c index 0146d8cd..b76e8048 100644 --- a/src/lj_dispatch.c +++ b/src/lj_dispatch.c @@ -42,6 +42,12 @@ LJ_STATIC_ASSERT(GG_NUM_ASMFF == FF_NUM_ASMFUNC); #include <math.h> LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co); +#if !LJ_HASJIT +#define lj_dispatch_stitch lj_dispatch_ins +#endif +#if !LJ_HASPROFILE +#define lj_dispatch_profile lj_dispatch_ins +#endif #define GOTFUNC(name) (ASMFunction)name, static const ASMFunction dispatch_got[] = { @@ -511,6 +517,23 @@ out: return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */ } +#if LJ_HASJIT +/* Stitch a new trace. */ +void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc) +{ + ERRNO_SAVE + lua_State *L = J->L; + void *cf = cframe_raw(L->cframe); + const BCIns *oldpc = cframe_pc(cf); + setcframe_pc(cf, pc); + /* Before dispatch, have to bias PC by 1. */ + L->top = L->base + cur_topslot(curr_proto(L), pc+1, cframe_multres_n(cf)); + lj_trace_stitch(J, pc-1); /* Point to the CALL instruction. */ + setcframe_pc(cf, oldpc); + ERRNO_RESTORE +} +#endif + #if LJ_HASPROFILE /* Profile dispatch. 
*/ void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc) diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h index 811a0ae4..447eb2d8 100644 --- a/src/lj_dispatch.h +++ b/src/lj_dispatch.h @@ -29,7 +29,8 @@ _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ _(pow) _(fmod) _(ldexp) \ - _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_profile) _(lj_err_throw)\ + _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \ + _(lj_dispatch_profile) _(lj_err_throw) \ _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \ _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \ @@ -110,6 +111,9 @@ LJ_FUNC void lj_dispatch_update(global_State *g); /* Instruction dispatch callback for hooks or when recording. */ LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc); LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc); +#if LJ_HASJIT +LJ_FUNCA void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc); +#endif #if LJ_HASPROFILE LJ_FUNCA void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc); #endif diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 42aae8b5..6a156c7c 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -96,28 +96,81 @@ static ptrdiff_t results_wanted(jit_State *J) return -1; } -/* Throw error for unsupported variant of fast function. */ -LJ_NORET static void recff_nyiu(jit_State *J) +/* Trace stitching: add continuation below frame to start a new trace. 
*/ +static void recff_stitch(jit_State *J) { - setfuncV(J->L, &J->errinfo, J->fn); - lj_trace_err_info(J, LJ_TRERR_NYIFFU); + ASMFunction cont = lj_cont_stitch; + TraceNo traceno = J->cur.traceno; + lua_State *L = J->L; + TValue *base = L->base; + const BCIns *pc = frame_pc(base-1); + TValue *pframe = frame_prevl(base-1); + TRef trcont; + + /* Move func + args up in Lua stack and insert continuation. */ + memmove(&base[1], &base[-1], sizeof(TValue)*(J->maxslot+1)); + setframe_ftsz(base+1, (int)((char *)(base+1) - (char *)pframe) + FRAME_CONT); + setcont(base, cont); + setframe_pc(base, pc); + if (LJ_DUALNUM) setintV(base-1, traceno); else base[-1].u64 = traceno; + L->base += 2; + L->top += 2; + + /* Ditto for the IR. */ + memmove(&J->base[1], &J->base[-1], sizeof(TRef)*(J->maxslot+1)); +#if LJ_64 + trcont = lj_ir_kptr(J, (void *)((int64_t)cont-(int64_t)lj_vm_asm_begin)); +#else + trcont = lj_ir_kptr(J, (void *)cont); +#endif + J->base[0] = trcont | TREF_CONT; + J->base[-1] = LJ_DUALNUM ? lj_ir_kint(J,traceno) : lj_ir_knum_u64(J,traceno); + J->maxslot += 2; + J->framedepth++; + + lj_record_stop(J, LJ_TRLINK_STITCH, 0); + + /* Undo Lua stack changes. */ + memmove(&base[-1], &base[1], sizeof(TValue)*(J->maxslot+1)); + setframe_pc(base-1, pc); + L->base -= 2; + L->top -= 2; } -/* Fallback handler for all fast functions that are not recorded (yet). */ +/* Fallback handler for fast functions that are not recorded (yet). */ static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd) { - setfuncV(J->L, &J->errinfo, J->fn); - lj_trace_err_info(J, LJ_TRERR_NYIFF); - UNUSED(rd); + if (J->cur.nins < (IRRef)J->param[JIT_P_minstitch] + REF_BASE) { + lj_trace_err_info(J, LJ_TRERR_TRACEUV); + } else { + /* Can only stitch from Lua call. */ + if (J->framedepth && frame_islua(J->L->base-1)) { + BCOp op = bc_op(*frame_pc(J->L->base-1)); + /* Stitched trace cannot start with *M op with variable # of args. 
*/ + if (!(op == BC_CALLM || op == BC_RETM || op == BC_TSETM)) { + switch (J->fn->c.ffid) { + case FF_error: + case FF_debug_sethook: + case FF_jit_flush: + break; /* Don't stitch across special builtins. */ + default: + recff_stitch(J); /* Use trace stitching. */ + rd->nres = -1; + return; + } + } + } + /* Otherwise stop trace and return to interpreter. */ + lj_record_stop(J, LJ_TRLINK_RETURN, 0); + rd->nres = -1; + } } -/* C functions can have arbitrary side-effects and are not recorded (yet). */ -static void LJ_FASTCALL recff_c(jit_State *J, RecordFFData *rd) -{ - setfuncV(J->L, &J->errinfo, J->fn); - lj_trace_err_info(J, LJ_TRERR_NYICF); - UNUSED(rd); -} +/* Fallback handler for unsupported variants of fast functions. */ +#define recff_nyiu recff_nyi + +/* Must stop the trace for classic C functions with arbitrary side-effects. */ +#define recff_c recff_nyi /* Emit BUFHDR for the global temporary buffer. */ static TRef recff_bufhdr(jit_State *J) @@ -268,7 +321,8 @@ static void LJ_FASTCALL recff_select(jit_State *J, RecordFFData *rd) J->base[i] = J->base[start+i]; } /* else: Interpreter will throw. */ } else { - recff_nyiu(J); + recff_nyiu(J, rd); + return; } } /* else: Interpreter will throw. */ } @@ -279,14 +333,18 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd) TRef base = J->base[1]; if (tr && !tref_isnil(base)) { base = lj_opt_narrow_toint(J, base); - if (!tref_isk(base) || IR(tref_ref(base))->i != 10) - recff_nyiu(J); + if (!tref_isk(base) || IR(tref_ref(base))->i != 10) { + recff_nyiu(J, rd); + return; + } } if (tref_isnumber_str(tr)) { if (tref_isstr(tr)) { TValue tmp; - if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) - recff_nyiu(J); /* Would need an inverted STRTO for this case. */ + if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) { + recff_nyiu(J, rd); /* Would need an inverted STRTO for this case. 
*/ + return; + } tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); } #if LJ_HASFFI @@ -348,7 +406,8 @@ static void LJ_FASTCALL recff_tostring(jit_State *J, RecordFFData *rd) } else if (tref_ispri(tr)) { J->base[0] = lj_ir_kstr(J, lj_strfmt_obj(J->L, &rd->argv[0])); } else { - recff_nyiu(J); + recff_nyiu(J, rd); + return; } } } @@ -370,14 +429,14 @@ static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd) } /* else: Interpreter will throw. */ } -static void LJ_FASTCALL recff_ipairs(jit_State *J, RecordFFData *rd) +static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd) { if (!(LJ_52 && recff_metacall(J, rd, MM_ipairs))) { TRef tab = J->base[0]; if (tref_istab(tab)) { J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0])); J->base[1] = tab; - J->base[2] = lj_ir_kint(J, 0); + J->base[2] = rd->data ? lj_ir_kint(J, 0) : TREF_NIL; rd->nres = 3; } /* else: Interpreter will throw. */ } @@ -431,8 +490,7 @@ static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd) J->base[0] = emitir(IRT(IR_FLOAD, IRT_TAB), trl, IRFL_THREAD_ENV); return; } - recff_nyiu(J); - UNUSED(rd); + recff_nyiu(J, rd); } /* -- Math library fast functions ----------------------------------------- */ @@ -672,8 +730,7 @@ static void LJ_FASTCALL recff_bit_tohex(jit_State *J, RecordFFData *rd) TRef tr = recff_bit64_tohex(J, rd, hdr); J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); #else - UNUSED(rd); - recff_nyiu(J); /* Don't bother working around this NYI. */ + recff_nyiu(J, rd); /* Don't bother working around this NYI. */ #endif } @@ -891,7 +948,8 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd) J->base[0] = TREF_NIL; } } else { /* Search for pattern. 
*/ - recff_nyiu(J); + recff_nyiu(J, rd); + return; } } @@ -931,7 +989,8 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra); lj_needsplit(J); #else - recff_nyiu(J); /* Don't bother working around this NYI. */ + recff_nyiu(J, rd); /* Don't bother working around this NYI. */ + return; #endif } break; @@ -946,8 +1005,10 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) if (LJ_SOFTFP) lj_needsplit(J); break; case STRFMT_STR: - if (!tref_isstr(tra)) - recff_nyiu(J); /* NYI: __tostring and non-string types for %s. */ + if (!tref_isstr(tra)) { + recff_nyiu(J, rd); /* NYI: __tostring and non-string types for %s. */ + return; + } if (sf == STRFMT_STR) /* Shortcut for plain %s. */ tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, tra); else if ((sf & STRFMT_T_QUOTED)) @@ -966,8 +1027,8 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) case STRFMT_PTR: /* NYI */ case STRFMT_ERR: default: - recff_nyiu(J); - break; + recff_nyiu(J, rd); + return; } } J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); @@ -991,7 +1052,8 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) ix.idxchain = 0; lj_record_idx(J, &ix); /* Set new value. */ } else { /* Complex case: insert in the middle. */ - recff_nyiu(J); + recff_nyiu(J, rd); + return; } } /* else: Interpreter will throw. */ } diff --git a/src/lj_jit.h b/src/lj_jit.h index cfb04aa7..52a216cc 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -97,6 +97,7 @@ _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \ _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \ + _(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \ \ _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \ _(\007, hotexit, 10) /* # of taken exits to start a side trace. 
*/ \ @@ -202,7 +203,8 @@ typedef enum { LJ_TRLINK_UPREC, /* Up-recursion. */ LJ_TRLINK_DOWNREC, /* Down-recursion. */ LJ_TRLINK_INTERP, /* Fallback to interpreter. */ - LJ_TRLINK_RETURN /* Return to interpreter. */ + LJ_TRLINK_RETURN, /* Return to interpreter. */ + LJ_TRLINK_STITCH /* Trace stitching. */ } TraceLink; /* Trace object. */ diff --git a/src/lj_record.c b/src/lj_record.c index ce9e20de..4ab474ad 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -233,7 +233,7 @@ static void canonicalize_slots(jit_State *J) } /* Stop recording. */ -static void rec_stop(jit_State *J, TraceLink linktype, TraceNo lnk) +void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk) { lj_trace_end(J); J->cur.linktype = (uint8_t)linktype; @@ -501,8 +501,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) { BCReg ra = bc_a(iterins); - lua_assert(J->base[ra] != 0); - if (!tref_isnil(J->base[ra])) { /* Looping back? */ + if (!tref_isnil(getslot(J, ra))) { /* Looping back? */ J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ J->maxslot = ra-1+bc_b(J->pc[-1]); J->pc += bc_j(iterins)+1; @@ -540,12 +539,12 @@ static int innerloopleft(jit_State *J, const BCIns *pc) /* Handle the case when an interpreted loop op is hit. */ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) { - if (J->parent == 0) { + if (J->parent == 0 && J->exitno == 0) { if (pc == J->startpc && J->framedepth + J->retdepth == 0) { /* Same loop? */ if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */ lj_trace_err(J, LJ_TRERR_LLEAVE); - rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping root trace. */ + lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */ } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */ /* It's usually better to abort here and wait until the inner loop ** is traced. 
But if the inner loop repeatedly didn't loop back, @@ -570,15 +569,15 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) /* Handle the case when an already compiled loop op is hit. */ static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) { - if (J->parent == 0) { /* Root trace hit an inner loop. */ + if (J->parent == 0 && J->exitno == 0) { /* Root trace hit an inner loop. */ /* Better let the inner loop spawn a side trace back here. */ lj_trace_err(J, LJ_TRERR_LINNER); } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */ J->instunroll = 0; /* Cannot continue across a compiled loop op. */ if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) - rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form an extra loop. */ + lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form extra loop. */ else - rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */ + lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */ } /* Side trace continues across a loop that's left or not entered. */ } @@ -643,6 +642,18 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr) (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */ return tr; } + } else { + /* Don't specialize to non-monomorphic builtins. */ + switch (fn->c.ffid) { + case FF_coroutine_wrap_aux: + case FF_string_gmatch_aux: + /* NYI: io_file_iter doesn't have an ffid, yet. */ + /* NYI: specialize to ffid? Not strictly necessary, trace will stop. */ + return tr; + default: + /* NYI: don't specialize to non-monomorphic C functions. */ + break; + } } /* Otherwise specialize to the function (closure) value itself. */ kfunc = lj_ir_kfunc(J, fn); @@ -750,12 +761,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) /* Return to lower frame via interpreter for unhandled cases. 
*/ if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) && (!frame_islua(frame) || - (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))))) { + (J->parent == 0 && J->exitno == 0 && + !bc_isret(bc_op(J->cur.startins))))) { /* NYI: specialize to frame type and return directly, not via RET*. */ for (i = -1; i < (ptrdiff_t)rbase; i++) J->base[i] = 0; /* Purge dead slots. */ J->maxslot = rbase + (BCReg)gotresults; - rec_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */ + lj_record_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */ return; } if (frame_isvarg(frame)) { @@ -779,7 +791,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) if (check_downrec_unroll(J, pt)) { J->maxslot = (BCReg)(rbase + gotresults); lj_snap_purge(J); - rec_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-recursion. */ + lj_record_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-rec. */ return; } lj_snap_add(J); @@ -792,7 +804,8 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) lua_assert(J->baseslot > cbase+1); J->baseslot -= cbase+1; J->base -= cbase+1; - } else if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) { + } else if (J->parent == 0 && J->exitno == 0 && + !bc_isret(bc_op(J->cur.startins))) { /* Return to lower frame would leave the loop in a root trace. */ lj_trace_err(J, LJ_TRERR_LLEAVE); } else { /* Return to lower frame. Guard for the target we return to. */ @@ -1480,9 +1493,9 @@ static void check_call_unroll(jit_State *J, TraceNo lnk) if (count + J->tailcalled > J->param[JIT_P_recunroll]) { J->pc++; if (J->framedepth + J->retdepth == 0) - rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-recursion. */ + lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-rec. */ else - rec_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */ + lj_record_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. 
*/ } } else { if (count > J->param[JIT_P_callunroll]) { @@ -1556,9 +1569,9 @@ static void rec_func_jit(jit_State *J, TraceNo lnk) } J->instunroll = 0; /* Cannot continue across a compiled function. */ if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) - rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-recursion. */ + lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-rec. */ else - rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */ + lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */ } /* -- Vararg handling ----------------------------------------------------- */ @@ -2165,7 +2178,7 @@ void lj_record_ins(jit_State *J) case BC_JFORI: lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL); if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */ - rec_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J])); + lj_record_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J])); /* Continue tracing if the loop is not entered. */ break; @@ -2299,6 +2312,12 @@ static const BCIns *rec_setup_root(jit_State *J) J->maxslot = J->pt->numparams; pc++; break; + case BC_CALLM: + case BC_CALL: + case BC_ITERC: + /* No bytecode range check for stitched traces. */ + pc++; + break; default: lua_assert(0); break; @@ -2366,7 +2385,7 @@ void lj_record_setup(jit_State *J) if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] || T->snap[J->exitno].count >= J->param[JIT_P_hotexit] + J->param[JIT_P_tryside]) { - rec_stop(J, LJ_TRLINK_INTERP, 0); + lj_record_stop(J, LJ_TRLINK_INTERP, 0); } } else { /* Root trace. 
*/ J->cur.root = 0; @@ -2378,6 +2397,8 @@ void lj_record_setup(jit_State *J) lj_snap_add(J); if (bc_op(J->cur.startins) == BC_FORL) rec_for_loop(J, J->pc-1, &J->scev, 1); + else if (bc_op(J->cur.startins) == BC_ITERC) + J->startpc = NULL; if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) lj_trace_err(J, LJ_TRERR_STACKOV); } diff --git a/src/lj_record.h b/src/lj_record.h index 287b2604..5f08c18d 100644 --- a/src/lj_record.h +++ b/src/lj_record.h @@ -28,6 +28,7 @@ typedef struct RecordIndex { LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b, cTValue *av, cTValue *bv); +LJ_FUNC void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk); LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o); LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs); diff --git a/src/lj_snap.c b/src/lj_snap.c index a25e4601..0c6cd776 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -97,7 +97,8 @@ static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map) { cTValue *frame = J->L->base - 1; cTValue *lim = J->L->base - J->baseslot; - cTValue *ftop = frame + funcproto(frame_func(frame))->framesize; + GCfunc *fn = frame_func(frame); + cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top; MSize f = 0; map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ while (frame > lim) { /* Backwards traversal of all frames above base. */ diff --git a/src/lj_trace.c b/src/lj_trace.c index 2b8d931f..fa15e23d 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -360,7 +360,7 @@ static void trace_start(jit_State *J) TraceNo traceno; if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */ - if (J->parent == 0) { + if (J->parent == 0 && J->exitno == 0) { /* Lazy bytecode patching to disable hotcount events. 
*/ lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL || bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF); @@ -453,6 +453,12 @@ static void trace_stop(jit_State *J) root->nextside = (TraceNo1)traceno; } break; + case BC_CALLM: + case BC_CALL: + case BC_ITERC: + /* Trace stitching: patch link of previous trace. */ + traceref(J, J->exitno)->link = traceno; + break; default: lua_assert(0); break; @@ -502,8 +508,12 @@ static int trace_abort(jit_State *J) return 1; /* Retry ASM with new MCode area. */ } /* Penalize or blacklist starting bytecode instruction. */ - if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) - penalty_pc(J, &gcref(J->cur.startpt)->pt, mref(J->cur.startpc, BCIns), e); + if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) { + if (J->exitno == 0) + penalty_pc(J, &gcref(J->cur.startpt)->pt, mref(J->cur.startpc, BCIns), e); + else + traceref(J, J->exitno)->link = J->exitno; /* Self-link is blacklisted. */ + } /* Is there anything to abort? */ traceno = J->cur.traceno; @@ -680,6 +690,20 @@ static void trace_hotside(jit_State *J, const BCIns *pc) } } +/* Stitch a new trace to the previous trace. */ +void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc) +{ + /* Only start a new trace if not recording or inside __gc call or vmevent. */ + if (J->state == LJ_TRACE_IDLE && + !(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) { + J->parent = 0; /* Have to treat it like a root trace. */ + /* J->exitno is set to the invoking trace. */ + J->state = LJ_TRACE_START; + lj_trace_ins(J, pc); + } +} + + /* Tiny struct to pass data to protected call. */ typedef struct ExitDataCP { jit_State *J; diff --git a/src/lj_trace.h b/src/lj_trace.h index e30d3d59..be55e9d1 100644 --- a/src/lj_trace.h +++ b/src/lj_trace.h @@ -34,6 +34,7 @@ LJ_FUNC void lj_trace_freestate(global_State *g); /* Event handling. 
*/ LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc); LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc); +LJ_FUNCA void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc); LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr); /* Signal asynchronous abort of trace or end of trace. */ diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h index 8f463ca6..2546fc8f 100644 --- a/src/lj_traceerr.h +++ b/src/lj_traceerr.h @@ -7,6 +7,7 @@ /* Recording. */ TREDEF(RECERR, "error thrown or hook called during recording") +TREDEF(TRACEUV, "trace too short") TREDEF(TRACEOV, "trace too long") TREDEF(STACKOV, "trace too deep") TREDEF(SNAPOV, "too many snapshots") @@ -23,8 +24,6 @@ TREDEF(BADTYPE, "bad argument type") TREDEF(CJITOFF, "JIT compilation disabled for function") TREDEF(CUNROLL, "call unroll limit reached") TREDEF(DOWNREC, "down-recursion, restarting") -TREDEF(NYICF, "NYI: C function %p") -TREDEF(NYIFF, "NYI: FastFunc %s") TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s") TREDEF(NYIRETL, "NYI: return to lower frame") diff --git a/src/lj_vm.h b/src/lj_vm.h index 5893d0b2..4a1c2f2d 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h @@ -107,6 +107,7 @@ LJ_ASMF void lj_cont_nop(void); /* Do nothing, just continue execution. */ LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */ LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */ LJ_ASMF void lj_cont_hook(void); /* Continue from hook yield. */ +LJ_ASMF void lj_cont_stitch(void); /* Trace stitching. */ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index b728d52f..559d20bd 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc @@ -2082,6 +2082,55 @@ static void build_subroutines(BuildCtx *ctx) | ldr INS, [PC, #-4] | bx CRET1 | + |->cont_stitch: // Trace stitching. 
+ |.if JIT + | // RA = resultptr, CARG4 = meta base + | ldr RB, SAVE_MULTRES + | ldr INS, [PC, #-4] + | ldr CARG3, [CARG4, #-24] // Save previous trace number. + | subs RB, RB, #8 + | decode_RA8 RC, INS // Call base. + | beq >2 + |1: // Move results down. + | ldrd CARG12, [RA] + | add RA, RA, #8 + | subs RB, RB, #8 + | strd CARG12, [BASE, RC] + | add RC, RC, #8 + | bne <1 + |2: + | decode_RA8 RA, INS + | decode_RB8 RB, INS + | add RA, RA, RB + | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)] + |3: + | cmp RA, RC + | mvn CARG2, #~LJ_TNIL + | bhi >9 // More results wanted? + | + | ldr TRACE:RA, [CARG1, CARG3, lsl #2] + | ldrh RC, TRACE:RA->link + | cmp RC, CARG3 + | beq ->cont_nop // Blacklisted. + | cmp RC, #0 + | bne =>BC_JLOOP // Jump to stitched trace. + | + | // Stitch a new trace to the previous trace. + | str CARG3, [DISPATCH, #DISPATCH_J(exitno)] + | str L, [DISPATCH, #DISPATCH_J(L)] + | str BASE, L->base + | sub CARG1, DISPATCH, #-GG_DISP2J + | mov CARG2, PC + | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) + | ldr BASE, L->base + | b ->cont_nop + | + |9: // Fill up results with nil. + | strd CARG12, [BASE, RC] + | add RC, RC, #8 + | b <3 + |.endif + | |->vm_profhook: // Dispatch target for profiler hook. #if LJ_HASPROFILE | mov CARG1, L @@ -2166,6 +2215,7 @@ static void build_subroutines(BuildCtx *ctx) | lsrlo RC, INS, #16 // No: Decode operands A*8 and D. | subhs RC, RC, #8 | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8 + | ldrhs CARG3, [BASE, FRAME_FUNC] | bx OP | |3: // Rethrow error from the right C frame. diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index f45a5c49..094ffe38 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc @@ -2011,6 +2011,60 @@ static void build_subroutines(BuildCtx *ctx) | jr CRET1 |. lw INS, -4(PC) | + |->cont_stitch: // Trace stitching. + |.if JIT + | // RA = resultptr, RB = meta base + | lw INS, -4(PC) + | lw TMP3, -24+LO(RB) // Save previous trace number. 
+ | decode_RA8a RC, INS + | addiu AT, MULTRES, -8 + | decode_RA8b RC + | beqz AT, >2 + |. addu RC, BASE, RC // Call base. + |1: // Move results down. + | ldc1 f0, 0(RA) + | addiu AT, AT, -8 + | addiu RA, RA, 8 + | sdc1 f0, 0(RC) + | bnez AT, <1 + |. addiu RC, RC, 8 + |2: + | decode_RA8a RA, INS + | decode_RB8a RB, INS + | decode_RA8b RA + | decode_RB8b RB + | addu RA, RA, RB + | lw TMP1, DISPATCH_J(trace)(DISPATCH) + | addu RA, BASE, RA + |3: + | sltu AT, RC, RA + | bnez AT, >9 // More results wanted? + |. sll TMP2, TMP3, 2 + | + | addu TMP2, TMP1, TMP2 + | lw TRACE:TMP2, 0(TMP2) + | lhu RD, TRACE:TMP2->link + | beq RD, TMP3, ->cont_nop // Blacklisted. + |. load_got lj_dispatch_stitch + | bnez RD, =>BC_JLOOP // Jump to stitched trace. + |. sll RD, RD, 3 + | + | // Stitch a new trace to the previous trace. + | sw TMP3, DISPATCH_J(exitno)(DISPATCH) + | sw L, DISPATCH_J(L)(DISPATCH) + | sw BASE, L->base + | addiu CARG1, DISPATCH, GG_DISP2J + | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) + |. move CARG2, PC + | b ->cont_nop + |. lw BASE, L->base + | + |9: + | sw TISNIL, HI(RC) + | b <3 + |. addiu RC, RC, 8 + |.endif + | |->vm_profhook: // Dispatch target for profiler hook. #if LJ_HASPROFILE | load_got lj_dispatch_profile @@ -2091,13 +2145,13 @@ static void build_subroutines(BuildCtx *ctx) | sw BASE, L->base |1: | bltz CRET1, >3 // Check for error from exit. - |. lw LFUNC:TMP1, FRAME_FUNC(BASE) + |. lw LFUNC:RB, FRAME_FUNC(BASE) | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 
| sll MULTRES, CRET1, 3 | li TISNIL, LJ_TNIL | sw MULTRES, SAVE_MULTRES | mtc1 TMP3, TOBIT - | lw TMP1, LFUNC:TMP1->pc + | lw TMP1, LFUNC:RB->pc | sw r0, DISPATCH_GL(jit_base)(DISPATCH) | lw KBASE, PC2PROTO(k)(TMP1) | cvt.d.s TOBIT, TOBIT diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index 91de682d..c21f5c43 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc @@ -2505,6 +2505,55 @@ static void build_subroutines(BuildCtx *ctx) | mtctr CRET1 | bctr | + |->cont_stitch: // Trace stitching. + |.if JIT + | // RA = resultptr, RB = meta base + | lwz INS, -4(PC) + | lwz TMP3, -20(RB) // Save previous trace number. + | addic. TMP1, MULTRES, -8 + | decode_RA8 RC, INS // Call base. + | beq >2 + |1: // Move results down. + | lfd f0, 0(RA) + | addic. TMP1, TMP1, -8 + | addi RA, RA, 8 + | stfdx f0, BASE, RC + | addi RC, RC, 8 + | bne <1 + |2: + | decode_RA8 RA, INS + | decode_RB8 RB, INS + | add RA, RA, RB + | lwz TMP1, DISPATCH_J(trace)(DISPATCH) + |3: + | cmplw RA, RC + | bgt >9 // More results wanted? + | + | slwi TMP2, TMP3, 2 + | lwzx TRACE:TMP2, TMP1, TMP2 + | lhz RD, TRACE:TMP2->link + | cmpw RD, TMP3 + | cmpwi cr1, RD, 0 + | beq ->cont_nop // Blacklisted. + | slwi RD, RD, 3 + | bne cr1, =>BC_JLOOP // Jump to stitched trace. + | + | // Stitch a new trace to the previous trace. + | stw TMP3, DISPATCH_J(exitno)(DISPATCH) + | stp L, DISPATCH_J(L)(DISPATCH) + | stp BASE, L->base + | addi CARG1, DISPATCH, GG_DISP2J + | mr CARG2, PC + | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) + | lp BASE, L->base + | b ->cont_nop + | + |9: + | stwx TISNIL, BASE, RC + | addi RC, RC, 8 + | b <3 + |.endif + | |->vm_profhook: // Dispatch target for profiler hook. #if LJ_HASPROFILE | mr CARG1, L @@ -2557,7 +2606,7 @@ static void build_subroutines(BuildCtx *ctx) | sub CARG3, TMP0, CARG3 // Compute exit number. 
| lp BASE, DISPATCH_GL(jit_base)(DISPATCH) | srwi CARG3, CARG3, 2 - | stw L, DISPATCH_J(L)(DISPATCH) + | stp L, DISPATCH_J(L)(DISPATCH) | subi CARG3, CARG3, 2 | stp BASE, L->base | stw CARG4, DISPATCH_J(parent)(DISPATCH) @@ -2589,11 +2638,11 @@ static void build_subroutines(BuildCtx *ctx) |1: | cmpwi CARG1, 0 | blt >3 // Check for error from exit. - | lwz LFUNC:TMP1, FRAME_FUNC(BASE) + | lwz LFUNC:RB, FRAME_FUNC(BASE) | slwi MULTRES, CARG1, 3 | li TMP2, 0 | stw MULTRES, SAVE_MULTRES - | lwz TMP1, LFUNC:TMP1->pc + | lwz TMP1, LFUNC:RB->pc | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH) | lwz KBASE, PC2PROTO(k)(TMP1) | // Setup type comparison constants. diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 8ed6efd1..c2f03d80 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -2659,6 +2659,67 @@ static void build_subroutines(BuildCtx *ctx) | add NARGS:RD, 1 | jmp RBa | + |->cont_stitch: // Trace stitching. + |.if JIT + | // BASE = base, RC = result, RB = mbase + | mov RA, [RB-24] // Save previous trace number. + | mov TMP1, RA + | mov TMP3, DISPATCH // Need one more register. + | mov DISPATCH, MULTRES + | movzx RA, PC_RA + | lea RA, [BASE+RA*8] // Call base. + | sub DISPATCH, 1 + | jz >2 + |1: // Move results down. + |.if X64 + | mov RBa, [RC] + | mov [RA], RBa + |.else + | mov RB, [RC] + | mov [RA], RB + | mov RB, [RC+4] + | mov [RA+4], RB + |.endif + | add RC, 8 + | add RA, 8 + | sub DISPATCH, 1 + | jnz <1 + |2: + | movzx RC, PC_RA + | movzx RB, PC_RB + | add RC, RB + | lea RC, [BASE+RC*8-8] + |3: + | cmp RC, RA + | ja >9 // More results wanted? + | + | mov DISPATCH, TMP3 + | mov RB, TMP1 // Get previous trace number. + | mov RA, [DISPATCH+DISPATCH_J(trace)] + | mov TRACE:RD, [RA+RB*4] + | movzx RD, word TRACE:RD->link + | cmp RD, RB + | je ->cont_nop // Blacklisted. + | test RD, RD + | jne =>BC_JLOOP // Jump to stitched trace. + | + | // Stitch a new trace to the previous trace. 
+ | mov [DISPATCH+DISPATCH_J(exitno)], RB + | mov L:RB, SAVE_L + | mov L:RB->base, BASE + | mov FCARG2, PC + | lea FCARG1, [DISPATCH+GG_DISP2J] + | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa + | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc) + | mov BASE, L:RB->base + | jmp ->cont_nop + | + |9: // Fill up results with nil. + | mov dword [RA+4], LJ_TNIL + | add RA, 8 + | jmp <3 + |.endif + | |->vm_profhook: // Dispatch target for profiler hook. #if LJ_HASPROFILE | mov L:RB, SAVE_L @@ -5382,7 +5443,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_A // RA = base, RD = target (loop extent) | // Note: RA/RD is only used by trace recorder to determine scope/extent | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT + |.if JIT | hotloop RB |.endif | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.