Add trace stitching.

This commit is contained in:
Mike Pall 2013-12-25 02:55:25 +01:00
parent 6e02c210c4
commit b5d741fa7e
17 changed files with 422 additions and 69 deletions

View File

@ -101,7 +101,7 @@ static int ffh_pairs(lua_State *L, MMS mm)
#endif
LJLIB_PUSH(lastcl)
LJLIB_ASM(pairs)
LJLIB_ASM(pairs) LJLIB_REC(xpairs 0)
{
return ffh_pairs(L, MM_pairs);
}
@ -114,7 +114,7 @@ LJLIB_NOREGUV LJLIB_ASM(ipairs_aux) LJLIB_REC(.)
}
LJLIB_PUSH(lastcl)
LJLIB_ASM(ipairs) LJLIB_REC(.)
LJLIB_ASM(ipairs) LJLIB_REC(xpairs 1)
{
return ffh_pairs(L, MM_ipairs);
}

View File

@ -284,7 +284,7 @@ static GCtrace *jit_checktrace(lua_State *L)
/* Names of link types. ORDER LJ_TRLINK */
static const char *const jit_trlinkname[] = {
"none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion",
"interpreter", "return"
"interpreter", "return", "stitch"
};
/* local info = jit.util.traceinfo(tr) */

View File

@ -42,6 +42,12 @@ LJ_STATIC_ASSERT(GG_NUM_ASMFF == FF_NUM_ASMFUNC);
#include <math.h>
LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L,
lua_State *co);
#if !LJ_HASJIT
#define lj_dispatch_stitch lj_dispatch_ins
#endif
#if !LJ_HASPROFILE
#define lj_dispatch_profile lj_dispatch_ins
#endif
#define GOTFUNC(name) (ASMFunction)name,
static const ASMFunction dispatch_got[] = {
@ -511,6 +517,23 @@ out:
return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */
}
#if LJ_HASJIT
/* Stitch a new trace. */
/*
** Called from the VM's cont_stitch continuation when execution falls off
** the end of a trace at a C/fast-function call site.  Temporarily fixes up
** the current C frame so the recorder sees a consistent PC/stack, kicks
** off recording of the stitched trace, then restores the frame.
** J  - JIT state of the invoking VM.
** pc - bytecode position just after the call that triggered the stitch.
*/
void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc)
{
ERRNO_SAVE  /* presumably preserves errno across the callback -- TODO confirm macro */
lua_State *L = J->L;
void *cf = cframe_raw(L->cframe);
const BCIns *oldpc = cframe_pc(cf);  /* Remember PC to restore on exit. */
setcframe_pc(cf, pc);
/* Before dispatch, have to bias PC by 1. */
L->top = L->base + cur_topslot(curr_proto(L), pc+1, cframe_multres_n(cf));
lj_trace_stitch(J, pc-1); /* Point to the CALL instruction. */
setcframe_pc(cf, oldpc);  /* Undo the temporary PC fixup. */
ERRNO_RESTORE
}
#endif
#if LJ_HASPROFILE
/* Profile dispatch. */
void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc)

View File

@ -29,7 +29,8 @@
_(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \
_(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
_(pow) _(fmod) _(ldexp) \
_(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_profile) _(lj_err_throw)\
_(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \
_(lj_dispatch_profile) _(lj_err_throw) \
_(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
_(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \
_(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \
@ -110,6 +111,9 @@ LJ_FUNC void lj_dispatch_update(global_State *g);
/* Instruction dispatch callback for hooks or when recording. */
LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc);
LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc);
#if LJ_HASJIT
LJ_FUNCA void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc);
#endif
#if LJ_HASPROFILE
LJ_FUNCA void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc);
#endif

View File

@ -96,28 +96,81 @@ static ptrdiff_t results_wanted(jit_State *J)
return -1;
}
/* Throw error for unsupported variant of fast function. */
LJ_NORET static void recff_nyiu(jit_State *J)
/* Trace stitching: add continuation below frame to start a new trace. */
static void recff_stitch(jit_State *J)
{
setfuncV(J->L, &J->errinfo, J->fn);
lj_trace_err_info(J, LJ_TRERR_NYIFFU);
ASMFunction cont = lj_cont_stitch;
TraceNo traceno = J->cur.traceno;
lua_State *L = J->L;
TValue *base = L->base;
const BCIns *pc = frame_pc(base-1);
TValue *pframe = frame_prevl(base-1);
TRef trcont;
/* Move func + args up in Lua stack and insert continuation. */
memmove(&base[1], &base[-1], sizeof(TValue)*(J->maxslot+1));
setframe_ftsz(base+1, (int)((char *)(base+1) - (char *)pframe) + FRAME_CONT);
setcont(base, cont);
setframe_pc(base, pc);
if (LJ_DUALNUM) setintV(base-1, traceno); else base[-1].u64 = traceno;
L->base += 2;
L->top += 2;
/* Ditto for the IR. */
memmove(&J->base[1], &J->base[-1], sizeof(TRef)*(J->maxslot+1));
#if LJ_64
trcont = lj_ir_kptr(J, (void *)((int64_t)cont-(int64_t)lj_vm_asm_begin));
#else
trcont = lj_ir_kptr(J, (void *)cont);
#endif
J->base[0] = trcont | TREF_CONT;
J->base[-1] = LJ_DUALNUM ? lj_ir_kint(J,traceno) : lj_ir_knum_u64(J,traceno);
J->maxslot += 2;
J->framedepth++;
lj_record_stop(J, LJ_TRLINK_STITCH, 0);
/* Undo Lua stack changes. */
memmove(&base[-1], &base[1], sizeof(TValue)*(J->maxslot+1));
setframe_pc(base-1, pc);
L->base -= 2;
L->top -= 2;
}
/* Fallback handler for all fast functions that are not recorded (yet). */
/* Fallback handler for fast functions that are not recorded (yet). */
static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
{
setfuncV(J->L, &J->errinfo, J->fn);
lj_trace_err_info(J, LJ_TRERR_NYIFF);
UNUSED(rd);
if (J->cur.nins < (IRRef)J->param[JIT_P_minstitch] + REF_BASE) {
lj_trace_err_info(J, LJ_TRERR_TRACEUV);
} else {
/* Can only stitch from Lua call. */
if (J->framedepth && frame_islua(J->L->base-1)) {
BCOp op = bc_op(*frame_pc(J->L->base-1));
/* Stitched trace cannot start with *M op with variable # of args. */
if (!(op == BC_CALLM || op == BC_RETM || op == BC_TSETM)) {
switch (J->fn->c.ffid) {
case FF_error:
case FF_debug_sethook:
case FF_jit_flush:
break; /* Don't stitch across special builtins. */
default:
recff_stitch(J); /* Use trace stitching. */
rd->nres = -1;
return;
}
}
}
/* Otherwise stop trace and return to interpreter. */
lj_record_stop(J, LJ_TRLINK_RETURN, 0);
rd->nres = -1;
}
}
/* C functions can have arbitrary side-effects and are not recorded (yet). */
static void LJ_FASTCALL recff_c(jit_State *J, RecordFFData *rd)
{
setfuncV(J->L, &J->errinfo, J->fn);
lj_trace_err_info(J, LJ_TRERR_NYICF);
UNUSED(rd);
}
/* Fallback handler for unsupported variants of fast functions. */
#define recff_nyiu recff_nyi
/* Must stop the trace for classic C functions with arbitrary side-effects. */
#define recff_c recff_nyi
/* Emit BUFHDR for the global temporary buffer. */
static TRef recff_bufhdr(jit_State *J)
@ -268,7 +321,8 @@ static void LJ_FASTCALL recff_select(jit_State *J, RecordFFData *rd)
J->base[i] = J->base[start+i];
} /* else: Interpreter will throw. */
} else {
recff_nyiu(J);
recff_nyiu(J, rd);
return;
}
} /* else: Interpreter will throw. */
}
@ -279,14 +333,18 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd)
TRef base = J->base[1];
if (tr && !tref_isnil(base)) {
base = lj_opt_narrow_toint(J, base);
if (!tref_isk(base) || IR(tref_ref(base))->i != 10)
recff_nyiu(J);
if (!tref_isk(base) || IR(tref_ref(base))->i != 10) {
recff_nyiu(J, rd);
return;
}
}
if (tref_isnumber_str(tr)) {
if (tref_isstr(tr)) {
TValue tmp;
if (!lj_strscan_num(strV(&rd->argv[0]), &tmp))
recff_nyiu(J); /* Would need an inverted STRTO for this case. */
if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) {
recff_nyiu(J, rd); /* Would need an inverted STRTO for this case. */
return;
}
tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
}
#if LJ_HASFFI
@ -348,7 +406,8 @@ static void LJ_FASTCALL recff_tostring(jit_State *J, RecordFFData *rd)
} else if (tref_ispri(tr)) {
J->base[0] = lj_ir_kstr(J, lj_strfmt_obj(J->L, &rd->argv[0]));
} else {
recff_nyiu(J);
recff_nyiu(J, rd);
return;
}
}
}
@ -370,14 +429,14 @@ static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd)
} /* else: Interpreter will throw. */
}
static void LJ_FASTCALL recff_ipairs(jit_State *J, RecordFFData *rd)
static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd)
{
if (!(LJ_52 && recff_metacall(J, rd, MM_ipairs))) {
TRef tab = J->base[0];
if (tref_istab(tab)) {
J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0]));
J->base[1] = tab;
J->base[2] = lj_ir_kint(J, 0);
J->base[2] = rd->data ? lj_ir_kint(J, 0) : TREF_NIL;
rd->nres = 3;
} /* else: Interpreter will throw. */
}
@ -431,8 +490,7 @@ static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd)
J->base[0] = emitir(IRT(IR_FLOAD, IRT_TAB), trl, IRFL_THREAD_ENV);
return;
}
recff_nyiu(J);
UNUSED(rd);
recff_nyiu(J, rd);
}
/* -- Math library fast functions ----------------------------------------- */
@ -672,8 +730,7 @@ static void LJ_FASTCALL recff_bit_tohex(jit_State *J, RecordFFData *rd)
TRef tr = recff_bit64_tohex(J, rd, hdr);
J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
#else
UNUSED(rd);
recff_nyiu(J); /* Don't bother working around this NYI. */
recff_nyiu(J, rd); /* Don't bother working around this NYI. */
#endif
}
@ -891,7 +948,8 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
J->base[0] = TREF_NIL;
}
} else { /* Search for pattern. */
recff_nyiu(J);
recff_nyiu(J, rd);
return;
}
}
@ -931,7 +989,8 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra);
lj_needsplit(J);
#else
recff_nyiu(J); /* Don't bother working around this NYI. */
recff_nyiu(J, rd); /* Don't bother working around this NYI. */
return;
#endif
}
break;
@ -946,8 +1005,10 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
if (LJ_SOFTFP) lj_needsplit(J);
break;
case STRFMT_STR:
if (!tref_isstr(tra))
recff_nyiu(J); /* NYI: __tostring and non-string types for %s. */
if (!tref_isstr(tra)) {
recff_nyiu(J, rd); /* NYI: __tostring and non-string types for %s. */
return;
}
if (sf == STRFMT_STR) /* Shortcut for plain %s. */
tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, tra);
else if ((sf & STRFMT_T_QUOTED))
@ -966,8 +1027,8 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
case STRFMT_PTR: /* NYI */
case STRFMT_ERR:
default:
recff_nyiu(J);
break;
recff_nyiu(J, rd);
return;
}
}
J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
@ -991,7 +1052,8 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
ix.idxchain = 0;
lj_record_idx(J, &ix); /* Set new value. */
} else { /* Complex case: insert in the middle. */
recff_nyiu(J);
recff_nyiu(J, rd);
return;
}
} /* else: Interpreter will throw. */
}

View File

@ -97,6 +97,7 @@
_(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \
_(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \
_(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \
_(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \
\
_(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \
_(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \
@ -202,7 +203,8 @@ typedef enum {
LJ_TRLINK_UPREC, /* Up-recursion. */
LJ_TRLINK_DOWNREC, /* Down-recursion. */
LJ_TRLINK_INTERP, /* Fallback to interpreter. */
LJ_TRLINK_RETURN /* Return to interpreter. */
LJ_TRLINK_RETURN, /* Return to interpreter. */
LJ_TRLINK_STITCH /* Trace stitching. */
} TraceLink;
/* Trace object. */

View File

@ -233,7 +233,7 @@ static void canonicalize_slots(jit_State *J)
}
/* Stop recording. */
static void rec_stop(jit_State *J, TraceLink linktype, TraceNo lnk)
void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk)
{
lj_trace_end(J);
J->cur.linktype = (uint8_t)linktype;
@ -501,8 +501,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
static LoopEvent rec_iterl(jit_State *J, const BCIns iterins)
{
BCReg ra = bc_a(iterins);
lua_assert(J->base[ra] != 0);
if (!tref_isnil(J->base[ra])) { /* Looping back? */
if (!tref_isnil(getslot(J, ra))) { /* Looping back? */
J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */
J->maxslot = ra-1+bc_b(J->pc[-1]);
J->pc += bc_j(iterins)+1;
@ -540,12 +539,12 @@ static int innerloopleft(jit_State *J, const BCIns *pc)
/* Handle the case when an interpreted loop op is hit. */
static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
{
if (J->parent == 0) {
if (J->parent == 0 && J->exitno == 0) {
if (pc == J->startpc && J->framedepth + J->retdepth == 0) {
/* Same loop? */
if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */
lj_trace_err(J, LJ_TRERR_LLEAVE);
rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping root trace. */
lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */
} else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */
/* It's usually better to abort here and wait until the inner loop
** is traced. But if the inner loop repeatedly didn't loop back,
@ -570,15 +569,15 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
/* Handle the case when an already compiled loop op is hit. */
static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev)
{
if (J->parent == 0) { /* Root trace hit an inner loop. */
if (J->parent == 0 && J->exitno == 0) { /* Root trace hit an inner loop. */
/* Better let the inner loop spawn a side trace back here. */
lj_trace_err(J, LJ_TRERR_LINNER);
} else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */
J->instunroll = 0; /* Cannot continue across a compiled loop op. */
if (J->pc == J->startpc && J->framedepth + J->retdepth == 0)
rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form an extra loop. */
lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form extra loop. */
else
rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */
lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */
} /* Side trace continues across a loop that's left or not entered. */
}
@ -643,6 +642,18 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr)
(void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */
return tr;
}
} else {
/* Don't specialize to non-monomorphic builtins. */
switch (fn->c.ffid) {
case FF_coroutine_wrap_aux:
case FF_string_gmatch_aux:
/* NYI: io_file_iter doesn't have an ffid, yet. */
/* NYI: specialize to ffid? Not strictly necessary, trace will stop. */
return tr;
default:
/* NYI: don't specialize to non-monomorphic C functions. */
break;
}
}
/* Otherwise specialize to the function (closure) value itself. */
kfunc = lj_ir_kfunc(J, fn);
@ -750,12 +761,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
/* Return to lower frame via interpreter for unhandled cases. */
if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) &&
(!frame_islua(frame) ||
(J->parent == 0 && !bc_isret(bc_op(J->cur.startins))))) {
(J->parent == 0 && J->exitno == 0 &&
!bc_isret(bc_op(J->cur.startins))))) {
/* NYI: specialize to frame type and return directly, not via RET*. */
for (i = -1; i < (ptrdiff_t)rbase; i++)
J->base[i] = 0; /* Purge dead slots. */
J->maxslot = rbase + (BCReg)gotresults;
rec_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */
lj_record_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */
return;
}
if (frame_isvarg(frame)) {
@ -779,7 +791,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
if (check_downrec_unroll(J, pt)) {
J->maxslot = (BCReg)(rbase + gotresults);
lj_snap_purge(J);
rec_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-recursion. */
lj_record_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-rec. */
return;
}
lj_snap_add(J);
@ -792,7 +804,8 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
lua_assert(J->baseslot > cbase+1);
J->baseslot -= cbase+1;
J->base -= cbase+1;
} else if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) {
} else if (J->parent == 0 && J->exitno == 0 &&
!bc_isret(bc_op(J->cur.startins))) {
/* Return to lower frame would leave the loop in a root trace. */
lj_trace_err(J, LJ_TRERR_LLEAVE);
} else { /* Return to lower frame. Guard for the target we return to. */
@ -1480,9 +1493,9 @@ static void check_call_unroll(jit_State *J, TraceNo lnk)
if (count + J->tailcalled > J->param[JIT_P_recunroll]) {
J->pc++;
if (J->framedepth + J->retdepth == 0)
rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-recursion. */
lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-rec. */
else
rec_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */
lj_record_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */
}
} else {
if (count > J->param[JIT_P_callunroll]) {
@ -1556,9 +1569,9 @@ static void rec_func_jit(jit_State *J, TraceNo lnk)
}
J->instunroll = 0; /* Cannot continue across a compiled function. */
if (J->pc == J->startpc && J->framedepth + J->retdepth == 0)
rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-recursion. */
lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-rec. */
else
rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */
lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */
}
/* -- Vararg handling ----------------------------------------------------- */
@ -2165,7 +2178,7 @@ void lj_record_ins(jit_State *J)
case BC_JFORI:
lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL);
if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */
rec_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J]));
lj_record_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J]));
/* Continue tracing if the loop is not entered. */
break;
@ -2299,6 +2312,12 @@ static const BCIns *rec_setup_root(jit_State *J)
J->maxslot = J->pt->numparams;
pc++;
break;
case BC_CALLM:
case BC_CALL:
case BC_ITERC:
/* No bytecode range check for stitched traces. */
pc++;
break;
default:
lua_assert(0);
break;
@ -2366,7 +2385,7 @@ void lj_record_setup(jit_State *J)
if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] ||
T->snap[J->exitno].count >= J->param[JIT_P_hotexit] +
J->param[JIT_P_tryside]) {
rec_stop(J, LJ_TRLINK_INTERP, 0);
lj_record_stop(J, LJ_TRLINK_INTERP, 0);
}
} else { /* Root trace. */
J->cur.root = 0;
@ -2378,6 +2397,8 @@ void lj_record_setup(jit_State *J)
lj_snap_add(J);
if (bc_op(J->cur.startins) == BC_FORL)
rec_for_loop(J, J->pc-1, &J->scev, 1);
else if (bc_op(J->cur.startins) == BC_ITERC)
J->startpc = NULL;
if (1 + J->pt->framesize >= LJ_MAX_JSLOTS)
lj_trace_err(J, LJ_TRERR_STACKOV);
}

View File

@ -28,6 +28,7 @@ typedef struct RecordIndex {
LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b,
cTValue *av, cTValue *bv);
LJ_FUNC void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk);
LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o);
LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs);

View File

@ -97,7 +97,8 @@ static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map)
{
cTValue *frame = J->L->base - 1;
cTValue *lim = J->L->base - J->baseslot;
cTValue *ftop = frame + funcproto(frame_func(frame))->framesize;
GCfunc *fn = frame_func(frame);
cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
MSize f = 0;
map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
while (frame > lim) { /* Backwards traversal of all frames above base. */

View File

@ -360,7 +360,7 @@ static void trace_start(jit_State *J)
TraceNo traceno;
if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */
if (J->parent == 0) {
if (J->parent == 0 && J->exitno == 0) {
/* Lazy bytecode patching to disable hotcount events. */
lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL ||
bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF);
@ -453,6 +453,12 @@ static void trace_stop(jit_State *J)
root->nextside = (TraceNo1)traceno;
}
break;
case BC_CALLM:
case BC_CALL:
case BC_ITERC:
/* Trace stitching: patch link of previous trace. */
traceref(J, J->exitno)->link = traceno;
break;
default:
lua_assert(0);
break;
@ -502,8 +508,12 @@ static int trace_abort(jit_State *J)
return 1; /* Retry ASM with new MCode area. */
}
/* Penalize or blacklist starting bytecode instruction. */
if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins)))
penalty_pc(J, &gcref(J->cur.startpt)->pt, mref(J->cur.startpc, BCIns), e);
if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) {
if (J->exitno == 0)
penalty_pc(J, &gcref(J->cur.startpt)->pt, mref(J->cur.startpc, BCIns), e);
else
traceref(J, J->exitno)->link = J->exitno; /* Self-link is blacklisted. */
}
/* Is there anything to abort? */
traceno = J->cur.traceno;
@ -680,6 +690,20 @@ static void trace_hotside(jit_State *J, const BCIns *pc)
}
}
/* Stitch a new trace to the previous trace. */
/*
** Entry point for trace stitching: begins recording a new root-like trace
** at pc, which will later be linked to the trace that invoked it.
** Silently does nothing if the compiler is busy or hooks forbid it.
*/
void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc)
{
/* Only start a new trace if not recording or inside __gc call or vmevent. */
if (J->state == LJ_TRACE_IDLE &&
!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) {
J->parent = 0;  /* Have to treat it like a root trace. */
/* J->exitno is set to the invoking trace. */
J->state = LJ_TRACE_START;
lj_trace_ins(J, pc);  /* Dispatch into the trace state machine. */
}
}
/* Tiny struct to pass data to protected call. */
typedef struct ExitDataCP {
jit_State *J;

View File

@ -34,6 +34,7 @@ LJ_FUNC void lj_trace_freestate(global_State *g);
/* Event handling. */
LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc);
LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc);
LJ_FUNCA void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc);
LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr);
/* Signal asynchronous abort of trace or end of trace. */

View File

@ -7,6 +7,7 @@
/* Recording. */
TREDEF(RECERR, "error thrown or hook called during recording")
TREDEF(TRACEUV, "trace too short")
TREDEF(TRACEOV, "trace too long")
TREDEF(STACKOV, "trace too deep")
TREDEF(SNAPOV, "too many snapshots")
@ -23,8 +24,6 @@ TREDEF(BADTYPE, "bad argument type")
TREDEF(CJITOFF, "JIT compilation disabled for function")
TREDEF(CUNROLL, "call unroll limit reached")
TREDEF(DOWNREC, "down-recursion, restarting")
TREDEF(NYICF, "NYI: C function %p")
TREDEF(NYIFF, "NYI: FastFunc %s")
TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s")
TREDEF(NYIRETL, "NYI: return to lower frame")

View File

@ -107,6 +107,7 @@ LJ_ASMF void lj_cont_nop(void); /* Do nothing, just continue execution. */
LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */
LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */
LJ_ASMF void lj_cont_hook(void); /* Continue from hook yield. */
LJ_ASMF void lj_cont_stitch(void); /* Trace stitching. */
enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */

View File

@ -2082,6 +2082,55 @@ static void build_subroutines(BuildCtx *ctx)
| ldr INS, [PC, #-4]
| bx CRET1
|
|->cont_stitch: // Trace stitching.
|.if JIT
| // RA = resultptr, CARG4 = meta base
| ldr RB, SAVE_MULTRES
| ldr INS, [PC, #-4]
| ldr CARG3, [CARG4, #-24] // Save previous trace number.
| subs RB, RB, #8
| decode_RA8 RC, INS // Call base.
| beq >2
|1: // Move results down.
| ldrd CARG12, [RA]
| add RA, RA, #8
| subs RB, RB, #8
| strd CARG12, [BASE, RC]
| add RC, RC, #8
| bne <1
|2:
| decode_RA8 RA, INS
| decode_RB8 RB, INS
| add RA, RA, RB
| ldr CARG1, [DISPATCH, #DISPATCH_J(trace)]
|3:
| cmp RA, RC
| mvn CARG2, #~LJ_TNIL
| bhi >9 // More results wanted?
|
| ldr TRACE:RA, [CARG1, CARG3, lsl #2]
| ldrh RC, TRACE:RA->link
| cmp RC, CARG3
| beq ->cont_nop // Blacklisted.
| cmp RC, #0
| bne =>BC_JLOOP // Jump to stitched trace.
|
| // Stitch a new trace to the previous trace.
| str CARG3, [DISPATCH, #DISPATCH_J(exitno)]
| str L, [DISPATCH, #DISPATCH_J(L)]
| str BASE, L->base
| sub CARG1, DISPATCH, #-GG_DISP2J
| mov CARG2, PC
| bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
| ldr BASE, L->base
| b ->cont_nop
|
|9: // Fill up results with nil.
| strd CARG12, [BASE, RC]
| add RC, RC, #8
| b <3
|.endif
|
|->vm_profhook: // Dispatch target for profiler hook.
#if LJ_HASPROFILE
| mov CARG1, L
@ -2166,6 +2215,7 @@ static void build_subroutines(BuildCtx *ctx)
| lsrlo RC, INS, #16 // No: Decode operands A*8 and D.
| subhs RC, RC, #8
| addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8
| ldrhs CARG3, [BASE, FRAME_FUNC]
| bx OP
|
|3: // Rethrow error from the right C frame.

View File

@ -2011,6 +2011,60 @@ static void build_subroutines(BuildCtx *ctx)
| jr CRET1
|. lw INS, -4(PC)
|
|->cont_stitch: // Trace stitching.
|.if JIT
| // RA = resultptr, RB = meta base
| lw INS, -4(PC)
| lw TMP3, -24+LO(RB) // Save previous trace number.
| decode_RA8a RC, INS
| addiu AT, MULTRES, -8
| decode_RA8b RC
| beqz AT, >2
|. addu RC, BASE, RC // Call base.
|1: // Move results down.
| ldc1 f0, 0(RA)
| addiu AT, AT, -8
| addiu RA, RA, 8
| sdc1 f0, 0(RC)
| bnez AT, <1
|. addiu RC, RC, 8
|2:
| decode_RA8a RA, INS
| decode_RB8a RB, INS
| decode_RA8b RA
| decode_RB8b RB
| addu RA, RA, RB
| lw TMP1, DISPATCH_J(trace)(DISPATCH)
| addu RA, BASE, RA
|3:
| sltu AT, RC, RA
| bnez AT, >9 // More results wanted?
|. sll TMP2, TMP3, 2
|
| addu TMP2, TMP1, TMP2
| lw TRACE:TMP2, 0(TMP2)
| lhu RD, TRACE:TMP2->link
| beq RD, TMP3, ->cont_nop // Blacklisted.
|. load_got lj_dispatch_stitch
| bnez RD, =>BC_JLOOP // Jump to stitched trace.
|. sll RD, RD, 3
|
| // Stitch a new trace to the previous trace.
| sw TMP3, DISPATCH_J(exitno)(DISPATCH)
| sw L, DISPATCH_J(L)(DISPATCH)
| sw BASE, L->base
| addiu CARG1, DISPATCH, GG_DISP2J
| call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
|. move CARG2, PC
| b ->cont_nop
|. lw BASE, L->base
|
|9:
| sw TISNIL, HI(RC)
| b <3
|. addiu RC, RC, 8
|.endif
|
|->vm_profhook: // Dispatch target for profiler hook.
#if LJ_HASPROFILE
| load_got lj_dispatch_profile
@ -2091,13 +2145,13 @@ static void build_subroutines(BuildCtx *ctx)
| sw BASE, L->base
|1:
| bltz CRET1, >3 // Check for error from exit.
|. lw LFUNC:TMP1, FRAME_FUNC(BASE)
|. lw LFUNC:RB, FRAME_FUNC(BASE)
| lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| sll MULTRES, CRET1, 3
| li TISNIL, LJ_TNIL
| sw MULTRES, SAVE_MULTRES
| mtc1 TMP3, TOBIT
| lw TMP1, LFUNC:TMP1->pc
| lw TMP1, LFUNC:RB->pc
| sw r0, DISPATCH_GL(jit_base)(DISPATCH)
| lw KBASE, PC2PROTO(k)(TMP1)
| cvt.d.s TOBIT, TOBIT

View File

@ -2505,6 +2505,55 @@ static void build_subroutines(BuildCtx *ctx)
| mtctr CRET1
| bctr
|
|->cont_stitch: // Trace stitching.
|.if JIT
| // RA = resultptr, RB = meta base
| lwz INS, -4(PC)
| lwz TMP3, -20(RB) // Save previous trace number.
| addic. TMP1, MULTRES, -8
| decode_RA8 RC, INS // Call base.
| beq >2
|1: // Move results down.
| lfd f0, 0(RA)
| addic. TMP1, TMP1, -8
| addi RA, RA, 8
| stfdx f0, BASE, RC
| addi RC, RC, 8
| bne <1
|2:
| decode_RA8 RA, INS
| decode_RB8 RB, INS
| add RA, RA, RB
| lwz TMP1, DISPATCH_J(trace)(DISPATCH)
|3:
| cmplw RA, RC
| bgt >9 // More results wanted?
|
| slwi TMP2, TMP3, 2
| lwzx TRACE:TMP2, TMP1, TMP2
| lhz RD, TRACE:TMP2->link
| cmpw RD, TMP3
| cmpwi cr1, RD, 0
| beq ->cont_nop // Blacklisted.
| slwi RD, RD, 3
| bne cr1, =>BC_JLOOP // Jump to stitched trace.
|
| // Stitch a new trace to the previous trace.
| stw TMP3, DISPATCH_J(exitno)(DISPATCH)
| stp L, DISPATCH_J(L)(DISPATCH)
| stp BASE, L->base
| addi CARG1, DISPATCH, GG_DISP2J
| mr CARG2, PC
| bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
| lp BASE, L->base
| b ->cont_nop
|
|9:
| stwx TISNIL, BASE, RC
| addi RC, RC, 8
| b <3
|.endif
|
|->vm_profhook: // Dispatch target for profiler hook.
#if LJ_HASPROFILE
| mr CARG1, L
@ -2557,7 +2606,7 @@ static void build_subroutines(BuildCtx *ctx)
| sub CARG3, TMP0, CARG3 // Compute exit number.
| lp BASE, DISPATCH_GL(jit_base)(DISPATCH)
| srwi CARG3, CARG3, 2
| stw L, DISPATCH_J(L)(DISPATCH)
| stp L, DISPATCH_J(L)(DISPATCH)
| subi CARG3, CARG3, 2
| stp BASE, L->base
| stw CARG4, DISPATCH_J(parent)(DISPATCH)
@ -2589,11 +2638,11 @@ static void build_subroutines(BuildCtx *ctx)
|1:
| cmpwi CARG1, 0
| blt >3 // Check for error from exit.
| lwz LFUNC:TMP1, FRAME_FUNC(BASE)
| lwz LFUNC:RB, FRAME_FUNC(BASE)
| slwi MULTRES, CARG1, 3
| li TMP2, 0
| stw MULTRES, SAVE_MULTRES
| lwz TMP1, LFUNC:TMP1->pc
| lwz TMP1, LFUNC:RB->pc
| stw TMP2, DISPATCH_GL(jit_base)(DISPATCH)
| lwz KBASE, PC2PROTO(k)(TMP1)
| // Setup type comparison constants.

View File

@ -2659,6 +2659,67 @@ static void build_subroutines(BuildCtx *ctx)
| add NARGS:RD, 1
| jmp RBa
|
|->cont_stitch: // Trace stitching.
|.if JIT
| // BASE = base, RC = result, RB = mbase
| mov RA, [RB-24] // Save previous trace number.
| mov TMP1, RA
| mov TMP3, DISPATCH // Need one more register.
| mov DISPATCH, MULTRES
| movzx RA, PC_RA
| lea RA, [BASE+RA*8] // Call base.
| sub DISPATCH, 1
| jz >2
|1: // Move results down.
|.if X64
| mov RBa, [RC]
| mov [RA], RBa
|.else
| mov RB, [RC]
| mov [RA], RB
| mov RB, [RC+4]
| mov [RA+4], RB
|.endif
| add RC, 8
| add RA, 8
| sub DISPATCH, 1
| jnz <1
|2:
| movzx RC, PC_RA
| movzx RB, PC_RB
| add RC, RB
| lea RC, [BASE+RC*8-8]
|3:
| cmp RC, RA
| ja >9 // More results wanted?
|
| mov DISPATCH, TMP3
| mov RB, TMP1 // Get previous trace number.
| mov RA, [DISPATCH+DISPATCH_J(trace)]
| mov TRACE:RD, [RA+RB*4]
| movzx RD, word TRACE:RD->link
| cmp RD, RB
| je ->cont_nop // Blacklisted.
| test RD, RD
| jne =>BC_JLOOP // Jump to stitched trace.
|
| // Stitch a new trace to the previous trace.
| mov [DISPATCH+DISPATCH_J(exitno)], RB
| mov L:RB, SAVE_L
| mov L:RB->base, BASE
| mov FCARG2, PC
| lea FCARG1, [DISPATCH+GG_DISP2J]
| mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
| call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc)
| mov BASE, L:RB->base
| jmp ->cont_nop
|
|9: // Fill up results with nil.
| mov dword [RA+4], LJ_TNIL
| add RA, 8
| jmp <3
|.endif
|
|->vm_profhook: // Dispatch target for profiler hook.
#if LJ_HASPROFILE
| mov L:RB, SAVE_L
@ -5382,7 +5443,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ins_A // RA = base, RD = target (loop extent)
| // Note: RA/RD is only used by trace recorder to determine scope/extent
| // This opcode does NOT jump, it's only purpose is to detect a hot loop.
|.if JIT
|.if JIT
| hotloop RB
|.endif
| // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.