Re-enable trace stitching.

Thanks to Vyacheslav Egorov.
Mike Pall 2015-08-29 23:22:29 +02:00
parent ad29314c2c
commit a3a6866d4c
13 changed files with 60 additions and 84 deletions

doc/changes.html

@@ -90,7 +90,7 @@ Please take a look at the commit history for more details.
 </ul></li>
 <li>Improvements to the JIT compiler:
 <ul>
-<li>Add trace stitching (disabled for now).</li>
+<li>Add trace stitching.</li>
 <li>Compile various builtins: <tt>string.char()</tt>, <tt>string.reverse()</tt>, <tt>string.lower()</tt>, <tt>string.upper()</tt>, <tt>string.rep()</tt>, <tt>string.format()</tt>, <tt>table.concat()</tt>, <tt>bit.tohex()</tt>, <tt>getfenv(0)</tt>, <tt>debug.getmetatable()</tt>.</li>
 <li>Compile <tt>string.find()</tt> for fixed string searches (no patterns).</li>
 <li>Compile <tt>BC_TSETM</tt>, e.g. <tt>{1,2,3,f()}</tt>.</li>

src/lj_ffrecord.c

@@ -96,18 +96,10 @@ static ptrdiff_t results_wanted(jit_State *J)
     return -1;
 }
 
-#ifdef LUAJIT_TRACE_STITCHING
-/* This feature is disabled for now due to a design mistake. Sorry.
-**
-** It causes unpredictable behavior and crashes when a full trace flush
-** happens with a stitching continuation still in the stack somewhere.
-*/
 /* Trace stitching: add continuation below frame to start a new trace. */
 static void recff_stitch(jit_State *J)
 {
   ASMFunction cont = lj_cont_stitch;
-  TraceNo traceno = J->cur.traceno;
   lua_State *L = J->L;
   TValue *base = L->base;
   const BCIns *pc = frame_pc(base-1);
@@ -120,7 +112,7 @@ static void recff_stitch(jit_State *J)
   setframe_ftsz(base+1, ((char *)(base+1) - (char *)pframe) + FRAME_CONT);
   setcont(base, cont);
   setframe_pc(base, pc);
-  if (LJ_DUALNUM) setintV(base-1, traceno); else base[-1].u64 = traceno;
+  setnilV(base-1);  /* Incorrect, but rec_check_slots() won't run anymore. */
   L->base += 2;
   L->top += 2;
@@ -132,7 +124,9 @@ static void recff_stitch(jit_State *J)
   trcont = lj_ir_kptr(J, (void *)cont);
 #endif
   J->base[0] = trcont | TREF_CONT;
-  J->base[-1] = LJ_DUALNUM ? lj_ir_kint(J,traceno) : lj_ir_knum_u64(J,traceno);
+  J->ktracep = lj_ir_k64_reserve(J);
+  lua_assert(irt_toitype_(IRT_P64) == LJ_TTRACE);
+  J->base[-1] = emitir(IRT(IR_XLOAD, IRT_P64), lj_ir_kptr(J, &J->ktracep->gcr), 0);
   J->base += 2;
   J->baseslot += 2;
   J->framedepth++;
@@ -181,31 +175,6 @@ static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
 /* Must stop the trace for classic C functions with arbitrary side-effects. */
 #define recff_c recff_nyi
-#else
-/* Fallback handler for fast functions that are not recorded (yet). */
-static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
-{
-  setfuncV(J->L, &J->errinfo, J->fn);
-  lj_trace_err_info(J, LJ_TRERR_NYIFF);
-  UNUSED(rd);
-}
-
-/* Throw error for unsupported variant of fast function. */
-LJ_NORET static void recff_nyiu(jit_State *J, RecordFFData *rd)
-{
-  setfuncV(J->L, &J->errinfo, J->fn);
-  lj_trace_err_info(J, LJ_TRERR_NYIFFU);
-  UNUSED(rd);
-}
-
-/* Must abort the trace for classic C functions with arbitrary side-effects. */
-static void LJ_FASTCALL recff_c(jit_State *J, RecordFFData *rd)
-{
-  setfuncV(J->L, &J->errinfo, J->fn);
-  lj_trace_err_info(J, LJ_TRERR_NYICF);
-  UNUSED(rd);
-}
-#endif
 
 /* Emit BUFHDR for the global temporary buffer. */
 static TRef recff_bufhdr(jit_State *J)
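
The core mechanism of the fix is visible in the hunks above: the stitching continuation no longer embeds the trace number as a plain number constant. Instead, recff_stitch() reserves a K64Array slot and emits an IR_XLOAD of the GCtrace pointer through that slot; since irt_toitype_(IRT_P64) == LJ_TTRACE, the loaded 64 bit pattern is already a complete tagged TValue. The slot is patched with the real pointer only in trace_stop() (see lj_trace.c below), once the GCtrace exists. A minimal sketch of what the emitted load amounts to at runtime; the helper name is illustrative, not from the commit:

/* Sketch: the compiled trace fills the continuation frame slot by
** loading the reserved K64Array slot verbatim. The loaded 64 bit
** pattern already carries the LJ_TTRACE tag bits. */
static void store_trace_key(TValue *cont_slot, const TValue *ktracep)
{
  cont_slot->u64 = ktracep->u64;  /* IR_XLOAD of &ktracep->gcr. */
}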

src/lj_gc.c

@@ -69,7 +69,7 @@ static void gc_mark(global_State *g, GCobj *o)
     gray2black(o);  /* Closed upvalues are never gray. */
   } else if (gct != ~LJ_TSTR && gct != ~LJ_TCDATA) {
     lua_assert(gct == ~LJ_TFUNC || gct == ~LJ_TTAB ||
-	       gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO);
+	       gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO || gct == ~LJ_TTRACE);
     setgcrefr(o->gch.gclist, g->gc.gray);
     setgcref(g->gc.gray, o);
   }
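
This one-line widening of the assertion follows from the change above: the stitching continuation now leaves a tagged LJ_TTRACE value in a Lua stack slot, so the generic value-marking path can reach a GCtrace object. A minimal sketch of that path, assuming the usual lj_obj.h accessors (the real marker goes through a macro in lj_gc.c):

/* Sketch: marking a stack slot that may now hold a trace object. */
static void mark_stack_slot(global_State *g, TValue *tv)
{
  if (tvisgcv(tv))          /* Collectable value, possibly LJ_TTRACE... */
    gc_mark(g, gcval(tv));  /* ...so gc_mark() must accept ~LJ_TTRACE. */
}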

src/lj_ir.c

@@ -209,24 +209,13 @@ void lj_ir_k64_freeall(jit_State *J)
     lj_mem_free(J2G(J), k, sizeof(K64Array));
     k = next;
   }
+  setmref(J->k64, NULL);
 }
 
-/* Find 64 bit constant in chained array or add it. */
-cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64)
+/* Get new 64 bit constant slot. */
+static TValue *ir_k64_add(jit_State *J, K64Array *kp, uint64_t u64)
 {
-  K64Array *k, *kp = NULL;
   TValue *ntv;
-  MSize idx;
-  /* Search for the constant in the whole chain of arrays. */
-  for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) {
-    kp = k;  /* Remember previous element in list. */
-    for (idx = 0; idx < k->numk; idx++) {  /* Search one array. */
-      TValue *tv = &k->k[idx];
-      if (tv->u64 == u64)  /* Needed for +-0/NaN/absmask. */
-        return tv;
-    }
-  }
-  /* Constant was not found, need to add it. */
   if (!(kp && kp->numk < LJ_MIN_K64SZ)) {  /* Allocate a new array. */
     K64Array *kn = lj_mem_newt(J->L, sizeof(K64Array), K64Array);
     setmref(kn->next, NULL);
@@ -242,6 +231,33 @@ cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64)
   return ntv;
 }
 
+/* Find 64 bit constant in chained array or add it. */
+cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64)
+{
+  K64Array *k, *kp = NULL;
+  MSize idx;
+  /* Search for the constant in the whole chain of arrays. */
+  for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) {
+    kp = k;  /* Remember previous element in list. */
+    for (idx = 0; idx < k->numk; idx++) {  /* Search one array. */
+      TValue *tv = &k->k[idx];
+      if (tv->u64 == u64)  /* Needed for +-0/NaN/absmask. */
+        return tv;
+    }
+  }
+  /* Otherwise add a new constant. */
+  return ir_k64_add(J, kp, u64);
+}
+
+TValue *lj_ir_k64_reserve(jit_State *J)
+{
+  K64Array *k, *kp = NULL;
+  lj_ir_k64_find(J, 0);  /* Intern dummy 0 to protect the reserved slot. */
+  /* Find last K64Array, if any. */
+  for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) kp = k;
+  return ir_k64_add(J, kp, 0);  /* Set to 0. Final value is set later. */
+}
+
 /* Intern 64 bit constant, given by its address. */
 TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv)
 {
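
Note the dummy intern in lj_ir_k64_reserve(): the reserved slot starts out holding 0, so without the preceding lj_ir_k64_find(J, 0) a later lookup of the constant 0 could hand out the reserved slot itself, and the patch in trace_stop() would then silently corrupt an interned "0". Interning a dummy 0 first guarantees that every subsequent search hits the earlier entry. A small illustrative check, not part of the commit:

/* Sketch: the dummy 0 shields the reserved slot from constant lookups. */
static void check_k64_reserve(jit_State *J)
{
  TValue *slot = lj_ir_k64_reserve(J);   /* New slot, initialized to 0. */
  cTValue *zero = lj_ir_k64_find(J, 0);  /* Always hits the earlier dummy. */
  lua_assert(zero != slot);  /* The reserved slot is never handed out. */
}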

src/lj_ir.h

@@ -40,6 +40,7 @@ static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J)
 LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k);
 LJ_FUNC void lj_ir_k64_freeall(jit_State *J);
 LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv);
+LJ_FUNC TValue *lj_ir_k64_reserve(jit_State *J);
 LJ_FUNC cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64);
 LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64);
 LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n);

src/lj_jit.h

@@ -381,6 +381,7 @@ typedef struct jit_State {
   GCRef *trace;		/* Array of traces. */
   TraceNo freetrace;	/* Start of scan for next free trace. */
   MSize sizetrace;	/* Size of trace array. */
+  TValue *ktracep;	/* Pointer to K64Array slot with GCtrace pointer. */
 
   IRRef1 chain[IR__MAX];  /* IR instruction skip-list chain anchors. */
   TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA];  /* Stack slot map. */

src/lj_snap.c

@@ -631,8 +631,8 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
   } else if (irt_isnum(t)) {
     setnumV(o, ex->fpr[r-RID_MIN_FPR]);
 #endif
-  } else if (LJ_64 && irt_islightud(t)) {
-    /* 64 bit lightuserdata which may escape already has the tag bits. */
+  } else if (LJ_64 && irt_is64(t)) {
+    /* 64 bit values that already have the tag bits. */
     o->u64 = ex->gpr[r-RID_MIN_GPR];
   } else if (irt_ispri(t)) {
     setpriV(o, irt_toitype(t));
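
This is the other half of the tagged-pointer invariant from recff_stitch(): previously only 64 bit lightuserdata was restored verbatim from a GPR; now any 64 bit IR type is, which covers the IRT_P64 trace keyslot whose register image is already a complete LJ_TTRACE-tagged TValue. In isolation, and assuming LJ_64, the widened branch amounts to:

/* Sketch: values whose register image already includes the tag bits
** are restored by a raw 64 bit copy, with no re-tagging. */
static void restore_tagged64(TValue *o, uint64_t gpr)
{
  o->u64 = gpr;  /* Lightuserdata or the LJ_TTRACE trace keyslot. */
}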

src/lj_trace.c

@@ -274,7 +274,7 @@ int lj_trace_flushall(lua_State *L)
       if (T->root == 0)
 	trace_flushroot(J, T);
       lj_gdbjit_deltrace(J, T);
-      T->traceno = 0;
+      T->traceno = T->link = 0;  /* Blacklist the link for cont_stitch. */
       setgcrefnull(J->trace[i]);
     }
   }
@@ -284,6 +284,7 @@ int lj_trace_flushall(lua_State *L)
   memset(J->penalty, 0, sizeof(J->penalty));
   /* Free the whole machine code and invalidate all exit stub groups. */
   lj_mcode_free(J);
+  lj_ir_k64_freeall(J);
   memset(J->exitstubgroup, 0, sizeof(J->exitstubgroup));
   lj_vmevent_send(L, TRACE,
     setstrV(L, L->top++, lj_str_newlit(L, "flush"));
@@ -402,6 +403,7 @@ static void trace_start(jit_State *J)
   J->postproc = LJ_POST_NONE;
   lj_resetsplit(J);
   J->retryrec = 0;
+  J->ktracep = NULL;
   setgcref(J->cur.startpt, obj2gco(J->pt));
 
   L = J->L;
@@ -477,6 +479,9 @@ static void trace_stop(jit_State *J)
   lj_mcode_commit(J, J->cur.mcode);
   J->postproc = LJ_POST_NONE;
   trace_save(J, T);
+  if (J->ktracep) {  /* Patch K64Array slot with the final GCtrace pointer. */
+    setgcV(J->L, J->ktracep, obj2gco(T), LJ_TTRACE);
+  }
 
   L = J->L;
   lj_vmevent_send(L, TRACE,
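
These four hunks close the loop on the "design mistake" described in the old comment in lj_ffrecord.c. A full flush now zeroes both traceno and link, so a stitching continuation still sitting on some Lua stack reads link == traceno in cont_stitch and falls back to the interpreter instead of jumping into freed machine code; the K64Array holding the GCtrace pointers is freed along with it. The patch side of the reserve-then-patch cycle, shown in isolation as a sketch (the helper name is illustrative):

/* Sketch: deferred patching of the trace keyslot. The GCtrace T only
** exists after trace_save(), so the slot reserved during recording is
** filled in here, not when the IR load was emitted. */
static void patch_trace_keyslot(lua_State *L, jit_State *J, GCtrace *T)
{
  if (J->ktracep)  /* Non-NULL iff this trace ends in a stitching cont. */
    setgcV(L, J->ktracep, obj2gco(T), LJ_TTRACE);
}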

src/lj_traceerr.h

@@ -25,8 +25,6 @@ TREDEF(BADTYPE, "bad argument type")
 TREDEF(CJITOFF, "JIT compilation disabled for function")
 TREDEF(CUNROLL, "call unroll limit reached")
 TREDEF(DOWNREC, "down-recursion, restarting")
-TREDEF(NYICF, "NYI: C function %s")
-TREDEF(NYIFF, "NYI: FastFunc %s")
 TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s")
 TREDEF(NYIRETL, "NYI: return to lower frame")

src/vm_arm.dasc

@@ -2086,7 +2086,7 @@ static void build_subroutines(BuildCtx *ctx)
 |  // RA = resultptr, CARG4 = meta base
 |  ldr RB, SAVE_MULTRES
 |  ldr INS, [PC, #-4]
-|  ldr CARG3, [CARG4, #-24]		// Save previous trace number.
+|  ldr TRACE:CARG3, [CARG4, #-24]	// Save previous trace.
 |  subs RB, RB, #8
 |  decode_RA8 RC, INS			// Call base.
 |  beq >2
@@ -2101,23 +2101,20 @@ static void build_subroutines(BuildCtx *ctx)
 |  decode_RA8 RA, INS
 |  decode_RB8 RB, INS
 |  add RA, RA, RB
-|  ldr CARG1, [DISPATCH, #DISPATCH_J(trace)]
 |3:
 |  cmp RA, RC
 |  mvn CARG2, #~LJ_TNIL
 |  bhi >9				// More results wanted?
 |
-|  ldr TRACE:RA, [CARG1, CARG3, lsl #2]
-|  cmp TRACE:RA, #0
-|  beq ->cont_nop
-|  ldrh RC, TRACE:RA->link
-|  cmp RC, CARG3
+|  ldrh RA, TRACE:CARG3->traceno
+|  ldrh RC, TRACE:CARG3->link
+|  cmp RC, RA
 |  beq ->cont_nop			// Blacklisted.
 |  cmp RC, #0
 |  bne =>BC_JLOOP			// Jump to stitched trace.
 |
 |  // Stitch a new trace to the previous trace.
-|  str CARG3, [DISPATCH, #DISPATCH_J(exitno)]
+|  str RA, [DISPATCH, #DISPATCH_J(exitno)]
 |  str L, [DISPATCH, #DISPATCH_J(L)]
 |  str BASE, L->base
 |  sub CARG1, DISPATCH, #-GG_DISP2J
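
All four VM ports (ARM above; MIPS, PPC and x86 below) rewrite the cont_stitch fast path the same way: the continuation frame now yields the previous GCtrace pointer directly, instead of a trace number that had to be looked up in J->trace, an array a flush may already have emptied. Roughly, in C, with the control transfers shown as comments:

/* Sketch of the cont_stitch dispatch shared by all four ports. 'prev'
** is the GCtrace loaded from the stitching continuation frame below
** the meta base. */
static void cont_stitch_sketch(GCtrace *prev)
{
  if (prev->link == prev->traceno)
    return;     /* ->cont_nop: blacklisted; a flushed trace has 0 == 0. */
  if (prev->link != 0)
    return;     /* =>BC_JLOOP: run the already-stitched trace prev->link. */
  /* Otherwise save prev->traceno as the exit number and call
  ** lj_dispatch_stitch() to record a new trace stitched to prev. */
}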

src/vm_mips.dasc

@@ -2015,7 +2015,7 @@ static void build_subroutines(BuildCtx *ctx)
 |.if JIT
 |  // RA = resultptr, RB = meta base
 |  lw INS, -4(PC)
-|  lw TMP3, -24+LO(RB)			// Save previous trace number.
+|  lw TMP2, -24+LO(RB)			// Save previous trace.
 |  decode_RA8a RC, INS
 |  addiu AT, MULTRES, -8
 |  decode_RA8b RC
@@ -2034,17 +2034,13 @@ static void build_subroutines(BuildCtx *ctx)
 |  decode_RA8b RA
 |  decode_RB8b RB
 |  addu RA, RA, RB
-|  lw TMP1, DISPATCH_J(trace)(DISPATCH)
 |  addu RA, BASE, RA
 |3:
 |  sltu AT, RC, RA
 |  bnez AT, >9				// More results wanted?
-|.  sll TMP2, TMP3, 2
+|.  nop
 |
-|  addu TMP2, TMP1, TMP2
-|  lw TRACE:TMP2, 0(TMP2)
-|  beqz TRACE:TMP2, ->cont_nop
-|.  nop
+|  lhu TMP3, TRACE:TMP2->traceno
 |  lhu RD, TRACE:TMP2->link
 |  beq RD, TMP3, ->cont_nop		// Blacklisted.
 |.  load_got lj_dispatch_stitch

src/vm_ppc.dasc

@@ -2525,7 +2525,7 @@ static void build_subroutines(BuildCtx *ctx)
 |.if JIT
 |  // RA = resultptr, RB = meta base
 |  lwz INS, -4(PC)
-|  lwz TMP3, -20(RB)			// Save previous trace number.
+|  lwz TRACE:TMP2, -20(RB)		// Save previous trace.
 |  addic. TMP1, MULTRES, -8
 |  decode_RA8 RC, INS			// Call base.
 |  beq >2
@@ -2540,15 +2540,11 @@ static void build_subroutines(BuildCtx *ctx)
 |  decode_RA8 RA, INS
 |  decode_RB8 RB, INS
 |  add RA, RA, RB
-|  lwz TMP1, DISPATCH_J(trace)(DISPATCH)
 |3:
 |  cmplw RA, RC
 |  bgt >9				// More results wanted?
 |
-|  slwi TMP2, TMP3, 2
-|  lwzx TRACE:TMP2, TMP1, TMP2
-|  cmpwi TRACE:TMP2, 0
-|  beq ->cont_nop
+|  lhz TMP3, TRACE:TMP2->traceno
 |  lhz RD, TRACE:TMP2->link
 |  cmpw RD, TMP3
 |  cmpwi cr1, RD, 0

src/vm_x86.dasc

@@ -2667,8 +2667,8 @@ static void build_subroutines(BuildCtx *ctx)
 |->cont_stitch:			// Trace stitching.
 |.if JIT
 |  // BASE = base, RC = result, RB = mbase
-|  mov RA, [RB-24]			// Save previous trace number.
-|  mov TMP1, RA
+|  mov TRACE:RA, [RB-24]		// Save previous trace.
+|  mov TMP1, TRACE:RA
 |  mov TMP3, DISPATCH			// Need one more register.
 |  mov DISPATCH, MULTRES
 |  movzx RA, PC_RA
@@ -2699,11 +2699,8 @@ static void build_subroutines(BuildCtx *ctx)
 |  ja >9				// More results wanted?
 |
 |  mov DISPATCH, TMP3
-|  mov RB, TMP1				// Get previous trace number.
-|  mov RA, [DISPATCH+DISPATCH_J(trace)]
-|  mov TRACE:RD, [RA+RB*4]
-|  test TRACE:RD, TRACE:RD
-|  jz ->cont_nop
+|  mov TRACE:RD, TMP1			// Get previous trace.
+|  movzx RB, word TRACE:RD->traceno
 |  movzx RD, word TRACE:RD->link
 |  cmp RD, RB
 |  je ->cont_nop			// Blacklisted.