Specialize to prototype for non-monomorphic functions.

Solves the trace-explosion problem with closure-heavy programming.
This commit is contained in:
Mike Pall 2011-11-20 19:14:39 +01:00
parent cc7a12be93
commit cecbe3c15f
6 changed files with 31 additions and 15 deletions

View File

@ -148,14 +148,6 @@ with me, before writing major improvements, to avoid duplication of
effort.
</li>
<li>
The trace compiler currently doesn't back off specialization for
function call dispatch. It should really fall back to specializing on
the prototype, not the closure identity. This can lead to the so-called
"trace explosion" problem with <b>closure-heavy programming</b>. The
trace linking heuristics prevent this, but in the worst case this
means the code always falls back to the interpreter.
</li>
<li>
<b>Trace management</b> needs more tuning: less drastic countermeasures
against trace explosion and better heuristics in general.
</li>

View File

@ -118,6 +118,7 @@ GCfunc *lj_func_newC(lua_State *L, MSize nelems, GCtab *env)
static GCfunc *func_newL(lua_State *L, GCproto *pt, GCtab *env)
{
uint32_t count;
GCfunc *fn = (GCfunc *)lj_mem_newgco(L, sizeLfunc((MSize)pt->sizeuv));
fn->l.gct = ~LJ_TFUNC;
fn->l.ffid = FF_LUA;
@ -125,6 +126,9 @@ static GCfunc *func_newL(lua_State *L, GCproto *pt, GCtab *env)
/* NOBARRIER: Really a setgcref. But the GCfunc is new (marked white). */
setmref(fn->l.pc, proto_bc(pt));
setgcref(fn->l.env, obj2gco(env));
/* Saturating 3 bit counter (0..7) for created closures. */
count = (uint32_t)pt->flags + PROTO_CLCOUNT;
pt->flags = (uint8_t)(count - ((count >> PROTO_CLC_BITS) & PROTO_CLCOUNT));
return fn;
}

View File

@ -183,6 +183,7 @@ IRFPMDEF(FPMENUM)
#define IRFLDEF(_) \
_(STR_LEN, offsetof(GCstr, len)) \
_(FUNC_ENV, offsetof(GCfunc, l.env)) \
_(FUNC_PC, offsetof(GCfunc, l.pc)) \
_(TAB_META, offsetof(GCtab, metatable)) \
_(TAB_ARRAY, offsetof(GCtab, array)) \
_(TAB_NODE, offsetof(GCtab, node)) \

View File

@ -318,6 +318,9 @@ typedef struct GCproto {
/* Only used during parsing. */
#define PROTO_HAS_RETURN 0x20 /* Already emitted a return. */
#define PROTO_FIXUP_RETURN 0x40 /* Need to fixup emitted returns. */
/* Top bits used for counting created closures. */
/* These bits overlap PROTO_HAS_RETURN and PROTO_FIXUP_RETURN above. */
#define PROTO_CLCOUNT 0x20 /* Base of saturating 3 bit counter. */
/* Shift applied to the incremented flags to bring the carry bit (0x100) */
/* down onto PROTO_CLCOUNT, so the increment can be undone on overflow. */
#define PROTO_CLC_BITS 3
#define proto_kgc(pt, idx) \
check_exp((uintptr_t)(intptr_t)(idx) >= (uintptr_t)-(intptr_t)(pt)->sizekgc, \

View File

@ -1392,7 +1392,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line)
pt->gct = ~LJ_TPROTO;
pt->sizept = (MSize)sizept;
pt->trace = 0;
pt->flags = fs->flags;
pt->flags = (uint8_t)(fs->flags & ~(PROTO_HAS_RETURN|PROTO_FIXUP_RETURN));
pt->numparams = fs->numparams;
pt->framesize = fs->framesize;
setgcref(pt->chunkname, obj2gco(ls->chunkname));

View File

@ -563,12 +563,32 @@ static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev)
/* -- Record calls and returns -------------------------------------------- */
/*
** Emit the guard that pins down the target of a recorded call.
**
** fn is the function object observed at recording time and tr is the IR
** reference holding it. Lua functions whose prototype has a closure
** counter of 3*PROTO_CLCOUNT or more are guarded on their bytecode
** pointer only, and tr is returned unchanged. Every other function is
** guarded on its identity and the constant function reference is returned.
*/
static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr)
{
  GCproto *proto = isluafunc(fn) ? funcproto(fn) : NULL;
  TRef kfn;

  /* 3 or more closures created? Probably not a monomorphic function. */
  if (proto != NULL && proto->flags >= 3*PROTO_CLCOUNT) {
    /* Guard on the prototype: compare the closure's PC field. */
    TRef bcref = emitir(IRT(IR_FLOAD, IRT_P32), tr, IRFL_FUNC_PC);
    TRef kbc = lj_ir_kptr(J, proto_bc(proto));
    emitir(IRTG(IR_EQ, IRT_P32), bcref, kbc);
    /* The constant is unused, interning it prevents GC of the proto. */
    (void)lj_ir_kgc(J, obj2gco(proto), IRT_PROTO);
    return tr;
  }

  /* Guard on the closure value itself. */
  kfn = lj_ir_kfunc(J, fn);
  emitir(IRTG(IR_EQ, IRT_FUNC), tr, kfn);
  return kfn;
}
/* Record call setup. */
static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs)
{
RecordIndex ix;
TValue *functv = &J->L->base[func];
TRef trfunc, *fbase = &J->base[func];
TRef *fbase = &J->base[func];
ptrdiff_t i;
for (i = 0; i <= nargs; i++)
(void)getslot(J, func+i); /* Ensure func and all args have a reference. */
@ -582,11 +602,7 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs)
fbase[0] = ix.mobj; /* Replace function. */
functv = &ix.mobjv;
}
/* Specialize to the runtime value of the called function. */
trfunc = lj_ir_kfunc(J, funcV(functv));
emitir(IRTG(IR_EQ, IRT_FUNC), fbase[0], trfunc);
fbase[0] = trfunc | TREF_FRAME;
fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]);
J->maxslot = (BCReg)nargs;
}