diff --git a/doc/status.html b/doc/status.html index d386e1aa..5f8fa463 100644 --- a/doc/status.html +++ b/doc/status.html @@ -148,14 +148,6 @@ with me, before writing major improvements, to avoid duplication of effort.
  • -The trace compiler currently doesn't back off specialization for -function call dispatch. It should really fall back to specializing on -the prototype, not the closure identity. This can lead to the so-called -"trace explosion" problem with closure-heavy programming. The -trace linking heuristics prevent this, but in the worst case this -means the code always falls back to the interpreter. -
  • -
  • Trace management needs more tuning: less drastic countermeasures against trace explosion and better heuristics in general.
  • diff --git a/src/lj_func.c b/src/lj_func.c index 334ba4c8..97be0a2a 100644 --- a/src/lj_func.c +++ b/src/lj_func.c @@ -118,6 +118,7 @@ GCfunc *lj_func_newC(lua_State *L, MSize nelems, GCtab *env) static GCfunc *func_newL(lua_State *L, GCproto *pt, GCtab *env) { + uint32_t count; GCfunc *fn = (GCfunc *)lj_mem_newgco(L, sizeLfunc((MSize)pt->sizeuv)); fn->l.gct = ~LJ_TFUNC; fn->l.ffid = FF_LUA; @@ -125,6 +126,9 @@ static GCfunc *func_newL(lua_State *L, GCproto *pt, GCtab *env) /* NOBARRIER: Really a setgcref. But the GCfunc is new (marked white). */ setmref(fn->l.pc, proto_bc(pt)); setgcref(fn->l.env, obj2gco(env)); + /* Saturating 3 bit counter (0..7) for created closures, kept in the top bits of pt->flags. */ + count = (uint32_t)pt->flags + PROTO_CLCOUNT; /* Bump the counter field by one. */ + pt->flags = (uint8_t)(count - ((count >> PROTO_CLC_BITS) & PROTO_CLCOUNT)); /* Carry out of bit 7? Undo the bump. */ return fn; } diff --git a/src/lj_ir.h b/src/lj_ir.h index 7a9f1a9c..8cf8129f 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -183,6 +183,7 @@ IRFPMDEF(FPMENUM) #define IRFLDEF(_) \ _(STR_LEN, offsetof(GCstr, len)) \ _(FUNC_ENV, offsetof(GCfunc, l.env)) \ + _(FUNC_PC, offsetof(GCfunc, l.pc)) \ _(TAB_META, offsetof(GCtab, metatable)) \ _(TAB_ARRAY, offsetof(GCtab, array)) \ _(TAB_NODE, offsetof(GCtab, node)) \ diff --git a/src/lj_obj.h b/src/lj_obj.h index afb29d0f..4a360df6 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h @@ -318,6 +318,9 @@ typedef struct GCproto { /* Only used during parsing. */ #define PROTO_HAS_RETURN 0x20 /* Already emitted a return. */ #define PROTO_FIXUP_RETURN 0x40 /* Need to fixup emitted returns. */ +/* Top bits used for counting created closures. */ +#define PROTO_CLCOUNT 0x20 /* Base of saturating 3 bit counter. 
*/ +#define PROTO_CLC_BITS 3 /* Counter width in bits. */ #define proto_kgc(pt, idx) \ check_exp((uintptr_t)(intptr_t)(idx) >= (uintptr_t)-(intptr_t)(pt)->sizekgc, \ diff --git a/src/lj_parse.c b/src/lj_parse.c index f0bb4419..4b8a8e6f 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -1392,7 +1392,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line) pt->gct = ~LJ_TPROTO; pt->sizept = (MSize)sizept; pt->trace = 0; - pt->flags = fs->flags; + pt->flags = (uint8_t)(fs->flags & ~(PROTO_HAS_RETURN|PROTO_FIXUP_RETURN)); /* Drop parse-only flags: they share bits with the closure counter. */ pt->numparams = fs->numparams; pt->framesize = fs->framesize; setgcref(pt->chunkname, obj2gco(ls->chunkname)); diff --git a/src/lj_record.c b/src/lj_record.c index 61e09b6d..a76f5d94 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -563,12 +563,32 @@ static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) /* -- Record calls and returns -------------------------------------------- */ +/* Specialize to the runtime value of the called function or its prototype. */ +static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr) +{ + TRef kfunc; + if (isluafunc(fn)) { + GCproto *pt = funcproto(fn); + /* 3 or more closures created? Probably not a monomorphic function. */ + if (pt->flags >= 3*PROTO_CLCOUNT) { /* Specialize to prototype instead. */ + TRef trpt = emitir(IRT(IR_FLOAD, IRT_P32), tr, IRFL_FUNC_PC); + emitir(IRTG(IR_EQ, IRT_P32), trpt, lj_ir_kptr(J, proto_bc(pt))); + (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */ + return tr; /* Caller keeps the reference it passed in. */ + } + } + /* Otherwise specialize to the function (closure) value itself. */ + kfunc = lj_ir_kfunc(J, fn); + emitir(IRTG(IR_EQ, IRT_FUNC), tr, kfunc); + return kfunc; /* Caller gets the constant closure reference. */ +} + /* Record call setup. */ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs) { RecordIndex ix; TValue *functv = &J->L->base[func]; - TRef trfunc, *fbase = &J->base[func]; + TRef *fbase = &J->base[func]; ptrdiff_t i; for (i = 0; i <= nargs; i++) (void)getslot(J, func+i); /* Ensure func and all args have a reference. 
*/ @@ -582,11 +602,7 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs) fbase[0] = ix.mobj; /* Replace function. */ functv = &ix.mobjv; } - - /* Specialize to the runtime value of the called function. */ - trfunc = lj_ir_kfunc(J, funcV(functv)); - emitir(IRTG(IR_EQ, IRT_FUNC), fbase[0], trfunc); - fbase[0] = trfunc | TREF_FRAME; + fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]); J->maxslot = (BCReg)nargs; }