Record calls to vararg functions.

This loop is now roughly 1000x faster than the Lua interpreter:
  local function f(a,b,...) end; for i=1,2e8 do f(1,2,i) end
Yet another silly microbenchmark -- I know.
This commit is contained in:
Mike Pall 2010-09-12 01:37:02 +02:00
parent b72ae54dc0
commit c2c08ba9b3
4 changed files with 56 additions and 16 deletions

View File

@ -384,17 +384,18 @@ void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc)
callhook(L, LUA_HOOKRET, -1); callhook(L, LUA_HOOKRET, -1);
} }
/* Initialize call. Ensure stack space and clear missing parameters. */ /* Initialize call. Ensure stack space and return # of missing parameters. */
static void call_init(lua_State *L, GCfunc *fn) static int call_init(lua_State *L, GCfunc *fn)
{ {
if (isluafunc(fn)) { if (isluafunc(fn)) {
MSize numparams = funcproto(fn)->numparams; int numparams = funcproto(fn)->numparams;
TValue *o; int gotparams = (int)(L->top - L->base);
lj_state_checkstack(L, numparams); lj_state_checkstack(L, (MSize)numparams);
for (o = L->base + numparams; L->top < o; L->top++) numparams -= gotparams;
setnilV(L->top); /* Clear missing parameters. */ return numparams >= 0 ? numparams : 0;
} else { } else {
lj_state_checkstack(L, LUA_MINSTACK); lj_state_checkstack(L, LUA_MINSTACK);
return 0;
} }
} }
@ -407,7 +408,7 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc)
#if LJ_HASJIT #if LJ_HASJIT
jit_State *J = G2J(g); jit_State *J = G2J(g);
#endif #endif
call_init(L, fn); int missing = call_init(L, fn);
#if LJ_HASJIT #if LJ_HASJIT
J->L = L; J->L = L;
if ((uintptr_t)pc & 1) { /* Marker for hot call. */ if ((uintptr_t)pc & 1) { /* Marker for hot call. */
@ -420,8 +421,15 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc)
lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */ lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */
} }
#endif #endif
if ((g->hookmask & LUA_MASKCALL)) if ((g->hookmask & LUA_MASKCALL)) {
int i;
for (i = 0; i < missing; i++) /* Add missing parameters. */
setnilV(L->top++);
callhook(L, LUA_HOOKCALL, -1); callhook(L, LUA_HOOKCALL, -1);
/* Preserve modifications of missing parameters by lua_setlocal(). */
while (missing-- > 0 && tvisnil(L->top - 1))
L->top--;
}
#if LJ_HASJIT #if LJ_HASJIT
out: out:
#endif #endif

View File

@ -570,6 +570,17 @@ static void rec_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
J->base[--rbase] = TREF_TRUE; /* Prepend true to results. */ J->base[--rbase] = TREF_TRUE; /* Prepend true to results. */
frame = frame_prevd(frame); frame = frame_prevd(frame);
} }
if (frame_isvarg(frame)) {
BCReg cbase = (BCReg)frame_delta(frame);
lua_assert(J->framedepth != 1);
if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */
lj_trace_err(J, LJ_TRERR_NYIRETL);
lua_assert(J->baseslot > 1);
rbase += cbase;
J->baseslot -= (BCReg)cbase;
J->base -= cbase;
frame = frame_prevd(frame);
}
if (frame_islua(frame)) { /* Return to Lua frame. */ if (frame_islua(frame)) { /* Return to Lua frame. */
BCIns callins = *(frame_pc(frame)-1); BCIns callins = *(frame_pc(frame)-1);
ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults; ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults;
@ -1840,7 +1851,6 @@ static void rec_func_setup(jit_State *J)
BCReg s, numparams = pt->numparams; BCReg s, numparams = pt->numparams;
if ((pt->flags & PROTO_NO_JIT)) if ((pt->flags & PROTO_NO_JIT))
lj_trace_err(J, LJ_TRERR_CJITOFF); lj_trace_err(J, LJ_TRERR_CJITOFF);
lua_assert(!(pt->flags & PROTO_IS_VARARG));
if (J->baseslot + pt->framesize >= LJ_MAX_JSLOTS) if (J->baseslot + pt->framesize >= LJ_MAX_JSLOTS)
lj_trace_err(J, LJ_TRERR_STACKOV); lj_trace_err(J, LJ_TRERR_STACKOV);
/* Fill up missing parameters with nil. */ /* Fill up missing parameters with nil. */
@ -1850,6 +1860,27 @@ static void rec_func_setup(jit_State *J)
J->maxslot = numparams; J->maxslot = numparams;
} }
/*
** Record Lua vararg function setup.
** Opens a new frame above the slots currently in use: the function is
** copied to just below the new base, the fixed arguments are moved to the
** start of the new frame and their old slots are replaced by nil.
*/
static void rec_func_vararg(jit_State *J)
{
  GCproto *pt = J->pt;
  BCReg newbase = J->maxslot+1;  /* Offset of the new base from the old one. */
  BCReg nfix;  /* Number of fixed argument slots to move. */
  BCReg idx = 0;
  lua_assert((pt->flags & PROTO_IS_VARARG));
  /* Bail out if the relocated frame would exceed the slot limit. */
  if (J->baseslot + newbase + pt->framesize >= LJ_MAX_JSLOTS)
    lj_trace_err(J, LJ_TRERR_STACKOV);
  /* Copy function up: it ends up directly below the new base. */
  J->base[newbase-1] = J->base[-1];
  /* Move no more fixed arguments than there are slots in use. */
  if (pt->numparams < J->maxslot)
    nfix = pt->numparams;
  else
    nfix = J->maxslot;
  /* Copy fixarg slots up and set their original slots to nil. */
  while (idx < nfix) {
    J->base[newbase+idx] = J->base[idx];
    J->base[idx] = TREF_NIL;
    idx++;
  }
  /* Switch over to the new frame. */
  J->base += newbase;
  J->baseslot += newbase;
  J->framedepth++;
  J->maxslot = nfix;
}
/* Record entry to a Lua function. */ /* Record entry to a Lua function. */
static void rec_func_lua(jit_State *J) static void rec_func_lua(jit_State *J)
{ {
@ -2258,8 +2289,11 @@ void lj_record_ins(jit_State *J)
break; break;
case BC_FUNCV: case BC_FUNCV:
rec_func_vararg(J);
rec_func_lua(J);
break;
case BC_JFUNCV: case BC_JFUNCV:
lj_trace_err(J, LJ_TRERR_NYIVF); lua_assert(0); /* Cannot happen. No hotcall counting for vararg funcs. */
break; break;
case BC_FUNCC: case BC_FUNCC:

View File

@ -87,15 +87,14 @@ static void snapshot_framelinks(jit_State *J, SnapEntry *map)
if (frame_islua(frame)) { if (frame_islua(frame)) {
map[f++] = SNAP_MKPC(frame_pc(frame)); map[f++] = SNAP_MKPC(frame_pc(frame));
frame = frame_prevl(frame); frame = frame_prevl(frame);
} else if (frame_ispcall(frame)) {
map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
frame = frame_prevd(frame);
} else if (frame_iscont(frame)) { } else if (frame_iscont(frame)) {
map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
map[f++] = SNAP_MKPC(frame_contpc(frame)); map[f++] = SNAP_MKPC(frame_contpc(frame));
frame = frame_prevd(frame); frame = frame_prevd(frame);
} else { } else {
lua_assert(0); lua_assert(!frame_isc(frame));
map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
frame = frame_prevd(frame);
} }
} }
lua_assert(f == (MSize)(1 + J->framedepth)); lua_assert(f == (MSize)(1 + J->framedepth));

View File

@ -23,7 +23,6 @@ TREDEF(BADTYPE, "bad argument type")
TREDEF(CJITOFF, "call to JIT-disabled function") TREDEF(CJITOFF, "call to JIT-disabled function")
TREDEF(CUNROLL, "call unroll limit reached") TREDEF(CUNROLL, "call unroll limit reached")
TREDEF(DOWNREC, "down-recursion, restarting") TREDEF(DOWNREC, "down-recursion, restarting")
TREDEF(NYIVF, "NYI: vararg function")
TREDEF(NYICF, "NYI: C function %p") TREDEF(NYICF, "NYI: C function %p")
TREDEF(NYIFF, "NYI: FastFunc %s") TREDEF(NYIFF, "NYI: FastFunc %s")
TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s") TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s")