From c2c08ba9b33ed89feb190aa3484bf2360db779a0 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 12 Sep 2010 01:37:02 +0200 Subject: [PATCH] Record calls to vararg functions. This loop is now roughly 1000x faster than the Lua interpreter: local function f(a,b,...) end; for i=1,2e8 do f(1,2,i) end Yet another silly microbenchmark -- I know. --- src/lj_dispatch.c | 26 +++++++++++++++++--------- src/lj_record.c | 38 ++++++++++++++++++++++++++++++++++++-- src/lj_snap.c | 7 +++---- src/lj_traceerr.h | 1 - 4 files changed, 56 insertions(+), 16 deletions(-) diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c index 491c5aa3..3b1be7e2 100644 --- a/src/lj_dispatch.c +++ b/src/lj_dispatch.c @@ -384,17 +384,18 @@ void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc) callhook(L, LUA_HOOKRET, -1); } -/* Initialize call. Ensure stack space and clear missing parameters. */ -static void call_init(lua_State *L, GCfunc *fn) +/* Initialize call. Ensure stack space and return # of missing parameters. */ +static int call_init(lua_State *L, GCfunc *fn) { if (isluafunc(fn)) { - MSize numparams = funcproto(fn)->numparams; - TValue *o; - lj_state_checkstack(L, numparams); - for (o = L->base + numparams; L->top < o; L->top++) - setnilV(L->top); /* Clear missing parameters. */ + int numparams = funcproto(fn)->numparams; + int gotparams = (int)(L->top - L->base); + lj_state_checkstack(L, (MSize)numparams); + numparams -= gotparams; + return numparams >= 0 ? numparams : 0; } else { lj_state_checkstack(L, LUA_MINSTACK); + return 0; } } @@ -407,7 +408,7 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc) #if LJ_HASJIT jit_State *J = G2J(g); #endif - call_init(L, fn); + int missing = call_init(L, fn); #if LJ_HASJIT J->L = L; if ((uintptr_t)pc & 1) { /* Marker for hot call. */ @@ -420,8 +421,15 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc) lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. 
*/ } #endif - if ((g->hookmask & LUA_MASKCALL)) + if ((g->hookmask & LUA_MASKCALL)) { + int i; + for (i = 0; i < missing; i++) /* Add missing parameters. */ + setnilV(L->top++); callhook(L, LUA_HOOKCALL, -1); + /* Preserve modifications of missing parameters by lua_setlocal(). */ + while (missing-- > 0 && tvisnil(L->top - 1)) + L->top--; + } #if LJ_HASJIT out: #endif diff --git a/src/lj_record.c b/src/lj_record.c index 2e411632..0dcc1ac7 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -570,6 +570,17 @@ static void rec_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) J->base[--rbase] = TREF_TRUE; /* Prepend true to results. */ frame = frame_prevd(frame); } + if (frame_isvarg(frame)) { + BCReg cbase = (BCReg)frame_delta(frame); + lua_assert(J->framedepth != 1); + if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */ + lj_trace_err(J, LJ_TRERR_NYIRETL); + lua_assert(J->baseslot > 1); + rbase += cbase; + J->baseslot -= (BCReg)cbase; + J->base -= cbase; + frame = frame_prevd(frame); + } if (frame_islua(frame)) { /* Return to Lua frame. */ BCIns callins = *(frame_pc(frame)-1); ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults; @@ -1840,7 +1851,6 @@ static void rec_func_setup(jit_State *J) BCReg s, numparams = pt->numparams; if ((pt->flags & PROTO_NO_JIT)) lj_trace_err(J, LJ_TRERR_CJITOFF); - lua_assert(!(pt->flags & PROTO_IS_VARARG)); if (J->baseslot + pt->framesize >= LJ_MAX_JSLOTS) lj_trace_err(J, LJ_TRERR_STACKOV); /* Fill up missing parameters with nil. */ @@ -1850,6 +1860,27 @@ static void rec_func_setup(jit_State *J) J->maxslot = numparams; } +/* Record Lua vararg function setup. */ +static void rec_func_vararg(jit_State *J) +{ + GCproto *pt = J->pt; + BCReg s, fixargs, vframe = J->maxslot+1; + lua_assert((pt->flags & PROTO_IS_VARARG)); + if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) + lj_trace_err(J, LJ_TRERR_STACKOV); + J->base[vframe-1] = J->base[-1]; /* Copy function up. 
*/ + /* Copy fixarg slots up and set their original slots to nil. */ + fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot; + for (s = 0; s < fixargs; s++) { + J->base[vframe+s] = J->base[s]; + J->base[s] = TREF_NIL; + } + J->maxslot = fixargs; + J->framedepth++; + J->base += vframe; + J->baseslot += vframe; +} + /* Record entry to a Lua function. */ static void rec_func_lua(jit_State *J) { @@ -2258,8 +2289,11 @@ void lj_record_ins(jit_State *J) break; case BC_FUNCV: + rec_func_vararg(J); + rec_func_lua(J); + break; case BC_JFUNCV: - lj_trace_err(J, LJ_TRERR_NYIVF); + lua_assert(0); /* Cannot happen. No hotcall counting for vararg funcs. */ break; case BC_FUNCC: diff --git a/src/lj_snap.c b/src/lj_snap.c index 7c26bfb9..64db5288 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -87,15 +87,14 @@ static void snapshot_framelinks(jit_State *J, SnapEntry *map) if (frame_islua(frame)) { map[f++] = SNAP_MKPC(frame_pc(frame)); frame = frame_prevl(frame); - } else if (frame_ispcall(frame)) { - map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); - frame = frame_prevd(frame); } else if (frame_iscont(frame)) { map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); map[f++] = SNAP_MKPC(frame_contpc(frame)); frame = frame_prevd(frame); } else { - lua_assert(0); + lua_assert(!frame_isc(frame)); + map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); + frame = frame_prevd(frame); } } lua_assert(f == (MSize)(1 + J->framedepth)); diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h index 1b0df055..9bfdadc6 100644 --- a/src/lj_traceerr.h +++ b/src/lj_traceerr.h @@ -23,7 +23,6 @@ TREDEF(BADTYPE, "bad argument type") TREDEF(CJITOFF, "call to JIT-disabled function") TREDEF(CUNROLL, "call unroll limit reached") TREDEF(DOWNREC, "down-recursion, restarting") -TREDEF(NYIVF, "NYI: vararg function") TREDEF(NYICF, "NYI: C function %p") TREDEF(NYIFF, "NYI: FastFunc %s") TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s")