Mirror of https://github.com/LuaJIT/LuaJIT.git (synced 2025-02-07 23:24:09 +00:00)
Replace on-trace GC frame syncing with interpreter exit.
GC objects need to be synced to the stack only during the atomic GC phase, and a proper frame structure needs to be set up only for calling finalizers. Force an exit to the interpreter and let it handle these uncommon cases. This finally solves the "NYI: gcstep sync with frames" issue.
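The contract this commit introduces can be summarized as follows: the GC step helper called from compiled code reports whether the collector has reached a phase that needs a synced stack or proper frames (atomic marking or finalizers), and the trace guards on that result so it exits to the interpreter instead of syncing frames itself. A rough, self-contained sketch of that contract; the types and helpers below are illustrative stand-ins, not LuaJIT's API:

#include <stdio.h>

/* Illustrative stand-ins; the real code uses global_State, lj_gc_step()
** and the GCS* states from lj_gc.c. */
typedef enum { GC_PAUSE, GC_PROPAGATE, GC_ATOMIC, GC_SWEEP, GC_FINALIZE } gc_state_t;

/* One miniature GC step: advance the state machine, return 1 at end of cycle. */
static int gc_one_step(gc_state_t *st)
{
  *st = (*st == GC_FINALIZE) ? GC_PAUSE : (gc_state_t)(*st + 1);
  return *st == GC_PAUSE;
}

/* Shape of lj_gc_step_jit() after this commit: run a few steps, then tell
** the caller (a compiled trace) whether it must exit to the interpreter,
** because the atomic phase and finalizers are only handled there. */
static int gc_step_from_trace(gc_state_t *st, unsigned steps)
{
  while (steps-- > 0 && !gc_one_step(st))
    ;
  return (*st == GC_ATOMIC || *st == GC_FINALIZE);
}

int main(void)
{
  gc_state_t st = GC_PAUSE;
  if (gc_step_from_trace(&st, 2))
    printf("trace exits to the interpreter (state=%d)\n", (int)st);
  else
    printf("trace keeps running (state=%d)\n", (int)st);
  return 0;
}

In the real code the return value lands in RID_RET and the trace guards on it, as the src/lj_asm.c hunks below show.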
This commit is contained in:
parent ff82df797a
commit 932cda0fe3
src/buildvm_x64.h (1208 changes): file diff suppressed because it is too large.
src/buildvm_x64win.h (1146 changes): file diff suppressed because it is too large.
@@ -2613,6 +2613,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
  | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)]
  | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
  | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
  | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0
  | mov L:RB->base, BASE
  |.if X64WIN
  | lea CARG2, [rsp+4*8]
src/buildvm_x86.h (1281 changes): file diff suppressed because it is too large.
src/lj_asm.c (66 changes):
@@ -2752,67 +2752,32 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)

/* -- GC handling --------------------------------------------------------- */

/* Sync all live GC values to Lua stack slots. */
static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base)
{
  /* Some care must be taken when allocating registers here, since this is
  ** not part of the fast path. All scratch registers are evicted in the
  ** fast path, so it's easiest to force allocation from scratch registers
  ** only. This avoids register allocation state unification.
  */
  RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base);
  SnapEntry *map = &as->T->snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    IRRef ref = snap_ref(sn);
    /* NYI: sync the frame, bump base, set topslot, clear new slots. */
    if ((sn & (SNAP_CONT|SNAP_FRAME)))
      lj_trace_err(as->J, LJ_TRERR_NYIGCF);
    if (!irref_isk(ref)) {
      IRIns *ir = IR(ref);
      if (irt_isgcv(ir->t)) {
        int32_t ofs = 8*(int32_t)(snap_slot(sn)-1);
        Reg src = ra_alloc1(as, ref, allow);
        emit_movtomro(as, src, base, ofs);
        emit_movmroi(as, base, ofs+4, irt_toitype(ir->t));
        checkmclim(as);
      }
    }
  }
}

/* Check GC threshold and do one or more GC steps. */
static void asm_gc_check(ASMState *as, SnapShot *snap)
static void asm_gc_check(ASMState *as)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
  IRRef args[2];
  MCLabel l_end;
  Reg base, lstate, tmp;
  RegSet drop = RSET_SCRATCH;
  if (ra_hasreg(IR(REF_BASE)->r))  /* Stack may be reallocated by the GC. */
    drop |= RID2RSET(IR(REF_BASE)->r);  /* Need to evict BASE, too. */
  ra_evictset(as, drop);
  ra_evictset(as, RSET_SCRATCH);
  l_end = emit_label(as);
  /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
  asm_guardcc(as, CC_NE);  /* Assumes asm_snap_prep() already done. */
  emit_rr(as, XO_TEST, RID_RET, RID_RET);
  args[0] = ASMREF_L;
  args[1] = ASMREF_TMP1;
  asm_gencall(as, ci, args);
  tmp = ra_releasetmp(as, ASMREF_TMP1);
  emit_loadi(as, tmp, (int32_t)as->gcsteps);
  /* We don't know spadj yet, so get the C frame from L->cframe. */
  emit_movmroi(as, tmp, CFRAME_OFS_PC,
               (int32_t)as->T->snapmap[snap->mapofs+snap->nent]);
  emit_gri(as, XG_ARITHi(XOg_AND), tmp|REX_64, CFRAME_RAWMASK);
  lstate = IR(ASMREF_L)->r;
  emit_rmro(as, XO_MOV, tmp|REX_64, lstate, offsetof(lua_State, cframe));
  /* It's ok if lstate is already in a non-scratch reg. But all allocations
  ** in the non-fast path must use a scratch reg. See comment above.
  ** in the non-fast path must use a scratch reg (avoids unification).
  */
  lstate = IR(ASMREF_L)->r;
  base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_SCRATCH & RSET_GPR, lstate));
  emit_movtomro(as, base|REX_64, lstate, offsetof(lua_State, base));
  asm_gc_sync(as, snap, base);
  /* BASE/L get restored anyway, better do it inside the slow path. */
  if (as->parent || as->curins == as->loopref) ra_restore(as, REF_BASE);
  if (rset_test(RSET_SCRATCH, base) && (as->parent || as->snapno != 0))
    ra_restore(as, REF_BASE);
  if (rset_test(RSET_SCRATCH, lstate) && ra_hasreg(IR(ASMREF_L)->r))
    ra_restore(as, ASMREF_L);
  /* Jump around GC step if GC total < GC threshold. */
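Note that the assembler emits machine code backwards, so in the hunk above the guard (asm_guardcc with CC_NE) is emitted before the TEST, yet the executed sequence is: call the step helper, TEST its return value, and jump to the trace exit if it is nonzero. A hedged C rendering of that executed logic; gc_step() and trace_exit_to_interpreter() are hypothetical stand-ins, not LuaJIT functions:

#include <stdio.h>

/* Hypothetical stand-ins for lj_gc_step_jit() and the exit stub. */
static int gc_step(void *L, unsigned steps) { (void)L; (void)steps; return 1; }
static void trace_exit_to_interpreter(void) { puts("leave trace, resume in interpreter"); }

/* Executed order of the emitted GC-check slow path: call, then guard on the
** return value (TEST eax,eax; JNE ->exit in the generated code). */
static void gc_check_slow_path(void *L, unsigned gcsteps)
{
  int must_exit = gc_step(L, gcsteps);  /* eax = lj_gc_step_jit(L, gcsteps) */
  if (must_exit != 0)                   /* test eax,eax ; jne ->exit */
    trace_exit_to_interpreter();
}

int main(void)
{
  gc_check_slow_path(NULL, 2);
  return 0;
}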
@@ -3034,7 +2999,7 @@ static void asm_loop(ASMState *as)
  /* LOOP is a guard, so the snapno is up to date. */
  as->loopsnapno = as->snapno;
  if (as->gcsteps)
    asm_gc_check(as, &as->T->snap[as->loopsnapno]);
    asm_gc_check(as);
  /* LOOP marks the transition from the variant to the invariant part. */
  as->testmcp = as->invmcp = NULL;
  as->sectref = 0;
@@ -3126,7 +3091,7 @@ static void asm_head_side(ASMState *as)
  allow = asm_head_side_base(as, pbase, allow);

  /* Scan all parent SLOADs and collect register dependencies. */
  for (i = as->curins; i > REF_BASE; i--) {
  for (i = as->stopins; i > REF_BASE; i--) {
    IRIns *ir = IR(i);
    RegSP rs;
    lua_assert(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT));
@@ -3161,7 +3126,7 @@ static void asm_head_side(ASMState *as)

  /* Reload spilled target registers. */
  if (pass2) {
    for (i = as->curins; i > REF_BASE; i--) {
    for (i = as->stopins; i > REF_BASE; i--) {
      IRIns *ir = IR(i);
      if (irt_ismarked(ir->t)) {
        RegSet mask;
@@ -3686,8 +3651,11 @@ void lj_asm_trace(jit_State *J, Trace *T)

    RA_DBG_REF();
    checkmclim(as);
    if (as->gcsteps)
      asm_gc_check(as, &as->T->snap[0]);
    if (as->gcsteps) {
      as->curins = as->T->snap[0].ref;
      asm_snap_prep(as);  /* The GC check is a guard. */
      asm_gc_check(as);
    }
    ra_evictk(as);
    if (as->parent)
      asm_head_side(as);
src/lj_gc.c (57 changes):
@@ -442,6 +442,7 @@ static void gc_finalize(lua_State *L)
  GCobj *o = gcnext(gcref(g->gc.mmudata));
  GCudata *ud = gco2ud(o);
  cTValue *mo;
  lua_assert(gcref(g->jit_L) == NULL);  /* Must not be called on trace. */
  /* Unchain from list of userdata to be finalized. */
  if (o == gcref(g->gc.mmudata))
    setgcrefnull(g->gc.mmudata);
@@ -457,16 +458,8 @@ static void gc_finalize(lua_State *L)
  /* Save and restore lots of state around the __gc callback. */
  uint8_t oldh = hook_save(g);
  MSize oldt = g->gc.threshold;
  GCobj *oldjl = gcref(g->jit_L);
  MSize oldjs = 0;
  ptrdiff_t oldjb = 0;
  int errcode;
  TValue *top;
  if (oldjl) {
    oldjs = gco2th(oldjl)->stacksize;
    oldjb = savestack(gco2th(oldjl), mref(g->jit_base, TValue));
    setgcrefnull(g->jit_L);
  }
  lj_trace_abort(g);
  top = L->top;
  L->top = top+2;
@@ -477,12 +470,6 @@ static void gc_finalize(lua_State *L)
  errcode = lj_vm_pcall(L, top+1, 1+0, -1);  /* Stack: |mo|ud| -> | */
  hook_restore(g, oldh);
  g->gc.threshold = oldt;  /* Restore GC threshold. */
  if (oldjl) {
    if (gco2th(oldjl)->stacksize < oldjs)
      lj_state_growstack(gco2th(oldjl), oldjs - gco2th(oldjl)->stacksize);
    setgcref(g->jit_L, oldjl);
    setmref(g->jit_base, restorestack(gco2th(oldjl), oldjb));
  }
  if (errcode)
    lj_err_throw(L, errcode);  /* Propagate errors. */
}
@@ -514,7 +501,6 @@ static void atomic(global_State *g, lua_State *L)
{
  size_t udsize;

  g->gc.state = GCSatomic;
  gc_mark_uv(g);  /* Need to remark open upvalues (the thread may be dead). */
  gc_propagate_gray(g);  /* Propagate any left-overs. */
@@ -539,9 +525,7 @@ static void atomic(global_State *g, lua_State *L)

  /* Prepare for sweep phase. */
  g->gc.currentwhite = cast_byte(otherwhite(g));  /* Flip current white. */
  g->gc.sweepstr = 0;
  setmref(g->gc.sweep, &g->gc.root);
  g->gc.state = GCSsweepstring;
  g->gc.estimate = g->gc.total - (MSize)udsize;  /* Initial estimate. */
}
@@ -556,7 +540,14 @@ static size_t gc_onestep(lua_State *L)
  case GCSpropagate:
    if (gcref(g->gc.gray) != NULL)
      return propagatemark(g);  /* Propagate one gray object. */
    atomic(g, L);  /* End of mark phase. */
    g->gc.state = GCSatomic;  /* End of mark phase. */
    return 0;
  case GCSatomic:
    if (gcref(g->jit_L))  /* Don't run atomic phase on trace. */
      return LJ_MAX_MEM;
    atomic(g, L);
    g->gc.state = GCSsweepstring;  /* Start of sweep phase. */
    g->gc.sweepstr = 0;
    return 0;
  case GCSsweepstring: {
    MSize old = g->gc.total;
@@ -572,7 +563,12 @@ static size_t gc_onestep(lua_State *L)
    setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX));
    if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) {
      gc_shrink(g, L);
      g->gc.state = GCSfinalize;  /* End of sweep phase. */
      if (gcref(g->gc.mmudata)) {  /* Need any finalizations? */
        g->gc.state = GCSfinalize;
      } else {  /* Otherwise skip this phase to help the JIT. */
        g->gc.state = GCSpause;  /* End of GC cycle. */
        g->gc.debt = 0;
      }
    }
    lua_assert(old >= g->gc.total);
    g->gc.estimate -= old - g->gc.total;
@@ -580,6 +576,8 @@ static size_t gc_onestep(lua_State *L)
  }
  case GCSfinalize:
    if (gcref(g->gc.mmudata) != NULL) {
      if (gcref(g->jit_L))  /* Don't call finalizers on trace. */
        return LJ_MAX_MEM;
      gc_finalize(L);  /* Finalize one userdata object. */
      if (g->gc.estimate > GCFINALIZECOST)
        g->gc.estimate -= GCFINALIZECOST;
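Taken together, the gc_onestep() hunks above split the atomic phase into its own GCSatomic state and make both that state and GCSfinalize refuse to run while a trace is active, by reporting a huge step cost so the GC check in compiled code keeps forcing an exit until the interpreter takes over. A compilable miniature of that state machine; the names and fields are simplifications, not the real gc_onestep():

#include <stddef.h>
#include <stdio.h>

#define GC_HUGE_COST ((size_t)-1)  /* plays the role of LJ_MAX_MEM */

typedef enum { GCSpause, GCSpropagate, GCSatomic, GCSsweep, GCSfinalize } gc_state;

typedef struct {
  gc_state state;
  int on_trace;       /* nonzero while JIT-compiled code is running */
  int gray_left;      /* pending gray objects to propagate */
  int finalize_left;  /* pending userdata finalizers */
} mini_gc;

/* One GC step; returns the "cost" of the work done. */
static size_t mini_gc_onestep(mini_gc *g)
{
  switch (g->state) {
  case GCSpause:
    g->state = GCSpropagate;
    return 0;
  case GCSpropagate:
    if (g->gray_left > 0) { g->gray_left--; return 1; }
    g->state = GCSatomic;  /* end of mark phase */
    return 0;
  case GCSatomic:
    if (g->on_trace) return GC_HUGE_COST;  /* don't run atomic phase on trace */
    g->state = GCSsweep;
    return 0;
  case GCSsweep:
    /* Skip the finalize phase entirely if nothing needs finalizing. */
    g->state = g->finalize_left > 0 ? GCSfinalize : GCSpause;
    return 1;
  case GCSfinalize:
    if (g->on_trace) return GC_HUGE_COST;  /* don't call finalizers on trace */
    if (--g->finalize_left == 0) g->state = GCSpause;
    return 1;
  }
  return 0;
}

int main(void)
{
  mini_gc g = { GCSpause, 1 /* on trace */, 1, 1 };
  while (mini_gc_onestep(&g) != GC_HUGE_COST)
    ;
  printf("GC parked in state %d until the trace exits\n", (int)g.state);
  return 0;
}

The GCSsweep branch of the sketch also mirrors the hunk above that skips GCSfinalize when no finalizers are pending, "to help the JIT".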
@@ -633,11 +631,13 @@ void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L)

#if LJ_HASJIT
/* Perform multiple GC steps. Called from JIT-compiled code. */
void LJ_FASTCALL lj_gc_step_jit(lua_State *L, MSize steps)
int LJ_FASTCALL lj_gc_step_jit(lua_State *L, MSize steps)
{
  L->top = curr_topL(L);
  while (steps-- > 0 && lj_gc_step(L) == 0)
    ;
  /* Return 1 to force a trace exit. */
  return (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize);
}
#endif
@@ -647,23 +647,20 @@ void lj_gc_fullgc(lua_State *L)
  global_State *g = G(L);
  int32_t ostate = g->vmstate;
  setvmstate(g, GC);
  if (g->gc.state <= GCSpropagate) {  /* Caught somewhere in the middle. */
    g->gc.sweepstr = 0;
  if (g->gc.state <= GCSatomic) {  /* Caught somewhere in the middle. */
    setmref(g->gc.sweep, &g->gc.root);  /* Sweep everything (preserving it). */
    setgcrefnull(g->gc.gray);  /* Reset lists from partial propagation. */
    setgcrefnull(g->gc.grayagain);
    setgcrefnull(g->gc.weak);
    g->gc.state = GCSsweepstring;  /* Fast forward to the sweep phase. */
    g->gc.sweepstr = 0;
  }
  lua_assert(g->gc.state != GCSpause && g->gc.state != GCSpropagate);
  while (g->gc.state != GCSfinalize) {  /* Finish sweep. */
    lua_assert(g->gc.state == GCSsweepstring || g->gc.state == GCSsweep);
    gc_onestep(L);
  }
  while (g->gc.state == GCSsweepstring || g->gc.state == GCSsweep)
    gc_onestep(L);  /* Finish sweep. */
  lua_assert(g->gc.state == GCSfinalize || g->gc.state == GCSpause);
  /* Now perform a full GC. */
  gc_mark_start(g);
  while (g->gc.state != GCSpause)
    gc_onestep(L);
  g->gc.state = GCSpause;
  do { gc_onestep(L); } while (g->gc.state != GCSpause);
  g->gc.threshold = (g->gc.estimate/100) * g->gc.pause;
  g->vmstate = ostate;
}
@@ -47,7 +47,7 @@ LJ_FUNC void lj_gc_freeall(global_State *g);
LJ_FUNCA int LJ_FASTCALL lj_gc_step(lua_State *L);
LJ_FUNCA void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L);
#if LJ_HASJIT
LJ_FUNC void LJ_FASTCALL lj_gc_step_jit(lua_State *L, MSize steps);
LJ_FUNC int LJ_FASTCALL lj_gc_step_jit(lua_State *L, MSize steps);
#endif
LJ_FUNC void lj_gc_fullgc(lua_State *L);
@@ -1228,7 +1228,7 @@ static void fs_init(LexState *ls, FuncState *fs)
  fs->flags = 0;
  fs->framesize = 2;  /* Minimum frame size. */
  fs->kt = lj_tab_new(L, 0, 0);
  /* Anchor table of constants and prototype (to avoid being collected). */
  /* Anchor table of constants in stack to avoid being collected. */
  settabV(L, L->top, fs->kt);
  incr_top(L);
}
@@ -1885,13 +1885,22 @@ static TRef rec_tnew(jit_State *J, uint32_t ah)

/* -- Record bytecode ops ------------------------------------------------- */

/* Optimize state after comparison. */
static void optstate_comp(jit_State *J, int cond)
/* Prepare for comparison. */
static void rec_comp_prep(jit_State *J)
{
  /* Prevent merging with snapshot #0 (GC exit) since we fixup the PC. */
  if (J->cur.nsnap == 1 && J->cur.snap[0].ref == J->cur.nins)
    emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
  lj_snap_add(J);
}

/* Fixup comparison. */
static void rec_comp_fixup(jit_State *J, int cond)
{
  BCIns jmpins = J->pc[1];
  const BCIns *npc = J->pc + 2 + (cond ? bc_j(jmpins) : 0);
  SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
  /* Avoid re-recording the comparison in side traces. */
  /* Set PC to opposite target to avoid re-recording the comp. in side trace. */
  J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc);
  J->needsnap = 1;
  /* Shrink last snapshot if possible. */
@@ -1987,7 +1996,7 @@ void lj_record_ins(jit_State *J)
        break;  /* Interpreter will throw for two different types. */
      }
    }
    lj_snap_add(J);
    rec_comp_prep(J);
    irop = (int)op - (int)BC_ISLT + (int)IR_LT;
    if (ta == IRT_NUM) {
      if ((irop & 1)) irop ^= 4;  /* ISGE/ISGT are unordered. */
@@ -2004,7 +2013,7 @@ void lj_record_ins(jit_State *J)
        break;
      }
      emitir(IRTG(irop, ta), ra, rc);
      optstate_comp(J, ((int)op ^ irop) & 1);
      rec_comp_fixup(J, ((int)op ^ irop) & 1);
    }
    break;
@@ -2015,14 +2024,14 @@ void lj_record_ins(jit_State *J)
    /* Emit nothing for two non-table, non-udata consts. */
    if (!(tref_isk2(ra, rc) && !(tref_istab(ra) || tref_isudata(ra)))) {
      int diff;
      lj_snap_add(J);
      rec_comp_prep(J);
      diff = rec_objcmp(J, ra, rc, rav, rcv);
      if (diff == 1 && (tref_istab(ra) || tref_isudata(ra))) {
        /* Only check __eq if different, but the same type (table or udata). */
        rec_mm_equal(J, &ix, (int)op);
        break;
      }
      optstate_comp(J, ((int)op & 1) == !diff);
      rec_comp_fixup(J, ((int)op & 1) == !diff);
    }
    break;
@@ -692,8 +692,12 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
  );

  pc = exd.pc;
  trace_hotside(J, pc);
  cf = cframe_raw(L->cframe);
  setcframe_pc(cf, pc);
  if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize)
    lj_gc_step(L);  /* Exited because of GC: drive GC forward. */
  else
    trace_hotside(J, pc);
  if (bc_op(*pc) == BC_JLOOP) {
    BCIns *retpc = &J->trace[bc_d(*pc)]->startins;
    if (bc_isret(bc_op(*retpc))) {
@@ -703,10 +707,10 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
      *J->patchpc = *retpc;
    } else {
      pc = retpc;
      setcframe_pc(cf, pc);
    }
  }
  setcframe_pc(cf, pc);
  /* Return MULTRES or 0. */
  switch (bc_op(*pc)) {
  case BC_CALLM: case BC_CALLMT:
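The lj_trace_exit() change above is the other half of the mechanism: when the exit was forced by the GC check, the exit handler drives the collector forward in the interpreter instead of counting the exit toward side-trace creation. A minimal runnable sketch of that decision; drive_gc() and count_hotside() are stand-ins for lj_gc_step() and trace_hotside():

#include <stdio.h>

enum { GCSpause, GCSpropagate, GCSatomic, GCSsweepstring, GCSsweep, GCSfinalize };

static void drive_gc(void)      { puts("lj_gc_step(L)        /* exited because of GC */"); }
static void count_hotside(void) { puts("trace_hotside(J, pc) /* ordinary exit */"); }

/* Shape of the new branch in lj_trace_exit(). */
static void handle_trace_exit(int gc_state)
{
  if (gc_state == GCSatomic || gc_state == GCSfinalize)
    drive_gc();
  else
    count_hotside();
}

int main(void)
{
  handle_trace_exit(GCSatomic);     /* GC-forced exit */
  handle_trace_exit(GCSpropagate);  /* normal exit */
  return 0;
}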
@@ -49,7 +49,7 @@ TREDEF(BADRA, "inconsistent register allocation")
TREDEF(NYIIR, "NYI: cannot assemble IR instruction %d")
TREDEF(NYIPHI, "NYI: PHI shuffling too complex")
TREDEF(NYICOAL, "NYI: register coalescing too complex")
TREDEF(NYIGCF, "NYI: gcstep sync with frames")

#undef TREDEF