Check for upvalue state transition in IR_UREFO.

Thanks to Peter Cawley. #1085
This commit is contained in:
Mike Pall 2023-11-05 16:34:46 +01:00
parent 0afa1676b2
commit 07b3cd3cf9
9 changed files with 150 additions and 67 deletions

View File

@ -969,25 +969,33 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, ARMI_LDR, dest, v);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
asm_guardcc(as, CC_NE);
if (guarded) {
asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);
emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP);
emit_opk(as, ARMI_ADD, dest, uv,
(int32_t)offsetof(GCupval, tv), RSET_GPR);
emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
} else {
emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v));
}
emit_lso(as, ARMI_LDR, uv, func,
if (ir->o == IR_UREFC)
emit_opk(as, ARMI_ADD, dest, dest,
(int32_t)offsetof(GCupval, tv), RSET_GPR);
else
emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_lso(as, ARMI_LDRB, RID_TMP, dest,
(int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loadi(as, dest, k);
} else {
emit_lso(as, ARMI_LDR, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
}
}
}
static void asm_fref(ASMState *as, IRIns *ir)

View File

@ -931,23 +931,31 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, A64I_LDRx, dest, v);
} else {
if (ir->o == IR_UREFC) {
asm_guardcnb(as, A64I_CBZ, RID_TMP);
if (guarded)
asm_guardcnb(as, ir->o == IR_UREFC ? A64I_CBZ : A64I_CBNZ, RID_TMP);
if (ir->o == IR_UREFC)
emit_opk(as, A64I_ADDx, dest, dest,
(int32_t)offsetof(GCupval, tv), RSET_GPR);
else
emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_lso(as, A64I_LDRB, RID_TMP, dest,
(int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
uint64_t k = gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loadu64(as, dest, k);
} else {
emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v));
}
emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
}
}
}
static void asm_fref(ASMState *as, IRIns *ir)

View File

@ -1207,23 +1207,30 @@ nolo:
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv));
emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
if (guarded)
asm_guard(as, ir->o == IR_UREFC ? MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO);
if (ir->o == IR_UREFC)
emit_tsi(as, MIPSI_AADDIU, dest, dest, (int32_t)offsetof(GCupval, tv));
else
emit_tsi(as, MIPSI_AL, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_tsi(as, MIPSI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loada(as, dest, o);
} else {
emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v));
}
emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) +
emit_tsi(as, MIPSI_AL, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
}
}
static void asm_fref(ASMState *as, IRIns *ir)

View File

@ -840,24 +840,31 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
asm_guardcc(as, CC_NE);
if (guarded) {
asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);
emit_ai(as, PPCI_CMPWI, RID_TMP, 1);
emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv));
emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
} else {
emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v));
}
emit_tai(as, PPCI_LWZ, uv, func,
if (ir->o == IR_UREFC)
emit_tai(as, PPCI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv));
else
emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_tai(as, PPCI_LBZ, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loadi(as, dest, k);
} else {
emit_tai(as, PPCI_LWZ, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
}
}
}
static void asm_fref(ASMState *as, IRIns *ir)

View File

@ -1373,25 +1373,32 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_rma(as, XO_MOV, dest|REX_GC64, v);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
if (ir->o == IR_UREFC)
emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv));
asm_guardcc(as, CC_NE);
emit_i8(as, 1);
emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
} else {
else
emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
if (guarded) {
asm_guardcc(as, ir->o == IR_UREFC ? CC_E : CC_NE);
emit_i8(as, 0);
emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
}
emit_rmro(as, XO_MOV, uv|REX_GC64, func,
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loada(as, uv, o);
} else {
emit_rmro(as, XO_MOV, uv|REX_GC64, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
}
}
static void asm_fref(ASMState *as, IRIns *ir)

View File

@ -2134,8 +2134,26 @@ LJFOLDX(lj_opt_fwd_uload)
LJFOLD(ALEN any any)
LJFOLDX(lj_opt_fwd_alen)
/* Try to merge UREFO/UREFC into referenced instruction. */
static TRef merge_uref(jit_State *J, IRRef ref, IRIns* ir)
{
if (ir->o == IR_UREFO && irt_isguard(ir->t)) {
/* Might be pointing to some other coroutine's stack.
** And GC might shrink said stack, thereby repointing the upvalue.
** GC might even collect said coroutine, thereby closing the upvalue.
*/
if (gcstep_barrier(J, ref))
return EMITFOLD; /* So cannot merge. */
/* Current fins wants a check, but ir doesn't have one. */
if ((irt_t(fins->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC) &&
irt_type(ir->t) == IRT_IGC)
ir->t.irt += IRT_PGC-IRT_IGC; /* So install a check. */
}
return ref; /* Not a TRef, but the caller doesn't care. */
}
/* Upvalue refs are really loads, but there are no corresponding stores.
** So CSE is ok for them, except for UREFO across a GC step (see below).
** So CSE is ok for them, except for guarded UREFO across a GC step.
** If the referenced function is const, its upvalue addresses are const, too.
** This can be used to improve CSE by looking for the same address,
** even if the upvalues originate from a different function.
@ -2153,9 +2171,7 @@ LJFOLDF(cse_uref)
if (irref_isk(ir->op1)) {
GCfunc *fn2 = ir_kfunc(IR(ir->op1));
if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
break;
return ref;
return merge_uref(J, ref, ir);
}
}
ref = ir->prev;
@ -2164,6 +2180,24 @@ LJFOLDF(cse_uref)
return EMITFOLD;
}
/* Custom CSE for UREFO. */
LJFOLD(UREFO any any)
LJFOLDF(cse_urefo)
{
if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
IRRef ref = J->chain[IR_UREFO];
IRRef lim = fins->op1;
IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
while (ref > lim) {
IRIns *ir = IR(ref);
if (ir->op12 == op12)
return merge_uref(J, ref, ir);
ref = ir->prev;
}
}
return EMITFOLD;
}
LJFOLD(HREFK any any)
LJFOLDX(lj_opt_fwd_hrefk)
@ -2384,14 +2418,9 @@ LJFOLDF(fold_base)
/* Write barriers are amenable to CSE, but not across any incremental
** GC steps.
**
** The same logic applies to open upvalue references, because a stack
** may be resized during a GC step (not the current stack, but maybe that
** of a coroutine).
*/
LJFOLD(TBAR any)
LJFOLD(OBAR any any)
LJFOLD(UREFO any any)
LJFOLDF(barrier_tab)
{
TRef tr = lj_opt_cse(J);

View File

@ -464,18 +464,23 @@ doemit:
*/
static AliasRet aa_uref(IRIns *refa, IRIns *refb)
{
if (refa->o != refb->o)
return ALIAS_NO; /* Different UREFx type. */
if (refa->op1 == refb->op1) { /* Same function. */
if (refa->op2 == refb->op2)
return ALIAS_MUST; /* Same function, same upvalue idx. */
else
return ALIAS_NO; /* Same function, different upvalue idx. */
} else { /* Different functions, check disambiguation hash values. */
if (((refa->op2 ^ refb->op2) & 0xff))
if (((refa->op2 ^ refb->op2) & 0xff)) {
return ALIAS_NO; /* Upvalues with different hash values cannot alias. */
else
return ALIAS_MAY; /* No conclusion can be drawn for same hash value. */
} else if (refa->o != refb->o) {
/* Different UREFx type, but need to confirm the UREFO really is open. */
if (irt_type(refa->t) == IRT_IGC) refa->t.irt += IRT_PGC-IRT_IGC;
else if (irt_type(refb->t) == IRT_IGC) refb->t.irt += IRT_PGC-IRT_IGC;
return ALIAS_NO;
} else {
/* No conclusion can be drawn for same hash value and same UREFx type. */
return ALIAS_MAY;
}
}
}

View File

@ -1772,12 +1772,12 @@ noconstify:
/* Note: this effectively limits LJ_MAX_UPVAL to 127. */
uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff);
if (!uvp->closed) {
uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PGC), fn, uv));
/* In current stack? */
if (uvval(uvp) >= tvref(J->L->stack) &&
uvval(uvp) < tvref(J->L->maxstack)) {
int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot));
if (slot >= 0) { /* Aliases an SSA slot? */
uref = tref_ref(emitir(IRT(IR_UREFO, IRT_PGC), fn, uv));
emitir(IRTG(IR_EQ, IRT_PGC),
REF_BASE,
emitir(IRT(IR_ADD, IRT_PGC), uref,
@ -1792,12 +1792,21 @@ noconstify:
}
}
}
/* IR_UREFO+IRT_IGC is not checked for open-ness at runtime.
** Always marked as a guard, since it might get promoted to IRT_PGC later.
*/
uref = emitir(IRTG(IR_UREFO, tref_isgcv(val) ? IRT_PGC : IRT_IGC), fn, uv);
uref = tref_ref(uref);
emitir(IRTG(IR_UGT, IRT_PGC),
emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE),
lj_ir_kintpgc(J, (J->baseslot + J->maxslot) * 8));
} else {
/* If fn is constant, then so is the GCupval*, and the upvalue cannot
** transition back to open, so no guard is required in this case.
*/
IRType t = (tref_isk(fn) ? 0 : IRT_GUARD) | IRT_PGC;
uref = tref_ref(emitir(IRT(IR_UREFC, t), fn, uv));
needbarrier = 1;
uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv));
}
if (val == 0) { /* Upvalue load */
IRType t = itype2irt(uvval(uvp));

View File

@ -346,8 +346,11 @@ void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L)
lj_assertG(L != mainthread(g), "free of main thread");
if (obj2gco(L) == gcref(g->cur_L))
setgcrefnull(g->cur_L);
if (gcref(L->openupval) != NULL) {
lj_func_closeuv(L, tvref(L->stack));
lj_trace_abort(g); /* For aa_uref soundness. */
lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues");
}
lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
lj_mem_freet(g, L);
}