diff --git a/src/lj_asm.c b/src/lj_asm.c index df94933a..a9b87821 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -1357,7 +1357,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) /* Generic fusion is only ok for 32 bit operand (but see asm_comp). ** Fusing unaligned memory operands is ok on x86 (except for SIMD types). */ - if (irt_isint(ir->t) || irt_isaddr(ir->t)) { + if ((irt_isint(ir->t) || irt_isaddr(ir->t)) && + noconflict(as, ref, IR_XSTORE)) { asm_fusexref(as, IR(ir->op1), xallow); return RID_MRM; } @@ -1978,7 +1979,7 @@ static void asm_fxload(ASMState *as, IRIns *ir) emit_mrm(as, xo, dest, RID_MRM); } -static void asm_fstore(ASMState *as, IRIns *ir) +static void asm_fxstore(ASMState *as, IRIns *ir) { RegSet allow = RSET_GPR; Reg src = RID_NONE; @@ -1991,7 +1992,11 @@ static void asm_fstore(ASMState *as, IRIns *ir) src = ra_alloc1(as, ir->op2, allow8); rset_clear(allow, src); } - asm_fusefref(as, IR(ir->op1), allow); + if (ir->o == IR_FSTORE) /* FSTORE fuses a field reference, any other opcode routed here fuses an xref. */ + asm_fusefref(as, IR(ir->op1), allow); + else + asm_fusexref(as, IR(ir->op1), allow); + /* No alignment handling -- unaligned stores are ok on x86. (ir->op2 is allocated into src above, not ignored.) */ if (ra_hasreg(src)) { x86Op xo; switch (irt_type(ir->t)) { @@ -3467,7 +3472,7 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_SLOAD: asm_sload(as, ir); break; case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; - case IR_FSTORE: asm_fstore(as, ir); break; + case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break; /* Allocations. */ case IR_SNEW: asm_snew(as, ir); break; diff --git a/src/lj_ir.h b/src/lj_ir.h index 232ff939..3371a8a4 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -104,6 +104,7 @@ _(HSTORE, S , ref, ref) \ _(USTORE, S , ref, ref) \ _(FSTORE, S , ref, ref) \ + _(XSTORE, S , ref, ref) \ \ /* Allocations. */ \ _(SNEW, N , ref, ref) /* CSE is ok, so not marked as A. 
*/ \ @@ -152,6 +153,7 @@ LJ_STATIC_ASSERT(((int)IR_LT^4) == (int)IR_ULT); LJ_STATIC_ASSERT((int)IR_HLOAD + IRDELTA_L2S == (int)IR_HSTORE); LJ_STATIC_ASSERT((int)IR_ULOAD + IRDELTA_L2S == (int)IR_USTORE); LJ_STATIC_ASSERT((int)IR_FLOAD + IRDELTA_L2S == (int)IR_FSTORE); +LJ_STATIC_ASSERT((int)IR_XLOAD + IRDELTA_L2S == (int)IR_XSTORE); /* XLOAD/XSTORE must be IRDELTA_L2S apart, like the load/store pairs above. */ /* -- Named IR literals --------------------------------------------------- */ diff --git a/src/lj_iropt.h b/src/lj_iropt.h index c05040d6..ce8b564f 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h @@ -110,6 +110,7 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_aload(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J); +LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J); /* Fold handler for XLOAD (wired up via LJFOLDX in lj_opt_fold.c). */ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J); LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J); LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim); diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 8ccfc6bd..3476235e 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -1357,19 +1357,8 @@ LJFOLDF(xload_kptr) return NEXTFOLD; } -/* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */ LJFOLD(XLOAD any any) -LJFOLDF(fwd_xload) -{ - IRRef ref = J->chain[IR_XLOAD]; - IRRef op1 = fins->op1; - while (ref > op1) { - if (IR(ref)->op1 == op1 && irt_sametype(IR(ref)->t, fins->t)) - return ref; - ref = IR(ref)->prev; - } - return EMITFOLD; -} +LJFOLDX(lj_opt_fwd_xload) /* -- Write barriers ------------------------------------------------------ */ diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index 9b96d66e..d47706fb 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c @@ -523,6 +523,50 @@ doemit: return EMITFOLD; /* Otherwise we have a conflict or simply no match. 
*/ } +/* -- XLOAD forwarding ---------------------------------------------------- */ + +/* NYI: Alias analysis for XLOAD/XSTORE. Ignores all arguments and always reports ALIAS_MAY for now. */ +static AliasRet aa_xref(jit_State *J, IRIns *refa, IRIns *refb) +{ + UNUSED(J); UNUSED(refa); UNUSED(refb); + return ALIAS_MAY; +} + +/* XLOAD forwarding: scans the XSTORE chain down to the address ref for aliasing stores, then tries CSE against XLOADs with a ref above the search limit; emits the load if nothing matches. */ +TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J) +{ + IRRef xref = fins->op1; + IRRef lim = xref; /* Search limit. Starts at the address ref, raised to a conflicting store below. */ + IRIns *xr = IR(xref); + IRRef ref; + + if ((fins->op2 & IRXLOAD_READONLY)) /* Loads flagged read-only skip the store search entirely. */ + goto cselim; + + /* Search for conflicting stores. */ + ref = J->chain[IR_XSTORE]; + while (ref > xref) { + IRIns *store = IR(ref); + switch (aa_xref(J, xr, IR(store->op1))) { + case ALIAS_NO: break; /* Continue searching. */ + case ALIAS_MAY: lim = ref; goto cselim; /* Limit search for load. */ + case ALIAS_MUST: return store->op2; /* Store forwarding. NOTE(review): the stored value is returned without comparing its type to fins->t -- confirm before aa_xref can return ALIAS_MUST. */ + } + ref = store->prev; + } + +cselim: + /* Try to find a matching load. Below the conflicting store, if any. */ + ref = J->chain[IR_XLOAD]; + while (ref > lim) { + /* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */ + if (IR(ref)->op1 == fins->op1 && irt_sametype(IR(ref)->t, fins->t)) + return ref; + ref = IR(ref)->prev; + } + return lj_ir_emit(J); /* No forwarding or CSE candidate found: emit the load. */ +} + /* -- Forwarding of lj_tab_len -------------------------------------------- */ /* This is rather simplistic right now, but better than nothing. */