diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 955a54a4..9fa411a0 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -109,7 +109,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
 /* Check if there's no conflicting instruction between curins and ref.
 ** Also avoid fusing loads if there are multiple references.
 */
-static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
+static int noconflict(ASMState *as, IRRef ref, IROp conflict, int check)
 {
   IRIns *ir = as->ir;
   IRRef i = as->curins;
@@ -118,7 +118,9 @@ static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
   while (--i > ref) {
     if (ir[i].o == conflict)
       return 0;  /* Conflict found. */
-    else if (!noload && (ir[i].op1 == ref || ir[i].op2 == ref))
+    else if ((check & 1) && ir[i].o == IR_NEWREF)
+      return 0;
+    else if ((check & 2) && (ir[i].op1 == ref || ir[i].op2 == ref))
       return 0;
   }
   return 1;  /* Ok, no conflict. */
@@ -134,7 +136,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
     lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY");
     /* We can avoid the FLOAD of t->array for colocated arrays. */
     if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
-        !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) {
+        !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 0)) {
       as->mrm.ofs = (int32_t)sizeof(GCtab);  /* Ofs to colocated array. */
       return irb->op1;  /* Table obj. */
     }
@@ -456,7 +458,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
   RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
   if (ir->o == IR_SLOAD) {
     if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
-        noconflict(as, ref, IR_RETF, 0) &&
+        noconflict(as, ref, IR_RETF, 2) &&
         !(LJ_GC64 && irt_isaddr(ir->t))) {
       as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
       as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
@@ -467,13 +469,12 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
   } else if (ir->o == IR_FLOAD) {
     /* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
     if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) &&
-        noconflict(as, ref, IR_FSTORE, 0)) {
+        noconflict(as, ref, IR_FSTORE, 2)) {
       asm_fusefref(as, ir, xallow);
       return RID_MRM;
     }
   } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
-    if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) &&
-        noconflict(as, ref, IR_CALLS, 1) &&  /* Don't cross table.clear. */
+    if (noconflict(as, ref, ir->o + IRDELTA_L2S, 2+(ir->o != IR_ULOAD)) &&
         !(LJ_GC64 && irt_isaddr(ir->t))) {
       asm_fuseahuref(as, ir->op1, xallow);
       return RID_MRM;
@@ -483,7 +484,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
     ** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
     */
     if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) &&
-        noconflict(as, ref, IR_XSTORE, 0)) {
+        noconflict(as, ref, IR_XSTORE, 2)) {
       asm_fusexref(as, ir->op1, xallow);
       return RID_MRM;
     }
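
Note, not part of the patch: the last argument of noconflict() changes from a boolean noload to a small bitmask check. Bit 1 makes the backward scan refuse fusion when an IR_NEWREF lies between the load and curins; bit 2 keeps the old "multiple references" test (previously enabled by noload == 0). Call sites that passed 0 now pass 2 (same behavior), asm_fuseabase passes 0 (its conflict op is already IR_NEWREF), and the ALOAD/HLOAD/ULOAD path replaces the separate IR_CALLS scan with 2+(ir->o != IR_ULOAD), i.e. 3 for ALOAD/HLOAD and 2 for ULOAD. The standalone sketch below is only an illustration of that bitmask, using toy, hypothetical names (toy_noconflict, ToyIns, TOY_NEWREF) and a simplified instruction layout; it is not the LuaJIT code itself.

/* Toy model of the new noconflict() flag semantics (hypothetical names). */
#include <stdio.h>

typedef struct { int o, op1, op2; } ToyIns;   /* stand-in for IRIns */
enum { TOY_NEWREF = 100 };                    /* stand-in for IR_NEWREF */

/* Mirrors the backward scan: walk from curins down to (but not including) ref. */
static int toy_noconflict(ToyIns *ir, int curins, int ref, int conflict, int check)
{
  int i = curins;
  while (--i > ref) {
    if (ir[i].o == conflict)
      return 0;  /* Conflicting store/call found. */
    else if ((check & 1) && ir[i].o == TOY_NEWREF)
      return 0;  /* New check: don't fuse across a NEWREF. */
    else if ((check & 2) && (ir[i].op1 == ref || ir[i].op2 == ref))
      return 0;  /* Old noload test: the load has another reference. */
  }
  return 1;  /* Ok, no conflict. */
}

int main(void)
{
  ToyIns ir[4] = { {0,0,0}, {1,0,0}, {TOY_NEWREF,0,0}, {2,0,0} };
  printf("%d\n", toy_noconflict(ir, 3, 1, 99, 2));  /* 1: NEWREF ignored (ULOAD case) */
  printf("%d\n", toy_noconflict(ir, 3, 1, 99, 3));  /* 0: NEWREF blocks fusion (ALOAD/HLOAD case) */
  return 0;
}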