From 264177b0d041fdea5e07091a0c585607e9b55828 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 2 Jul 2012 22:37:00 +0200 Subject: [PATCH] Use HIOP for XSTORE in SPLIT pass. --- src/lj_asm_arm.h | 44 +++++++++++++++++++++++++++----------------- src/lj_asm_mips.h | 38 ++++++++++++++++++++++++-------------- src/lj_asm_ppc.h | 25 ++++++++++++++++--------- src/lj_asm_x86.h | 10 +++++++--- src/lj_opt_split.c | 22 +++------------------- 5 files changed, 77 insertions(+), 62 deletions(-) diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index 5ec3d59f..c08b6196 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -206,17 +206,19 @@ static IRRef asm_fuselsl2(ASMState *as, IRRef ref) /* Fuse XLOAD/XSTORE reference into load/store operand. */ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref, - RegSet allow) + RegSet allow, int32_t ofs) { IRIns *ir = IR(ref); - int32_t ofs = 0; Reg base; if (ra_noreg(ir->r) && mayfuse(as, ref)) { int32_t lim = (ai & 0x04000000) ? 4096 : 256; if (ir->o == IR_ADD) { - if (irref_isk(ir->op2) && (ofs = IR(ir->op2)->i) > -lim && ofs < lim) { + int32_t ofs2; + if (irref_isk(ir->op2) && + (ofs2 = ofs + IR(ir->op2)->i) > -lim && ofs2 < lim) { + ofs = ofs2; ref = ir->op1; - } else { + } else if (ofs == 0) { IRRef lref = ir->op1, rref = ir->op2; Reg rn, rm; if ((ai & 0x04000000)) { @@ -237,6 +239,7 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref, return; } } else if (ir->o == IR_STRREF) { + lua_assert(ofs == 0); ofs = (int32_t)sizeof(GCstr); if (irref_isk(ir->op2)) { ofs += IR(ir->op2)->i; @@ -809,29 +812,33 @@ static void asm_fload(ASMState *as, IRIns *ir) static void asm_fstore(ASMState *as, IRIns *ir) { - Reg src = ra_alloc1(as, ir->op2, RSET_GPR); - IRIns *irf = IR(ir->op1); - Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); - int32_t ofs = field_ofs[irf->op2]; - ARMIns ai = asm_fxstoreins(ir); - if ((ai & 0x04000000)) - emit_lso(as, ai, src, idx, ofs); - else - emit_lsox(as, ai, src, idx, ofs); + if (ir->r == RID_SINK) { /* Sink store. */ + asm_snap_prep(as); + } else { + Reg src = ra_alloc1(as, ir->op2, RSET_GPR); + IRIns *irf = IR(ir->op1); + Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); + int32_t ofs = field_ofs[irf->op2]; + ARMIns ai = asm_fxstoreins(ir); + if ((ai & 0x04000000)) + emit_lso(as, ai, src, idx, ofs); + else + emit_lsox(as, ai, src, idx, ofs); + } } static void asm_xload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); + asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); } -static void asm_xstore(ASMState *as, IRIns *ir) +static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) { Reg src = ra_alloc1(as, ir->op2, RSET_GPR); asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, - rset_exclude(RSET_GPR, src)); + rset_exclude(RSET_GPR, src), ofs); } static void asm_ahuvload(ASMState *as, IRIns *ir) @@ -1374,6 +1381,9 @@ static void asm_hiop(ASMState *as, IRIns *ir) if (uselo || usehi) asm_fpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO); return; + } else if ((ir-1)->o == IR_XSTORE) { + asm_xstore(as, ir, 4); + return; } if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ switch ((ir-1)->o) { @@ -1702,7 +1712,7 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; case IR_FSTORE: asm_fstore(as, ir); break; - case IR_XSTORE: asm_xstore(as, ir); break; + case IR_XSTORE: asm_xstore(as, ir, 0); break; /* Allocations. */ case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index 9bae4778..a3a4da6c 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -183,20 +183,20 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) /* Fuse XLOAD/XSTORE reference into load/store operand. */ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref, - RegSet allow) + RegSet allow, int32_t ofs) { IRIns *ir = IR(ref); - int32_t ofs = 0; Reg base; if (ra_noreg(ir->r) && mayfuse(as, ref)) { if (ir->o == IR_ADD) { int32_t ofs2; - if (irref_isk(ir->op2) && (ofs2 = IR(ir->op2)->i, checki16(ofs2))) { + if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) { ref = ir->op1; ofs = ofs2; } } else if (ir->o == IR_STRREF) { int32_t ofs2 = 65536; + lua_assert(ofs == 0); ofs = (int32_t)sizeof(GCstr); if (irref_isk(ir->op2)) { ofs2 = ofs + IR(ir->op2)->i; @@ -889,27 +889,32 @@ static void asm_fload(ASMState *as, IRIns *ir) static void asm_fstore(ASMState *as, IRIns *ir) { - Reg src = ra_alloc1z(as, ir->op2, RSET_GPR); - IRIns *irf = IR(ir->op1); - Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); - int32_t ofs = field_ofs[irf->op2]; - MIPSIns mi = asm_fxstoreins(ir); - lua_assert(!irt_isfp(ir->t)); - emit_tsi(as, mi, src, idx, ofs); + if (ir->r == RID_SINK) { /* Sink store. */ + asm_snap_prep(as); + return; + } else { + Reg src = ra_alloc1z(as, ir->op2, RSET_GPR); + IRIns *irf = IR(ir->op1); + Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); + int32_t ofs = field_ofs[irf->op2]; + MIPSIns mi = asm_fxstoreins(ir); + lua_assert(!irt_isfp(ir->t)); + emit_tsi(as, mi, src, idx, ofs); + } } static void asm_xload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); + asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); } -static void asm_xstore(ASMState *as, IRIns *ir) +static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) { Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, - rset_exclude(RSET_GPR, src)); + rset_exclude(RSET_GPR, src), ofs); } static void asm_ahuvload(ASMState *as, IRIns *ir) @@ -1554,6 +1559,11 @@ static void asm_hiop(ASMState *as, IRIns *ir) as->curins--; /* Always skip the loword comparison. */ asm_comp64eq(as, ir); return; + } else if ((ir-1)->o == IR_XSTORE) { + as->curins--; /* Handle both stores here. */ + asm_xstore(as, ir, LJ_LE ? 4 : 0); + asm_xstore(as, ir-1, LJ_LE ? 0 : 4); + return; } if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ switch ((ir-1)->o) { @@ -1832,7 +1842,7 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; case IR_FSTORE: asm_fstore(as, ir); break; - case IR_XSTORE: asm_xstore(as, ir); break; + case IR_XSTORE: asm_xstore(as, ir, 0); break; /* Allocations. */ case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index 73942b8b..ec22e260 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -162,22 +162,24 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) /* Fuse XLOAD/XSTORE reference into load/store operand. */ static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref, - RegSet allow) + RegSet allow, int32_t ofs) { IRIns *ir = IR(ref); - int32_t ofs = 0; Reg base; if (ra_noreg(ir->r) && mayfuse(as, ref)) { if (ir->o == IR_ADD) { - if (irref_isk(ir->op2) && (ofs = IR(ir->op2)->i, checki16(ofs))) { + int32_t ofs2; + if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) { + ofs = ofs2; ref = ir->op1; - } else { + } else if (ofs == 0) { Reg right, left = ra_alloc2(as, ir, allow); right = (left >> 8); left &= 255; emit_fab(as, PPCI_LWZX | ((pi >> 20) & 0x780), rt, left, right); return; } } else if (ir->o == IR_STRREF) { + lua_assert(ofs == 0); ofs = (int32_t)sizeof(GCstr); if (irref_isk(ir->op2)) { ofs += IR(ir->op2)->i; @@ -904,13 +906,13 @@ static void asm_xload(ASMState *as, IRIns *ir) lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); if (irt_isi8(ir->t)) emit_as(as, PPCI_EXTSB, dest, dest); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); + asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); } -static void asm_xstore(ASMState *as, IRIns *ir) +static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) { IRIns *irb; - if (mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP && + if (ofs == 0 && mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP && ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) { /* Fuse BSWAP with XSTORE to stwbrx. */ Reg src = ra_alloc1(as, irb->op1, RSET_GPR); @@ -918,7 +920,7 @@ static void asm_xstore(ASMState *as, IRIns *ir) } else { Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, - rset_exclude(RSET_GPR, src)); + rset_exclude(RSET_GPR, src), ofs); } } @@ -1743,6 +1745,11 @@ static void asm_hiop(ASMState *as, IRIns *ir) as->curins--; /* Always skip the loword comparison. */ asm_comp64(as, ir); return; + } else if ((ir-1)->o == IR_XSTORE) { + as->curins--; /* Handle both stores here. */ + asm_xstore(as, ir, 0); + asm_xstore(as, ir-1, 4); + return; } if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ switch ((ir-1)->o) { @@ -2035,7 +2042,7 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; case IR_FSTORE: asm_fstore(as, ir); break; - case IR_XSTORE: asm_xstore(as, ir); break; + case IR_XSTORE: asm_xstore(as, ir, 0); break; /* Allocations. */ case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index c4ebdb1f..7647b03f 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -1273,11 +1273,12 @@ static void asm_fxstore(ASMState *as, IRIns *ir) } rset_clear(allow, src); } - if (ir->o == IR_FSTORE) + if (ir->o == IR_FSTORE) { asm_fusefref(as, IR(ir->op1), allow); - else + } else { asm_fusexref(as, ir->op1, allow); - /* ir->op2 is ignored -- unaligned stores are ok on x86. */ + if (LJ_32 && ir->o == IR_HIOP) as->mrm.ofs += 4; + } if (ra_hasreg(src)) { x86Op xo; switch (irt_type(ir->t)) { @@ -2249,6 +2250,9 @@ static void asm_hiop(ASMState *as, IRIns *ir) } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ asm_comp_int64(as, ir); return; + } else if ((ir-1)->o == IR_XSTORE) { + asm_fxstore(as, ir); + return; } if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ switch ((ir-1)->o) { diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c index 72720e86..77b8e2dd 100644 --- a/src/lj_opt_split.c +++ b/src/lj_opt_split.c @@ -78,8 +78,7 @@ ** 0105 int HIOP 0103 +0 ** 0106 p32 ADD base +16 ** 0107 int XSTORE 0106 0104 -** 0108 p32 ADD base +20 -** 0109 int XSTORE 0108 0105 +** 0108 int HIOP 0106 0105 ** ** mov eax, [esi+0x8] ** mov ecx, [esi+0xc] @@ -328,19 +327,9 @@ static void split_ir(jit_State *J) #endif break; } - case IR_ASTORE: case IR_HSTORE: case IR_USTORE: + case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE: split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]); break; - case IR_XSTORE: { -#if LJ_LE - IRRef hiref = hisubst[ir->op2]; -#else - IRRef hiref = nir->op2; nir->op2 = hisubst[ir->op2]; -#endif - split_emit(J, IRT(IR_XSTORE, IRT_SOFTFP), - split_ptr(J, oir, ir->op1), hiref); - break; - } case IR_CONV: { /* Conversion to number. Others handled below. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); UNUSED(st); @@ -434,12 +423,7 @@ static void split_ir(jit_State *J) #endif break; case IR_XSTORE: -#if LJ_LE - hiref = hisubst[ir->op2]; -#else - hiref = nir->op2; nir->op2 = hisubst[ir->op2]; -#endif - split_emit(J, IRTI(IR_XSTORE), split_ptr(J, oir, ir->op1), hiref); + split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]); break; case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);