Use HIOP for XSTORE in SPLIT pass.

This commit is contained in:
Mike Pall 2012-07-02 22:37:00 +02:00
parent 7ae3832f20
commit 264177b0d0
5 changed files with 77 additions and 62 deletions

View File

@ -206,17 +206,19 @@ static IRRef asm_fuselsl2(ASMState *as, IRRef ref)
/* Fuse XLOAD/XSTORE reference into load/store operand. */
static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref,
RegSet allow)
RegSet allow, int32_t ofs)
{
IRIns *ir = IR(ref);
int32_t ofs = 0;
Reg base;
if (ra_noreg(ir->r) && mayfuse(as, ref)) {
int32_t lim = (ai & 0x04000000) ? 4096 : 256;
if (ir->o == IR_ADD) {
if (irref_isk(ir->op2) && (ofs = IR(ir->op2)->i) > -lim && ofs < lim) {
int32_t ofs2;
if (irref_isk(ir->op2) &&
(ofs2 = ofs + IR(ir->op2)->i) > -lim && ofs2 < lim) {
ofs = ofs2;
ref = ir->op1;
} else {
} else if (ofs == 0) {
IRRef lref = ir->op1, rref = ir->op2;
Reg rn, rm;
if ((ai & 0x04000000)) {
@ -237,6 +239,7 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref,
return;
}
} else if (ir->o == IR_STRREF) {
lua_assert(ofs == 0);
ofs = (int32_t)sizeof(GCstr);
if (irref_isk(ir->op2)) {
ofs += IR(ir->op2)->i;
@ -809,6 +812,9 @@ static void asm_fload(ASMState *as, IRIns *ir)
static void asm_fstore(ASMState *as, IRIns *ir)
{
if (ir->r == RID_SINK) { /* Sink store. */
asm_snap_prep(as);
} else {
Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
IRIns *irf = IR(ir->op1);
Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
@ -819,19 +825,20 @@ static void asm_fstore(ASMState *as, IRIns *ir)
else
emit_lsox(as, ai, src, idx, ofs);
}
}
static void asm_xload(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
}
static void asm_xstore(ASMState *as, IRIns *ir)
static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
{
Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
rset_exclude(RSET_GPR, src));
rset_exclude(RSET_GPR, src), ofs);
}
static void asm_ahuvload(ASMState *as, IRIns *ir)
@ -1374,6 +1381,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
if (uselo || usehi)
asm_fpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO);
return;
} else if ((ir-1)->o == IR_XSTORE) {
asm_xstore(as, ir, 4);
return;
}
if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
switch ((ir-1)->o) {
@ -1702,7 +1712,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
case IR_FSTORE: asm_fstore(as, ir); break;
case IR_XSTORE: asm_xstore(as, ir); break;
case IR_XSTORE: asm_xstore(as, ir, 0); break;
/* Allocations. */
case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;

View File

@ -183,20 +183,20 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
/* Fuse XLOAD/XSTORE reference into load/store operand. */
static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
RegSet allow)
RegSet allow, int32_t ofs)
{
IRIns *ir = IR(ref);
int32_t ofs = 0;
Reg base;
if (ra_noreg(ir->r) && mayfuse(as, ref)) {
if (ir->o == IR_ADD) {
int32_t ofs2;
if (irref_isk(ir->op2) && (ofs2 = IR(ir->op2)->i, checki16(ofs2))) {
if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) {
ref = ir->op1;
ofs = ofs2;
}
} else if (ir->o == IR_STRREF) {
int32_t ofs2 = 65536;
lua_assert(ofs == 0);
ofs = (int32_t)sizeof(GCstr);
if (irref_isk(ir->op2)) {
ofs2 = ofs + IR(ir->op2)->i;
@ -889,6 +889,10 @@ static void asm_fload(ASMState *as, IRIns *ir)
static void asm_fstore(ASMState *as, IRIns *ir)
{
if (ir->r == RID_SINK) { /* Sink store. */
asm_snap_prep(as);
return;
} else {
Reg src = ra_alloc1z(as, ir->op2, RSET_GPR);
IRIns *irf = IR(ir->op1);
Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
@ -897,19 +901,20 @@ static void asm_fstore(ASMState *as, IRIns *ir)
lua_assert(!irt_isfp(ir->t));
emit_tsi(as, mi, src, idx, ofs);
}
}
static void asm_xload(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
}
static void asm_xstore(ASMState *as, IRIns *ir)
static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
{
Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
rset_exclude(RSET_GPR, src));
rset_exclude(RSET_GPR, src), ofs);
}
static void asm_ahuvload(ASMState *as, IRIns *ir)
@ -1554,6 +1559,11 @@ static void asm_hiop(ASMState *as, IRIns *ir)
as->curins--; /* Always skip the loword comparison. */
asm_comp64eq(as, ir);
return;
} else if ((ir-1)->o == IR_XSTORE) {
as->curins--; /* Handle both stores here. */
asm_xstore(as, ir, LJ_LE ? 4 : 0);
asm_xstore(as, ir-1, LJ_LE ? 0 : 4);
return;
}
if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
switch ((ir-1)->o) {
@ -1832,7 +1842,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
case IR_FSTORE: asm_fstore(as, ir); break;
case IR_XSTORE: asm_xstore(as, ir); break;
case IR_XSTORE: asm_xstore(as, ir, 0); break;
/* Allocations. */
case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;

View File

@ -162,22 +162,24 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
/* Fuse XLOAD/XSTORE reference into load/store operand. */
static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
RegSet allow)
RegSet allow, int32_t ofs)
{
IRIns *ir = IR(ref);
int32_t ofs = 0;
Reg base;
if (ra_noreg(ir->r) && mayfuse(as, ref)) {
if (ir->o == IR_ADD) {
if (irref_isk(ir->op2) && (ofs = IR(ir->op2)->i, checki16(ofs))) {
int32_t ofs2;
if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) {
ofs = ofs2;
ref = ir->op1;
} else {
} else if (ofs == 0) {
Reg right, left = ra_alloc2(as, ir, allow);
right = (left >> 8); left &= 255;
emit_fab(as, PPCI_LWZX | ((pi >> 20) & 0x780), rt, left, right);
return;
}
} else if (ir->o == IR_STRREF) {
lua_assert(ofs == 0);
ofs = (int32_t)sizeof(GCstr);
if (irref_isk(ir->op2)) {
ofs += IR(ir->op2)->i;
@ -904,13 +906,13 @@ static void asm_xload(ASMState *as, IRIns *ir)
lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
if (irt_isi8(ir->t))
emit_as(as, PPCI_EXTSB, dest, dest);
asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
}
static void asm_xstore(ASMState *as, IRIns *ir)
static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
{
IRIns *irb;
if (mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP &&
if (ofs == 0 && mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP &&
ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) {
/* Fuse BSWAP with XSTORE to stwbrx. */
Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
@ -918,7 +920,7 @@ static void asm_xstore(ASMState *as, IRIns *ir)
} else {
Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
rset_exclude(RSET_GPR, src));
rset_exclude(RSET_GPR, src), ofs);
}
}
@ -1743,6 +1745,11 @@ static void asm_hiop(ASMState *as, IRIns *ir)
as->curins--; /* Always skip the loword comparison. */
asm_comp64(as, ir);
return;
} else if ((ir-1)->o == IR_XSTORE) {
as->curins--; /* Handle both stores here. */
asm_xstore(as, ir, 0);
asm_xstore(as, ir-1, 4);
return;
}
if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
switch ((ir-1)->o) {
@ -2035,7 +2042,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
case IR_FSTORE: asm_fstore(as, ir); break;
case IR_XSTORE: asm_xstore(as, ir); break;
case IR_XSTORE: asm_xstore(as, ir, 0); break;
/* Allocations. */
case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;

View File

@ -1273,11 +1273,12 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
}
rset_clear(allow, src);
}
if (ir->o == IR_FSTORE)
if (ir->o == IR_FSTORE) {
asm_fusefref(as, IR(ir->op1), allow);
else
} else {
asm_fusexref(as, ir->op1, allow);
/* ir->op2 is ignored -- unaligned stores are ok on x86. */
if (LJ_32 && ir->o == IR_HIOP) as->mrm.ofs += 4;
}
if (ra_hasreg(src)) {
x86Op xo;
switch (irt_type(ir->t)) {
@ -2249,6 +2250,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
} else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
asm_comp_int64(as, ir);
return;
} else if ((ir-1)->o == IR_XSTORE) {
asm_fxstore(as, ir);
return;
}
if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
switch ((ir-1)->o) {

View File

@ -78,8 +78,7 @@
** 0105 int HIOP 0103 +0
** 0106 p32 ADD base +16
** 0107 int XSTORE 0106 0104
** 0108 p32 ADD base +20
** 0109 int XSTORE 0108 0105
** 0108 int HIOP 0106 0105
**
** mov eax, [esi+0x8]
** mov ecx, [esi+0xc]
@ -328,19 +327,9 @@ static void split_ir(jit_State *J)
#endif
break;
}
case IR_ASTORE: case IR_HSTORE: case IR_USTORE:
case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE:
split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
break;
case IR_XSTORE: {
#if LJ_LE
IRRef hiref = hisubst[ir->op2];
#else
IRRef hiref = nir->op2; nir->op2 = hisubst[ir->op2];
#endif
split_emit(J, IRT(IR_XSTORE, IRT_SOFTFP),
split_ptr(J, oir, ir->op1), hiref);
break;
}
case IR_CONV: { /* Conversion to number. Others handled below. */
IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
UNUSED(st);
@ -434,12 +423,7 @@ static void split_ir(jit_State *J)
#endif
break;
case IR_XSTORE:
#if LJ_LE
hiref = hisubst[ir->op2];
#else
hiref = nir->op2; nir->op2 = hisubst[ir->op2];
#endif
split_emit(J, IRTI(IR_XSTORE), split_ptr(J, oir, ir->op1), hiref);
split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]);
break;
case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */
IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);