mirror of
https://github.com/LuaJIT/LuaJIT.git
synced 2025-02-08 07:34:07 +00:00
Reassociate XLOAD across PHIs to handle a[i-1] forwarding case.
Improved SciMark scores: http://luajit.org/download/scimark.lua

x86 SciMark LARGE      |    FFT     SOR      MC  SPARSE      LU
-----------------------+---------------------------------------
GCC 4.4.3        623.8 |   91.0   883.5   190.4   784.7  1169.6
LuaJIT git +FFI  651.2 |   97.2  1021.9   323.4   673.7  1139.6
LuaJIT git       527.7 |   91.4  1008.5   225.6   400.0   913.2

x64 SciMark LARGE      |    FFT     SOR      MC  SPARSE      LU
-----------------------+---------------------------------------
GCC 4.4.3        614.7 |   97.7   883.5   228.5   734.0  1129.9
JVM 1.6.0_22     707.5 |   79.2  1118.1   385.5   658.9  1295.7
LuaJIT git +FFI  632.8 |   89.1  1035.8   298.3   648.1  1092.9
LuaJIT git       516.1 |   88.4   995.4   225.6   382.1   888.9
This commit is contained in:
parent
c8d6f078a5
commit
dbab6cf511
@ -555,10 +555,10 @@ static AliasRet aa_cnew(jit_State *J, IRIns *refa, IRIns *refb)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Alias analysis for XLOAD/XSTORE. */
|
/* Alias analysis for XLOAD/XSTORE. */
|
||||||
static AliasRet aa_xref(jit_State *J, IRIns *xa, IRIns *xb)
|
static AliasRet aa_xref(jit_State *J, IRIns *refa, IRIns *xa, IRIns *xb)
|
||||||
{
|
{
|
||||||
ptrdiff_t ofsa = 0, ofsb = 0;
|
ptrdiff_t ofsa = 0, ofsb = 0;
|
||||||
IRIns *refa = IR(xa->op1), *refb = IR(xb->op1);
|
IRIns *refb = IR(xb->op1);
|
||||||
IRIns *basea = refa, *baseb = refb;
|
IRIns *basea = refa, *baseb = refb;
|
||||||
/* This implements (very) strict aliasing rules.
|
/* This implements (very) strict aliasing rules.
|
||||||
** Different types do NOT alias, except for differences in signedness.
|
** Different types do NOT alias, except for differences in signedness.
|
||||||
@ -602,10 +602,72 @@ static AliasRet aa_xref(jit_State *J, IRIns *xa, IRIns *xb)
|
|||||||
return aa_cnew(J, basea, baseb); /* Try to disambiguate allocations. */
|
return aa_cnew(J, basea, baseb); /* Try to disambiguate allocations. */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Return CSEd reference or 0. Caveat: swaps lower ref to the right! */
|
||||||
|
static IRRef reassoc_trycse(jit_State *J, IROp op, IRRef op1, IRRef op2)
|
||||||
|
{
|
||||||
|
IRRef ref = J->chain[op];
|
||||||
|
IRRef lim = op1;
|
||||||
|
if (op2 > lim) { lim = op2; op2 = op1; op1 = lim; }
|
||||||
|
while (ref > lim) {
|
||||||
|
IRIns *ir = IR(ref);
|
||||||
|
if (ir->op1 == op1 && ir->op2 == op2)
|
||||||
|
return ref;
|
||||||
|
ref = ir->prev;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Reassociate index references. */
|
||||||
|
static IRRef reassoc_xref(jit_State *J, IRIns *ir)
|
||||||
|
{
|
||||||
|
ptrdiff_t ofs = 0;
|
||||||
|
if (ir->o == IR_ADD && irref_isk(ir->op2)) { /* Get constant offset. */
|
||||||
|
IRIns *irk = IR(ir->op2);
|
||||||
|
ofs = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 :
|
||||||
|
(ptrdiff_t)irk->i;
|
||||||
|
ir = IR(ir->op1);
|
||||||
|
}
|
||||||
|
if (ir->o == IR_ADD) { /* Add of base + index. */
|
||||||
|
/* Index ref > base ref for loop-carried dependences. Only check op1. */
|
||||||
|
IRIns *ir2, *ir1 = IR(ir->op1);
|
||||||
|
int32_t shift = 0;
|
||||||
|
IRRef idxref;
|
||||||
|
/* Determine index shifts. Don't bother with IR_MUL here. */
|
||||||
|
if (ir1->o == IR_BSHL && irref_isk(ir1->op2))
|
||||||
|
shift = IR(ir1->op2)->i;
|
||||||
|
else if (ir1->o == IR_ADD && ir1->op1 == ir1->op2)
|
||||||
|
shift = 1;
|
||||||
|
else
|
||||||
|
ir1 = ir;
|
||||||
|
ir2 = IR(ir1->op1);
|
||||||
|
/* A non-reassociated add. Must be a loop-carried dependence. */
|
||||||
|
if (ir2->o == IR_ADD && irt_isint(ir2->t) && irref_isk(ir2->op2))
|
||||||
|
ofs += (ptrdiff_t)IR(ir2->op2)->i << shift;
|
||||||
|
else
|
||||||
|
return 0;
|
||||||
|
idxref = ir2->op1;
|
||||||
|
/* Try to CSE the reassociated chain. Give up if not found. */
|
||||||
|
if (ir1 != ir &&
|
||||||
|
!(idxref = reassoc_trycse(J, ir1->o, idxref,
|
||||||
|
ir1->o == IR_BSHL ? ir1->op2 : idxref)))
|
||||||
|
return 0;
|
||||||
|
if (!(idxref = reassoc_trycse(J, IR_ADD, idxref, ir->op2)))
|
||||||
|
return 0;
|
||||||
|
if (ofs != 0) {
|
||||||
|
IRRef refk = tref_ref(lj_ir_kintp(J, ofs));
|
||||||
|
if (!(idxref = reassoc_trycse(J, IR_ADD, idxref, refk)))
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return idxref; /* Success, found a reassociated index reference. Phew. */
|
||||||
|
}
|
||||||
|
return 0; /* Failure. */
|
||||||
|
}
|
||||||
|
|
||||||
/* XLOAD forwarding. */
|
/* XLOAD forwarding. */
|
||||||
TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J)
|
TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J)
|
||||||
{
|
{
|
||||||
IRRef xref = fins->op1;
|
IRRef xref = fins->op1;
|
||||||
|
IRIns *xr = IR(xref);
|
||||||
IRRef lim = xref; /* Search limit. */
|
IRRef lim = xref; /* Search limit. */
|
||||||
IRRef ref;
|
IRRef ref;
|
||||||
|
|
||||||
@ -614,9 +676,10 @@ TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J)
|
|||||||
|
|
||||||
/* Search for conflicting stores. */
|
/* Search for conflicting stores. */
|
||||||
ref = J->chain[IR_XSTORE];
|
ref = J->chain[IR_XSTORE];
|
||||||
|
retry:
|
||||||
while (ref > xref) {
|
while (ref > xref) {
|
||||||
IRIns *store = IR(ref);
|
IRIns *store = IR(ref);
|
||||||
switch (aa_xref(J, fins, store)) {
|
switch (aa_xref(J, xr, fins, store)) {
|
||||||
case ALIAS_NO: break; /* Continue searching. */
|
case ALIAS_NO: break; /* Continue searching. */
|
||||||
case ALIAS_MAY: lim = ref; goto cselim; /* Limit search for load. */
|
case ALIAS_MAY: lim = ref; goto cselim; /* Limit search for load. */
|
||||||
case ALIAS_MUST: return store->op2; /* Store forwarding. */
|
case ALIAS_MUST: return store->op2; /* Store forwarding. */
|
||||||
@ -629,10 +692,21 @@ cselim:
|
|||||||
ref = J->chain[IR_XLOAD];
|
ref = J->chain[IR_XLOAD];
|
||||||
while (ref > lim) {
|
while (ref > lim) {
|
||||||
/* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */
|
/* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */
|
||||||
if (IR(ref)->op1 == fins->op1 && irt_sametype(IR(ref)->t, fins->t))
|
if (IR(ref)->op1 == xref && irt_sametype(IR(ref)->t, fins->t))
|
||||||
return ref;
|
return ref;
|
||||||
ref = IR(ref)->prev;
|
ref = IR(ref)->prev;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Reassociate XLOAD across PHIs to handle a[i-1] forwarding case. */
|
||||||
|
if (!(fins->op2 & IRXLOAD_READONLY) && J->chain[IR_LOOP] &&
|
||||||
|
xref == fins->op1 && (xref = reassoc_xref(J, xr)) != 0) {
|
||||||
|
ref = J->chain[IR_XSTORE];
|
||||||
|
while (ref > lim) /* Skip stores that have already been checked. */
|
||||||
|
ref = IR(ref)->prev;
|
||||||
|
lim = xref;
|
||||||
|
xr = IR(xref);
|
||||||
|
goto retry; /* Retry with the reassociated reference. */
|
||||||
|
}
|
||||||
return lj_ir_emit(J);
|
return lj_ir_emit(J);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -640,12 +714,13 @@ cselim:
|
|||||||
TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J)
|
TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J)
|
||||||
{
|
{
|
||||||
IRRef xref = fins->op1;
|
IRRef xref = fins->op1;
|
||||||
|
IRIns *xr = IR(xref);
|
||||||
IRRef val = fins->op2; /* Stored value reference. */
|
IRRef val = fins->op2; /* Stored value reference. */
|
||||||
IRRef1 *refp = &J->chain[IR_XSTORE];
|
IRRef1 *refp = &J->chain[IR_XSTORE];
|
||||||
IRRef ref = *refp;
|
IRRef ref = *refp;
|
||||||
while (ref > xref) { /* Search for redundant or conflicting stores. */
|
while (ref > xref) { /* Search for redundant or conflicting stores. */
|
||||||
IRIns *store = IR(ref);
|
IRIns *store = IR(ref);
|
||||||
switch (aa_xref(J, fins, store)) {
|
switch (aa_xref(J, xr, fins, store)) {
|
||||||
case ALIAS_NO:
|
case ALIAS_NO:
|
||||||
break; /* Continue searching. */
|
break; /* Continue searching. */
|
||||||
case ALIAS_MAY:
|
case ALIAS_MAY:
|
||||||
|
Loading…
Reference in New Issue
Block a user