Refactoring of conversion ops, part 3: add FOLD rules for IR_CONV.

This commit is contained in:
Mike Pall 2010-12-31 01:09:30 +01:00
parent 65b194a2f8
commit 1f26961092
2 changed files with 226 additions and 35 deletions

View File

@ -124,7 +124,7 @@ static uint32_t nexttoken(char **pp, int allowlit, int allowany)
if (*p == '\0') if (*p == '\0')
return i; return i;
} else if (allowany && !strcmp("any", p)) { } else if (allowany && !strcmp("any", p)) {
return 0xff; return allowany;
} else { } else {
for (i = 0; ir_names[i]; i++) for (i = 0; ir_names[i]; i++)
if (!strcmp(ir_names[i], p)) if (!strcmp(ir_names[i], p))
@ -140,9 +140,9 @@ static uint32_t nexttoken(char **pp, int allowlit, int allowany)
static void foldrule(char *p) static void foldrule(char *p)
{ {
uint32_t op = nexttoken(&p, 0, 0); uint32_t op = nexttoken(&p, 0, 0);
uint32_t left = nexttoken(&p, 0, 1); uint32_t left = nexttoken(&p, 0, 0x7f);
uint32_t right = nexttoken(&p, 1, 1); uint32_t right = nexttoken(&p, 1, 0x3ff);
uint32_t key = (funcidx << 24) | (op << 16) | (left << 8) | right; uint32_t key = (funcidx << 24) | (op << 17) | (left << 10) | right;
uint32_t i; uint32_t i;
if (nkeys >= BUILD_MAX_FOLD) { if (nkeys >= BUILD_MAX_FOLD) {
fprintf(stderr, "Error: too many fold rules, increase BUILD_MAX_FOLD.\n"); fprintf(stderr, "Error: too many fold rules, increase BUILD_MAX_FOLD.\n");

View File

@ -489,6 +489,73 @@ LJFOLDF(kfold_toi64_knum)
return INT64FOLD((uint64_t)(int64_t)knumleft); return INT64FOLD((uint64_t)(int64_t)knumleft);
} }
/* Constant-fold CONV of an integer constant to a number constant. */
LJFOLD(CONV KINT IRCONV_NUM_INT)
LJFOLDF(kfold_conv_kint_num)
{
  int32_t k = fleft->i;
  return lj_ir_knum(J, cast_num(k));
}
/* Constant-fold CONV of an integer constant to a 64 bit integer constant.
** The value is sign-extended for both the I64 and the U64 destination.
*/
LJFOLD(CONV KINT IRCONV_I64_INT)
LJFOLD(CONV KINT IRCONV_U64_INT)
LJFOLDF(kfold_conv_kint_i64)
{
  int64_t wide = (int64_t)fleft->i;
  return INT64FOLD((uint64_t)wide);
}
/* Constant-fold CONV of a 64 bit constant, read as signed, to a number. */
LJFOLD(CONV KINT64 IRCONV_NUM_I64)
LJFOLDF(kfold_conv_kint64_num_i64)
{
  int64_t k = (int64_t)ir_kint64(fleft)->u64;
  return lj_ir_knum(J, cast_num(k));
}
/* Constant-fold CONV of a 64 bit constant, read as unsigned, to a number. */
LJFOLD(CONV KINT64 IRCONV_NUM_U64)
LJFOLDF(kfold_conv_kint64_num_u64)
{
  uint64_t k = ir_kint64(fleft)->u64;
  return lj_ir_knum(J, cast_num(k));
}
/* Constant-fold CONV of a 64 bit constant to a 32 bit integer constant.
** The value is narrowed with a plain cast to int32_t.
*/
LJFOLD(CONV KINT64 IRCONV_INT_I64)
LJFOLD(CONV KINT64 IRCONV_U32_I64)
LJFOLDF(kfold_conv_kint64_int_i64)
{
  uint64_t k = ir_kint64(fleft)->u64;
  return INTFOLD((int32_t)k);
}
/* Constant-fold CONV of a number constant to an integer constant. */
LJFOLD(CONV KNUM IRCONV_INT_NUM)
LJFOLDF(kfold_conv_knum_int_num)
{
  lua_Number n = knumleft;
  int32_t k;
  /* Truncating mode: a plain cast, no exactness check. */
  if ((fins->op2 & IRCONV_TRUNC))
    return INTFOLD((int32_t)n);
  k = lj_num2int(n);
  if (irt_isguard(fins->t) && cast_num(k) != n) {
    /* We're about to create a guard which always fails, like CONV +1.5.
    ** Some pathological loops cause this during LICM, e.g.:
    **   local x,k,t = 0,1.5,{1,[1.5]=2}
    **   for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
    **   assert(x == 300)
    */
    return FAILFOLD;
  }
  return INTFOLD(k);
}
/* Constant-fold CONV of a number constant to a signed 64 bit constant.
** Asserts that the conversion has the truncation flag set.
*/
LJFOLD(CONV KNUM IRCONV_I64_NUM)
LJFOLDF(kfold_conv_knum_i64_num)
{
  int64_t k;
  lua_assert((fins->op2 & IRCONV_TRUNC));
  k = (int64_t)knumleft;
  return INT64FOLD((uint64_t)k);
}
/* Constant-fold CONV of a number constant to an unsigned 64 bit constant.
** Asserts that the conversion has the truncation flag set.
*/
LJFOLD(CONV KNUM IRCONV_U64_NUM)
LJFOLDF(kfold_conv_knum_u64_num)
{
  lua_Number n = knumleft;
  lua_assert((fins->op2 & IRCONV_TRUNC));
  return INT64FOLD(lj_num2u64(n));
}
LJFOLD(TOSTR KNUM) LJFOLD(TOSTR KNUM)
LJFOLDF(kfold_tostr_knum) LJFOLDF(kfold_tostr_knum)
{ {
@ -740,8 +807,152 @@ LJFOLDF(simplify_powi_kx)
return NEXTFOLD; return NEXTFOLD;
} }
/* -- FP conversion narrowing --------------------------------------------- */ /* -- Simplify conversions ------------------------------------------------ */
/* Shortcut int->num applied to a guarded num->int: f(g(x)) ==> x. */
LJFOLD(CONV CONV IRCONV_NUM_INT)  /* _NUM */
LJFOLDF(shortcut_conv_num_int)
{
  PHIBARRIER(fleft);
  /* Only safe if the inner conversion is from num and is guarded. */
  if ((fleft->op2 & IRCONV_SRCMASK) != IRT_NUM || !irt_isguard(fleft->t))
    return NEXTFOLD;
  return fleft->op1;
}
/* Simplify num->int applied to an int->num conversion: use the int operand. */
LJFOLD(CONV CONV IRCONV_INT_NUM)  /* _INT */
LJFOLDF(simplify_conv_int_num)
{
  /* No PHI barrier: fold even across PHI to avoid expensive num->int
  ** conversions in the loop.
  */
  if ((fleft->op2 & IRCONV_SRCMASK) != IRT_INT)
    return NEXTFOLD;
  return fleft->op1;
}
/* Simplify num->u32 applied to a u32->num conversion: use the u32 operand. */
LJFOLD(CONV CONV IRCONV_U32_NUM)  /* _U32 */
LJFOLDF(simplify_conv_u32_num)
{
  /* No PHI barrier: fold even across PHI to avoid expensive num->int
  ** conversions in the loop.
  */
  if ((fleft->op2 & IRCONV_SRCMASK) != IRT_U32)
    return NEXTFOLD;
  return fleft->op1;
}
/* Simplify num->i64 applied to an int->num or u32->num conversion by
** converting directly from the original integer operand.
*/
LJFOLD(CONV CONV IRCONV_I64_NUM)  /* _INT or _U32 */
LJFOLDF(simplify_conv_i64_num)
{
  uint32_t srct = (fleft->op2 & IRCONV_SRCMASK);
  PHIBARRIER(fleft);
  if (srct == IRT_INT) {
    /* Reduce to a sign-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
    return RETRYFOLD;
  }
  if (srct != IRT_U32)
    return NEXTFOLD;
#if LJ_TARGET_X64
  /* On x64 the u32 operand is used as-is. */
  return fleft->op1;
#else
  /* Reduce to a zero-extension. */
  fins->op1 = fleft->op1;
  fins->op2 = (IRT_I64<<5)|IRT_U32;
  return RETRYFOLD;
#endif
}
/* Simplify num->u64 applied to a u32->num conversion by converting
** directly from the original u32 operand.
*/
LJFOLD(CONV CONV IRCONV_U64_NUM)  /* _U32 */
LJFOLDF(simplify_conv_u64_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) != IRT_U32)
    return NEXTFOLD;
#if LJ_TARGET_X64
  /* On x64 the u32 operand is used as-is. */
  return fleft->op1;
#else
  /* Reduce to a zero-extension. */
  fins->op1 = fleft->op1;
  fins->op2 = (IRT_U64<<5)|IRT_U32;
  return RETRYFOLD;
#endif
}
/* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion:
** the integer operand of the inner conversion is used directly.
*/
LJFOLD(TOBIT CONV KNUM)
LJFOLDF(simplify_tobit_conv)
{
  uint32_t srct = (fleft->op2 & IRCONV_SRCMASK);
  if (srct != IRT_INT && srct != IRT_U32)
    return NEXTFOLD;
  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
  lua_assert(irt_isnum(fleft->t));
  return fleft->op1;
}
/* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion:
** the rounding op is dropped and the converted value is returned (LEFTFOLD).
*/
LJFOLD(FPMATH CONV IRFPM_FLOOR)
LJFOLD(FPMATH CONV IRFPM_CEIL)
LJFOLD(FPMATH CONV IRFPM_TRUNC)
LJFOLDF(simplify_floor_conv)
{
  uint32_t srct = (fleft->op2 & IRCONV_SRCMASK);
  if (srct != IRT_INT && srct != IRT_U32)
    return NEXTFOLD;
  return LEFTFOLD;
}
/* Strength reduction of widening.
**
** Applies only to int->i64 conversions with the IRCONV_SEXT flag set.
** If the operand is the scalar evolution index (optionally plus a constant
** offset) and its lower bound is an integer constant such that
** bound + offset >= 0, the value can never be negative. Then the
** sign-extension is eliminated (x64) or reduced to a zero-extension.
*/
LJFOLD(CONV any IRCONV_I64_INT)
LJFOLDF(simplify_conv_sext)
{
  IRRef ref = fins->op1;
  int64_t ofs = 0;
  if (!(fins->op2 & IRCONV_SEXT))
    return NEXTFOLD;
  PHIBARRIER(fleft);
  /* Look through "idx + constant" and remember the constant offset. */
  if (fleft->o == IR_ADD && irref_isk(fleft->op2)) {
    ofs = (int64_t)IR(fleft->op2)->i;
    ref = fleft->op1;
  }
  /* Use scalar evolution analysis results to strength-reduce sign-extension. */
  if (ref == J->scev.idx) {
    /* Pick the bound the index starts from, depending on loop direction. */
    IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
    lua_assert(irt_isint(J->scev.t));
    /* The bound is only usable if it's an integer constant. Otherwise the
    ** i field of the instruction doesn't hold a value.
    */
    if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) {
#if LJ_TARGET_X64
      /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
      return LEFTFOLD;
#else
      /* Reduce to a (cheaper) zero-extension. */
      fins->op2 &= ~IRCONV_SEXT;
      return RETRYFOLD;
#endif
    }
  }
  return NEXTFOLD;
}
/* Special CSE rule for CONV.
** Searches the chain of existing CONV instructions for one with the same
** operand and the same conversion mode bits. An existing guarded
** conversion may stand in for an unguarded one, but not vice versa.
*/
LJFOLD(CONV any any)
LJFOLDF(cse_conv)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef src = fins->op1, mode = (fins->op2 & IRCONV_MODEMASK);
    uint8_t need_guard = irt_isguard(fins->t);
    IRRef ref;
    /* Candidates must come after the operand, so stop at ref <= src. */
    for (ref = J->chain[IR_CONV]; ref > src; ref = IR(ref)->prev) {
      IRIns *ir = IR(ref);
      if (ir->op1 != src)
	continue;
      if ((ir->op2 & IRCONV_MODEMASK) != mode)
	continue;
      /* Commoning with stronger checks is ok. */
      if (irt_isguard(ir->t) >= need_guard)
	return ref;
    }
  }
  return EMITFOLD;  /* No fallthrough to regular CSE. */
}
/* FP conversion narrowing. */
LJFOLD(TOINT ADD any) LJFOLD(TOINT ADD any)
LJFOLD(TOINT SUB any) LJFOLD(TOINT SUB any)
LJFOLD(TOBIT ADD KNUM) LJFOLD(TOBIT ADD KNUM)
@ -771,26 +982,6 @@ LJFOLDF(cse_toint)
return EMITFOLD; /* No fallthrough to regular CSE. */ return EMITFOLD; /* No fallthrough to regular CSE. */
} }
/* Special CSE rule for CONV.
** Looks for an existing CONV instruction with the same operand and the
** same conversion mode bits. Returns its reference if found, otherwise
** emits the instruction.
*/
LJFOLD(CONV any any)
LJFOLDF(cse_conv)
{
/* Only search for a match if the CSE optimization flag is set. */
if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
/* op2 is reduced to the bits selected by IRCONV_MODEMASK. */
IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
uint8_t guard = irt_isguard(fins->t);
/* Walk the chain of CONV instructions, linked through ir->prev. */
IRRef ref = J->chain[IR_CONV];
/* Stop once the chain reaches the operand's reference or below. */
while (ref > op1) {
IRIns *ir = IR(ref);
/* Commoning with stronger checks is ok. */
if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
irt_isguard(ir->t) >= guard)
return ref;
ref = ir->prev;
}
}
return EMITFOLD; /* No fallthrough to regular CSE. */
}
/* -- Strength reduction of widening -------------------------------------- */ /* -- Strength reduction of widening -------------------------------------- */
LJFOLD(TOI64 any 3) /* IRTOINT_ZEXT64 */ LJFOLD(TOI64 any 3) /* IRTOINT_ZEXT64 */
@ -1723,12 +1914,12 @@ LJFOLDX(lj_ir_emit)
/* Every entry in the generated hash table is a 32 bit pattern: /* Every entry in the generated hash table is a 32 bit pattern:
** **
** xxxxxxxx iiiiiiii llllllll rrrrrrrr ** xxxxxxxx iiiiiii lllllll rrrrrrrrrr
** **
** xxxxxxxx = 8 bit index into fold function table ** xxxxxxxx = 8 bit index into fold function table
** iiiiiiii = 8 bit folded instruction opcode ** iiiiiii = 7 bit folded instruction opcode
** llllllll = 8 bit left instruction opcode ** lllllll = 7 bit left instruction opcode
** rrrrrrrr = 8 bit right instruction opcode or 8 bits from literal field ** rrrrrrrrrr = 8 bit right instruction opcode or 10 bits from literal field
*/ */
#include "lj_folddef.h" #include "lj_folddef.h"
@ -1762,9 +1953,9 @@ TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
/* Fold engine start/retry point. */ /* Fold engine start/retry point. */
retry: retry:
/* Construct key from opcode and operand opcodes (unless literal/none). */ /* Construct key from opcode and operand opcodes (unless literal/none). */
key = ((uint32_t)fins->o << 16); key = ((uint32_t)fins->o << 17);
if (fins->op1 >= J->cur.nk) { if (fins->op1 >= J->cur.nk) {
key += (uint32_t)IR(fins->op1)->o << 8; key += (uint32_t)IR(fins->op1)->o << 10;
*fleft = *IR(fins->op1); *fleft = *IR(fins->op1);
} }
if (fins->op2 >= J->cur.nk) { if (fins->op2 >= J->cur.nk) {
@ -1777,7 +1968,7 @@ retry:
/* Check for a match in order from most specific to least specific. */ /* Check for a match in order from most specific to least specific. */
any = 0; any = 0;
for (;;) { for (;;) {
uint32_t k = key | any; uint32_t k = key | (any & 0x1ffff);
uint32_t h = fold_hashkey(k); uint32_t h = fold_hashkey(k);
uint32_t fh = fold_hash[h]; /* Lookup key in semi-perfect hash table. */ uint32_t fh = fold_hash[h]; /* Lookup key in semi-perfect hash table. */
if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) { if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
@ -1785,9 +1976,9 @@ retry:
if (ref != NEXTFOLD) if (ref != NEXTFOLD)
break; break;
} }
if (any == 0xffff) /* Exhausted folding. Pass on to CSE. */ if (any == 0xfffff) /* Exhausted folding. Pass on to CSE. */
return lj_opt_cse(J); return lj_opt_cse(J);
any = (any | (any >> 8)) ^ 0xff00; any = (any | (any >> 10)) ^ 0xffc00;
} }
/* Return value processing, ordered by frequency. */ /* Return value processing, ordered by frequency. */