mirror of
https://github.com/LuaJIT/LuaJIT.git
synced 2025-02-07 23:24:09 +00:00
Refactoring of conversion ops, part 3: add FOLD rules for IR_CONV.
This commit is contained in:
parent
65b194a2f8
commit
1f26961092
@ -124,7 +124,7 @@ static uint32_t nexttoken(char **pp, int allowlit, int allowany)
|
||||
if (*p == '\0')
|
||||
return i;
|
||||
} else if (allowany && !strcmp("any", p)) {
|
||||
return 0xff;
|
||||
return allowany;
|
||||
} else {
|
||||
for (i = 0; ir_names[i]; i++)
|
||||
if (!strcmp(ir_names[i], p))
|
||||
@ -140,9 +140,9 @@ static uint32_t nexttoken(char **pp, int allowlit, int allowany)
|
||||
static void foldrule(char *p)
|
||||
{
|
||||
uint32_t op = nexttoken(&p, 0, 0);
|
||||
uint32_t left = nexttoken(&p, 0, 1);
|
||||
uint32_t right = nexttoken(&p, 1, 1);
|
||||
uint32_t key = (funcidx << 24) | (op << 16) | (left << 8) | right;
|
||||
uint32_t left = nexttoken(&p, 0, 0x7f);
|
||||
uint32_t right = nexttoken(&p, 1, 0x3ff);
|
||||
uint32_t key = (funcidx << 24) | (op << 17) | (left << 10) | right;
|
||||
uint32_t i;
|
||||
if (nkeys >= BUILD_MAX_FOLD) {
|
||||
fprintf(stderr, "Error: too many fold rules, increase BUILD_MAX_FOLD.\n");
|
||||
|
@ -489,6 +489,73 @@ LJFOLDF(kfold_toi64_knum)
|
||||
return INT64FOLD((uint64_t)(int64_t)knumleft);
|
||||
}
|
||||
|
||||
LJFOLD(CONV KINT IRCONV_NUM_INT)
|
||||
LJFOLDF(kfold_conv_kint_num)
|
||||
{
|
||||
return lj_ir_knum(J, cast_num(fleft->i));
|
||||
}
|
||||
|
||||
LJFOLD(CONV KINT IRCONV_I64_INT)
|
||||
LJFOLD(CONV KINT IRCONV_U64_INT)
|
||||
LJFOLDF(kfold_conv_kint_i64)
|
||||
{
|
||||
return INT64FOLD((uint64_t)(int64_t)fleft->i);
|
||||
}
|
||||
|
||||
LJFOLD(CONV KINT64 IRCONV_NUM_I64)
|
||||
LJFOLDF(kfold_conv_kint64_num_i64)
|
||||
{
|
||||
return lj_ir_knum(J, cast_num((int64_t)ir_kint64(fleft)->u64));
|
||||
}
|
||||
|
||||
LJFOLD(CONV KINT64 IRCONV_NUM_U64)
|
||||
LJFOLDF(kfold_conv_kint64_num_u64)
|
||||
{
|
||||
return lj_ir_knum(J, cast_num(ir_kint64(fleft)->u64));
|
||||
}
|
||||
|
||||
LJFOLD(CONV KINT64 IRCONV_INT_I64)
|
||||
LJFOLD(CONV KINT64 IRCONV_U32_I64)
|
||||
LJFOLDF(kfold_conv_kint64_int_i64)
|
||||
{
|
||||
return INTFOLD((int32_t)ir_kint64(fleft)->u64);
|
||||
}
|
||||
|
||||
LJFOLD(CONV KNUM IRCONV_INT_NUM)
|
||||
LJFOLDF(kfold_conv_knum_int_num)
|
||||
{
|
||||
lua_Number n = knumleft;
|
||||
if (!(fins->op2 & IRCONV_TRUNC)) {
|
||||
int32_t k = lj_num2int(n);
|
||||
if (irt_isguard(fins->t) && n != cast_num(k)) {
|
||||
/* We're about to create a guard which always fails, like CONV +1.5.
|
||||
** Some pathological loops cause this during LICM, e.g.:
|
||||
** local x,k,t = 0,1.5,{1,[1.5]=2}
|
||||
** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
|
||||
** assert(x == 300)
|
||||
*/
|
||||
return FAILFOLD;
|
||||
}
|
||||
return INTFOLD(k);
|
||||
} else {
|
||||
return INTFOLD((int32_t)n);
|
||||
}
|
||||
}
|
||||
|
||||
LJFOLD(CONV KNUM IRCONV_I64_NUM)
|
||||
LJFOLDF(kfold_conv_knum_i64_num)
|
||||
{
|
||||
lua_assert((fins->op2 & IRCONV_TRUNC));
|
||||
return INT64FOLD((uint64_t)(int64_t)knumleft);
|
||||
}
|
||||
|
||||
LJFOLD(CONV KNUM IRCONV_U64_NUM)
|
||||
LJFOLDF(kfold_conv_knum_u64_num)
|
||||
{
|
||||
lua_assert((fins->op2 & IRCONV_TRUNC));
|
||||
return INT64FOLD(lj_num2u64(knumleft));
|
||||
}
|
||||
|
||||
LJFOLD(TOSTR KNUM)
|
||||
LJFOLDF(kfold_tostr_knum)
|
||||
{
|
||||
@ -740,8 +807,152 @@ LJFOLDF(simplify_powi_kx)
|
||||
return NEXTFOLD;
|
||||
}
|
||||
|
||||
/* -- FP conversion narrowing --------------------------------------------- */
|
||||
/* -- Simplify conversions ------------------------------------------------ */
|
||||
|
||||
LJFOLD(CONV CONV IRCONV_NUM_INT) /* _NUM */
|
||||
LJFOLDF(shortcut_conv_num_int)
|
||||
{
|
||||
PHIBARRIER(fleft);
|
||||
/* Only safe with a guarded conversion to int. */
|
||||
if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM && irt_isguard(fleft->t))
|
||||
return fleft->op1; /* f(g(x)) ==> x */
|
||||
return NEXTFOLD;
|
||||
}
|
||||
|
||||
LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */
|
||||
LJFOLDF(simplify_conv_int_num)
|
||||
{
|
||||
/* Fold even across PHI to avoid expensive num->int conversions in loop. */
|
||||
if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT)
|
||||
return fleft->op1;
|
||||
return NEXTFOLD;
|
||||
}
|
||||
|
||||
LJFOLD(CONV CONV IRCONV_U32_NUM) /* _U32*/
|
||||
LJFOLDF(simplify_conv_u32_num)
|
||||
{
|
||||
/* Fold even across PHI to avoid expensive num->int conversions in loop. */
|
||||
if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
|
||||
return fleft->op1;
|
||||
return NEXTFOLD;
|
||||
}
|
||||
|
||||
LJFOLD(CONV CONV IRCONV_I64_NUM) /* _INT or _U32*/
|
||||
LJFOLDF(simplify_conv_i64_num)
|
||||
{
|
||||
PHIBARRIER(fleft);
|
||||
if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
|
||||
/* Reduce to a sign-extension. */
|
||||
fins->op1 = fleft->op1;
|
||||
fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
|
||||
return RETRYFOLD;
|
||||
} else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
|
||||
#if LJ_TARGET_X64
|
||||
return fleft->op1;
|
||||
#else
|
||||
/* Reduce to a zero-extension. */
|
||||
fins->op1 = fleft->op1;
|
||||
fins->op2 = (IRT_I64<<5)|IRT_U32;
|
||||
return RETRYFOLD;
|
||||
#endif
|
||||
}
|
||||
return NEXTFOLD;
|
||||
}
|
||||
|
||||
LJFOLD(CONV CONV IRCONV_U64_NUM) /* _U32*/
|
||||
LJFOLDF(simplify_conv_u64_num)
|
||||
{
|
||||
PHIBARRIER(fleft);
|
||||
if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
|
||||
#if LJ_TARGET_X64
|
||||
return fleft->op1;
|
||||
#else
|
||||
/* Reduce to a zero-extension. */
|
||||
fins->op1 = fleft->op1;
|
||||
fins->op2 = (IRT_U64<<5)|IRT_U32;
|
||||
return RETRYFOLD;
|
||||
#endif
|
||||
}
|
||||
return NEXTFOLD;
|
||||
}
|
||||
|
||||
/* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
|
||||
LJFOLD(TOBIT CONV KNUM)
|
||||
LJFOLDF(simplify_tobit_conv)
|
||||
{
|
||||
if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
|
||||
(fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
|
||||
/* Fold even across PHI to avoid expensive num->int conversions in loop. */
|
||||
lua_assert(irt_isnum(fleft->t));
|
||||
return fleft->op1;
|
||||
}
|
||||
return NEXTFOLD;
|
||||
}
|
||||
|
||||
/* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
|
||||
LJFOLD(FPMATH CONV IRFPM_FLOOR)
|
||||
LJFOLD(FPMATH CONV IRFPM_CEIL)
|
||||
LJFOLD(FPMATH CONV IRFPM_TRUNC)
|
||||
LJFOLDF(simplify_floor_conv)
|
||||
{
|
||||
if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
|
||||
(fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
|
||||
return LEFTFOLD;
|
||||
return NEXTFOLD;
|
||||
}
|
||||
|
||||
/* Strength reduction of widening. */
|
||||
LJFOLD(CONV any IRCONV_I64_INT)
|
||||
LJFOLDF(simplify_conv_sext)
|
||||
{
|
||||
IRRef ref = fins->op1;
|
||||
int64_t ofs = 0;
|
||||
if (!(fins->op2 & IRCONV_SEXT))
|
||||
return NEXTFOLD;
|
||||
PHIBARRIER(fleft);
|
||||
if (fleft->o == IR_ADD && irref_isk(fleft->op2)) {
|
||||
ofs = (int64_t)IR(fleft->op2)->i;
|
||||
ref = fleft->op1;
|
||||
}
|
||||
/* Use scalar evolution analysis results to strength-reduce sign-extension. */
|
||||
if (ref == J->scev.idx) {
|
||||
IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
|
||||
lua_assert(irt_isint(J->scev.t));
|
||||
if (lo && IR(lo)->i + ofs >= 0) {
|
||||
#if LJ_TARGET_X64
|
||||
/* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
|
||||
return LEFTFOLD;
|
||||
#else
|
||||
/* Reduce to a (cheaper) zero-extension. */
|
||||
fins->op2 &= ~IRCONV_SEXT;
|
||||
return RETRYFOLD;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
return NEXTFOLD;
|
||||
}
|
||||
|
||||
/* Special CSE rule for CONV. */
|
||||
LJFOLD(CONV any any)
|
||||
LJFOLDF(cse_conv)
|
||||
{
|
||||
if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
|
||||
IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
|
||||
uint8_t guard = irt_isguard(fins->t);
|
||||
IRRef ref = J->chain[IR_CONV];
|
||||
while (ref > op1) {
|
||||
IRIns *ir = IR(ref);
|
||||
/* Commoning with stronger checks is ok. */
|
||||
if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
|
||||
irt_isguard(ir->t) >= guard)
|
||||
return ref;
|
||||
ref = ir->prev;
|
||||
}
|
||||
}
|
||||
return EMITFOLD; /* No fallthrough to regular CSE. */
|
||||
}
|
||||
|
||||
/* FP conversion narrowing. */
|
||||
LJFOLD(TOINT ADD any)
|
||||
LJFOLD(TOINT SUB any)
|
||||
LJFOLD(TOBIT ADD KNUM)
|
||||
@ -771,26 +982,6 @@ LJFOLDF(cse_toint)
|
||||
return EMITFOLD; /* No fallthrough to regular CSE. */
|
||||
}
|
||||
|
||||
/* Special CSE rule for CONV. */
|
||||
LJFOLD(CONV any any)
|
||||
LJFOLDF(cse_conv)
|
||||
{
|
||||
if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
|
||||
IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
|
||||
uint8_t guard = irt_isguard(fins->t);
|
||||
IRRef ref = J->chain[IR_CONV];
|
||||
while (ref > op1) {
|
||||
IRIns *ir = IR(ref);
|
||||
/* Commoning with stronger checks is ok. */
|
||||
if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
|
||||
irt_isguard(ir->t) >= guard)
|
||||
return ref;
|
||||
ref = ir->prev;
|
||||
}
|
||||
}
|
||||
return EMITFOLD; /* No fallthrough to regular CSE. */
|
||||
}
|
||||
|
||||
/* -- Strength reduction of widening -------------------------------------- */
|
||||
|
||||
LJFOLD(TOI64 any 3) /* IRTOINT_ZEXT64 */
|
||||
@ -1723,12 +1914,12 @@ LJFOLDX(lj_ir_emit)
|
||||
|
||||
/* Every entry in the generated hash table is a 32 bit pattern:
|
||||
**
|
||||
** xxxxxxxx iiiiiiii llllllll rrrrrrrr
|
||||
** xxxxxxxx iiiiiii lllllll rrrrrrrrrr
|
||||
**
|
||||
** xxxxxxxx = 8 bit index into fold function table
|
||||
** iiiiiiii = 8 bit folded instruction opcode
|
||||
** llllllll = 8 bit left instruction opcode
|
||||
** rrrrrrrr = 8 bit right instruction opcode or 8 bits from literal field
|
||||
** xxxxxxxx = 8 bit index into fold function table
|
||||
** iiiiiii = 7 bit folded instruction opcode
|
||||
** lllllll = 7 bit left instruction opcode
|
||||
** rrrrrrrrrr = 8 bit right instruction opcode or 10 bits from literal field
|
||||
*/
|
||||
|
||||
#include "lj_folddef.h"
|
||||
@ -1762,9 +1953,9 @@ TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
|
||||
/* Fold engine start/retry point. */
|
||||
retry:
|
||||
/* Construct key from opcode and operand opcodes (unless literal/none). */
|
||||
key = ((uint32_t)fins->o << 16);
|
||||
key = ((uint32_t)fins->o << 17);
|
||||
if (fins->op1 >= J->cur.nk) {
|
||||
key += (uint32_t)IR(fins->op1)->o << 8;
|
||||
key += (uint32_t)IR(fins->op1)->o << 10;
|
||||
*fleft = *IR(fins->op1);
|
||||
}
|
||||
if (fins->op2 >= J->cur.nk) {
|
||||
@ -1777,7 +1968,7 @@ retry:
|
||||
/* Check for a match in order from most specific to least specific. */
|
||||
any = 0;
|
||||
for (;;) {
|
||||
uint32_t k = key | any;
|
||||
uint32_t k = key | (any & 0x1ffff);
|
||||
uint32_t h = fold_hashkey(k);
|
||||
uint32_t fh = fold_hash[h]; /* Lookup key in semi-perfect hash table. */
|
||||
if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
|
||||
@ -1785,9 +1976,9 @@ retry:
|
||||
if (ref != NEXTFOLD)
|
||||
break;
|
||||
}
|
||||
if (any == 0xffff) /* Exhausted folding. Pass on to CSE. */
|
||||
if (any == 0xfffff) /* Exhausted folding. Pass on to CSE. */
|
||||
return lj_opt_cse(J);
|
||||
any = (any | (any >> 8)) ^ 0xff00;
|
||||
any = (any | (any >> 10)) ^ 0xffc00;
|
||||
}
|
||||
|
||||
/* Return value processing, ordered by frequency. */
|
||||
|
Loading…
Reference in New Issue
Block a user