From 1f269610925829f55ed3e88e4af2b6575598adbc Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Fri, 31 Dec 2010 01:09:30 +0100 Subject: [PATCH] Refactoring of conversion ops, part 3: add FOLD rules for IR_CONV. --- src/buildvm_fold.c | 8 +- src/lj_opt_fold.c | 253 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 226 insertions(+), 35 deletions(-) diff --git a/src/buildvm_fold.c b/src/buildvm_fold.c index 468e5300..5f463575 100644 --- a/src/buildvm_fold.c +++ b/src/buildvm_fold.c @@ -124,7 +124,7 @@ static uint32_t nexttoken(char **pp, int allowlit, int allowany) if (*p == '\0') return i; } else if (allowany && !strcmp("any", p)) { - return 0xff; + return allowany; } else { for (i = 0; ir_names[i]; i++) if (!strcmp(ir_names[i], p)) @@ -140,9 +140,9 @@ static uint32_t nexttoken(char **pp, int allowlit, int allowany) static void foldrule(char *p) { uint32_t op = nexttoken(&p, 0, 0); - uint32_t left = nexttoken(&p, 0, 1); - uint32_t right = nexttoken(&p, 1, 1); - uint32_t key = (funcidx << 24) | (op << 16) | (left << 8) | right; + uint32_t left = nexttoken(&p, 0, 0x7f); + uint32_t right = nexttoken(&p, 1, 0x3ff); + uint32_t key = (funcidx << 24) | (op << 17) | (left << 10) | right; uint32_t i; if (nkeys >= BUILD_MAX_FOLD) { fprintf(stderr, "Error: too many fold rules, increase BUILD_MAX_FOLD.\n"); diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 4790d245..92b9dfb4 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -489,6 +489,73 @@ LJFOLDF(kfold_toi64_knum) return INT64FOLD((uint64_t)(int64_t)knumleft); } +LJFOLD(CONV KINT IRCONV_NUM_INT) +LJFOLDF(kfold_conv_kint_num) +{ + return lj_ir_knum(J, cast_num(fleft->i)); +} + +LJFOLD(CONV KINT IRCONV_I64_INT) +LJFOLD(CONV KINT IRCONV_U64_INT) +LJFOLDF(kfold_conv_kint_i64) +{ + return INT64FOLD((uint64_t)(int64_t)fleft->i); +} + +LJFOLD(CONV KINT64 IRCONV_NUM_I64) +LJFOLDF(kfold_conv_kint64_num_i64) +{ + return lj_ir_knum(J, cast_num((int64_t)ir_kint64(fleft)->u64)); +} + +LJFOLD(CONV KINT64 IRCONV_NUM_U64) +LJFOLDF(kfold_conv_kint64_num_u64) +{ + return lj_ir_knum(J, cast_num(ir_kint64(fleft)->u64)); +} + +LJFOLD(CONV KINT64 IRCONV_INT_I64) +LJFOLD(CONV KINT64 IRCONV_U32_I64) +LJFOLDF(kfold_conv_kint64_int_i64) +{ + return INTFOLD((int32_t)ir_kint64(fleft)->u64); +} + +LJFOLD(CONV KNUM IRCONV_INT_NUM) +LJFOLDF(kfold_conv_knum_int_num) +{ + lua_Number n = knumleft; + if (!(fins->op2 & IRCONV_TRUNC)) { + int32_t k = lj_num2int(n); + if (irt_isguard(fins->t) && n != cast_num(k)) { + /* We're about to create a guard which always fails, like CONV +1.5. 
+ ** Some pathological loops cause this during LICM, e.g.: + ** local x,k,t = 0,1.5,{1,[1.5]=2} + ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end + ** assert(x == 300) + */ + return FAILFOLD; + } + return INTFOLD(k); + } else { + return INTFOLD((int32_t)n); + } +} + +LJFOLD(CONV KNUM IRCONV_I64_NUM) +LJFOLDF(kfold_conv_knum_i64_num) +{ + lua_assert((fins->op2 & IRCONV_TRUNC)); + return INT64FOLD((uint64_t)(int64_t)knumleft); +} + +LJFOLD(CONV KNUM IRCONV_U64_NUM) +LJFOLDF(kfold_conv_knum_u64_num) +{ + lua_assert((fins->op2 & IRCONV_TRUNC)); + return INT64FOLD(lj_num2u64(knumleft)); +} + LJFOLD(TOSTR KNUM) LJFOLDF(kfold_tostr_knum) { @@ -740,8 +807,152 @@ LJFOLDF(simplify_powi_kx) return NEXTFOLD; } -/* -- FP conversion narrowing --------------------------------------------- */ +/* -- Simplify conversions ------------------------------------------------ */ +LJFOLD(CONV CONV IRCONV_NUM_INT) /* _NUM */ +LJFOLDF(shortcut_conv_num_int) +{ + PHIBARRIER(fleft); + /* Only safe with a guarded conversion to int. */ + if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM && irt_isguard(fleft->t)) + return fleft->op1; /* f(g(x)) ==> x */ + return NEXTFOLD; +} + +LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */ +LJFOLDF(simplify_conv_int_num) +{ + /* Fold even across PHI to avoid expensive num->int conversions in loop. */ + if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) + return fleft->op1; + return NEXTFOLD; +} + +LJFOLD(CONV CONV IRCONV_U32_NUM) /* _U32*/ +LJFOLDF(simplify_conv_u32_num) +{ + /* Fold even across PHI to avoid expensive num->int conversions in loop. */ + if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) + return fleft->op1; + return NEXTFOLD; +} + +LJFOLD(CONV CONV IRCONV_I64_NUM) /* _INT or _U32*/ +LJFOLDF(simplify_conv_i64_num) +{ + PHIBARRIER(fleft); + if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) { + /* Reduce to a sign-extension. */ + fins->op1 = fleft->op1; + fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT); + return RETRYFOLD; + } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { +#if LJ_TARGET_X64 + return fleft->op1; +#else + /* Reduce to a zero-extension. */ + fins->op1 = fleft->op1; + fins->op2 = (IRT_I64<<5)|IRT_U32; + return RETRYFOLD; +#endif + } + return NEXTFOLD; +} + +LJFOLD(CONV CONV IRCONV_U64_NUM) /* _U32*/ +LJFOLDF(simplify_conv_u64_num) +{ + PHIBARRIER(fleft); + if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { +#if LJ_TARGET_X64 + return fleft->op1; +#else + /* Reduce to a zero-extension. */ + fins->op1 = fleft->op1; + fins->op2 = (IRT_U64<<5)|IRT_U32; + return RETRYFOLD; +#endif + } + return NEXTFOLD; +} + +/* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */ +LJFOLD(TOBIT CONV KNUM) +LJFOLDF(simplify_tobit_conv) +{ + if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT || + (fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { + /* Fold even across PHI to avoid expensive num->int conversions in loop. */ + lua_assert(irt_isnum(fleft->t)); + return fleft->op1; + } + return NEXTFOLD; +} + +/* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */ +LJFOLD(FPMATH CONV IRFPM_FLOOR) +LJFOLD(FPMATH CONV IRFPM_CEIL) +LJFOLD(FPMATH CONV IRFPM_TRUNC) +LJFOLDF(simplify_floor_conv) +{ + if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT || + (fleft->op2 & IRCONV_SRCMASK) == IRT_U32) + return LEFTFOLD; + return NEXTFOLD; +} + +/* Strength reduction of widening. 
*/ +LJFOLD(CONV any IRCONV_I64_INT) +LJFOLDF(simplify_conv_sext) +{ + IRRef ref = fins->op1; + int64_t ofs = 0; + if (!(fins->op2 & IRCONV_SEXT)) + return NEXTFOLD; + PHIBARRIER(fleft); + if (fleft->o == IR_ADD && irref_isk(fleft->op2)) { + ofs = (int64_t)IR(fleft->op2)->i; + ref = fleft->op1; + } + /* Use scalar evolution analysis results to strength-reduce sign-extension. */ + if (ref == J->scev.idx) { + IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; + lua_assert(irt_isint(J->scev.t)); + if (lo && IR(lo)->i + ofs >= 0) { +#if LJ_TARGET_X64 + /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */ + return LEFTFOLD; +#else + /* Reduce to a (cheaper) zero-extension. */ + fins->op2 &= ~IRCONV_SEXT; + return RETRYFOLD; +#endif + } + } + return NEXTFOLD; +} + +/* Special CSE rule for CONV. */ +LJFOLD(CONV any any) +LJFOLDF(cse_conv) +{ + if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { + IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK); + uint8_t guard = irt_isguard(fins->t); + IRRef ref = J->chain[IR_CONV]; + while (ref > op1) { + IRIns *ir = IR(ref); + /* Commoning with stronger checks is ok. */ + if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 && + irt_isguard(ir->t) >= guard) + return ref; + ref = ir->prev; + } + } + return EMITFOLD; /* No fallthrough to regular CSE. */ +} + +/* FP conversion narrowing. */ LJFOLD(TOINT ADD any) LJFOLD(TOINT SUB any) LJFOLD(TOBIT ADD KNUM) @@ -771,26 +982,6 @@ LJFOLDF(cse_toint) return EMITFOLD; /* No fallthrough to regular CSE. */ } -/* Special CSE rule for CONV. */ -LJFOLD(CONV any any) -LJFOLDF(cse_conv) -{ - if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { - IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK); - uint8_t guard = irt_isguard(fins->t); - IRRef ref = J->chain[IR_CONV]; - while (ref > op1) { - IRIns *ir = IR(ref); - /* Commoning with stronger checks is ok. */ - if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 && - irt_isguard(ir->t) >= guard) - return ref; - ref = ir->prev; - } - } - return EMITFOLD; /* No fallthrough to regular CSE. */ -} - /* -- Strength reduction of widening -------------------------------------- */ LJFOLD(TOI64 any 3) /* IRTOINT_ZEXT64 */ @@ -1723,12 +1914,12 @@ LJFOLDX(lj_ir_emit) /* Every entry in the generated hash table is a 32 bit pattern: ** -** xxxxxxxx iiiiiiii llllllll rrrrrrrr +** xxxxxxxx iiiiiii lllllll rrrrrrrrrr ** -** xxxxxxxx = 8 bit index into fold function table -** iiiiiiii = 8 bit folded instruction opcode -** llllllll = 8 bit left instruction opcode -** rrrrrrrr = 8 bit right instruction opcode or 8 bits from literal field +** xxxxxxxx = 8 bit index into fold function table +** iiiiiii = 7 bit folded instruction opcode +** lllllll = 7 bit left instruction opcode +** rrrrrrrrrr = 8 bit right instruction opcode or 10 bits from literal field */ #include "lj_folddef.h" @@ -1762,9 +1953,9 @@ TRef LJ_FASTCALL lj_opt_fold(jit_State *J) /* Fold engine start/retry point. */ retry: /* Construct key from opcode and operand opcodes (unless literal/none). */ - key = ((uint32_t)fins->o << 16); + key = ((uint32_t)fins->o << 17); if (fins->op1 >= J->cur.nk) { - key += (uint32_t)IR(fins->op1)->o << 8; + key += (uint32_t)IR(fins->op1)->o << 10; *fleft = *IR(fins->op1); } if (fins->op2 >= J->cur.nk) { @@ -1777,7 +1968,7 @@ retry: /* Check for a match in order from most specific to least specific. 
*/ any = 0; for (;;) { - uint32_t k = key | any; + uint32_t k = key | (any & 0x1ffff); uint32_t h = fold_hashkey(k); uint32_t fh = fold_hash[h]; /* Lookup key in semi-perfect hash table. */ if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) { @@ -1785,9 +1976,9 @@ retry: if (ref != NEXTFOLD) break; } - if (any == 0xffff) /* Exhausted folding. Pass on to CSE. */ + if (any == 0xfffff) /* Exhausted folding. Pass on to CSE. */ return lj_opt_cse(J); - any = (any | (any >> 8)) ^ 0xff00; + any = (any | (any >> 10)) ^ 0xffc00; } /* Return value processing, ordered by frequency. */
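
Note on the new KINT64 constant-folding rules above: IRCONV_NUM_I64 and IRCONV_NUM_U64 get separate handlers because the same 64 bit constant converts to different doubles depending on signedness. A minimal standalone check of that point (not part of the patch, placeholder value only):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
  uint64_t k = 0xffffffffffffffffULL;     /* One bit pattern, two readings. */
  printf("%.17g\n", (double)(int64_t)k);  /* -1 on two's-complement targets. */
  printf("%.17g\n", (double)k);           /* 1.8446744073709552e+19 */
  return 0;
}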
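
The strength reduction in simplify_conv_sext relies on sign- and zero-extension agreeing for non-negative 32 bit values; the scalar-evolution check (IR(lo)->i + ofs >= 0) is what guarantees that for the induction variable. A small standalone check of the underlying identity (not part of the patch):

#include <assert.h>
#include <stdint.h>

int main(void)
{
  int32_t i;
  for (i = 0; i <= 1000; i++) {
    /* For i >= 0 the sign-extended and zero-extended 64 bit values are
    ** identical, so the cheaper zero-extension (implicit for 32 bit ops
    ** on x64) can replace the sign-extension.
    */
    assert((uint64_t)(int64_t)i == (uint64_t)(uint32_t)i);
  }
  return 0;
}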
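
The widened key layout (8 bit function index, 7 bit folded opcode, 7 bit left opcode, 10 bit right opcode/literal) and the relaxation order of the lookup loop can be seen in isolation below. This is a standalone sketch with made-up opcode values, not part of the patch; "any" rules are stored by buildvm with all-ones fields (0x7f for left, 0x3ff for right), which is why OR-ing a field to all ones matches them.

#include <stdint.h>
#include <stdio.h>

/* Pack the 24 bit lookup key: op in bits 17-23, left opcode in bits 10-16,
** right opcode or literal in bits 0-9. The function index occupies the top
** 8 bits of the hash table entry only, not the lookup key.
*/
#define FOLD_KEY(op, left, right) \
  (((uint32_t)(op) << 17) | ((uint32_t)(left) << 10) | (uint32_t)(right))

int main(void)
{
  uint32_t key = FOLD_KEY(0x12, 0x34, 0x1f5);  /* Placeholder values. */
  uint32_t any = 0;
  for (;;) {  /* Same relaxation order as the patched lookup loop. */
    uint32_t k = key | (any & 0x1ffff);
    printf("try %06x\n", (unsigned)k);  /* exact, any-left, any-right, any-any */
    if (any == 0xfffff) break;  /* Exhausted; the real code falls back to CSE. */
    any = (any | (any >> 10)) ^ 0xffc00;
  }
  return 0;
}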