diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index 23b92831..791c177c 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -165,7 +165,7 @@ static void crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp)
     /* fallthrough */
   case CCX(I, F):
     if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi;
-    sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_TRUNC);
+    sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_TRUNC|IRCONV_ANY);
     goto xstore;
   case CCX(I, P):
   case CCX(I, A):
@@ -241,7 +241,7 @@ static void crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp)
     if (st == IRT_CDATA) goto err_nyi;
     /* The signed conversion is cheaper. x64 really has 47 bit pointers. */
     sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32,
-		  st, IRCONV_TRUNC);
+		  st, IRCONV_TRUNC|IRCONV_ANY);
     goto xstore;
 
   /* Destination is an array. */
@@ -401,11 +401,13 @@ void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd)
   if (tref_isnumber(idx)) {
     /* The size of a ptrdiff_t is target-specific. */
 #if LJ_64
-    idx = emitir(IRT(IR_TOI64, IRT_INTP), idx,
-		 tref_isinteger(idx) ? IRTOINT_SEXT64 : IRTOINT_TRUNCI64);
+    if (tref_isnum(idx))
+      idx = emitconv(idx, IRT_I64, IRT_NUM, IRCONV_TRUNC|IRCONV_ANY);
+    else
+      idx = emitconv(idx, IRT_I64, IRT_INT, IRCONV_SEXT);
 #else
-    if (!tref_isinteger(idx))
-      idx = emitir(IRT(IR_TOINT, IRT_INTP), idx, IRTOINT_ANY);
+    if (tref_isnum(idx))
+      idx = emitconv(idx, IRT_INT, IRT_NUM, IRCONV_TRUNC|IRCONV_ANY);
 #endif
     if (ctype_ispointer(ct->info)) {
       CTSize sz = lj_ctype_size(cts, (sid = ctype_cid(ct->info)));
diff --git a/src/lj_ir.c b/src/lj_ir.c
index e9364762..3217bc1e 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -393,7 +393,7 @@ TRef LJ_FASTCALL lj_ir_tonum(jit_State *J, TRef tr)
 {
   if (!tref_isnum(tr)) {
     if (tref_isinteger(tr))
-      tr = emitir(IRTN(IR_TONUM), tr, 0);
+      tr = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT);
     else if (tref_isstr(tr))
       tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
     else
@@ -434,7 +434,7 @@ TRef LJ_FASTCALL lj_ir_toint(jit_State *J, TRef tr)
       tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
     else if (!tref_isnum(tr))
       lj_trace_err(J, LJ_TRERR_BADTYPE);
-    tr = emitir(IRTI(IR_TOINT), tr, IRTOINT_ANY);
+    tr = emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY);
   }
   return tr;
 }
diff --git a/src/lj_ir.h b/src/lj_ir.h
index a6d36283..5733f6af 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -222,9 +222,12 @@ IRFLDEF(FLENUM)
 /* CONV mode, stored in op2. */
 #define IRCONV_SRCMASK	0x001f	/* Source IRType. */
 #define IRCONV_DSTMASK	0x03e0	/* Dest. IRType (also in ir->t). */
+#define IRCONV_NUM_INT	((IRT_NUM<<5)|IRT_INT)
+#define IRCONV_INT_NUM	((IRT_INT<<5)|IRT_NUM)
 #define IRCONV_TRUNC	0x0400	/* Truncate number to integer. */
 #define IRCONV_SEXT	0x0800	/* Sign-extend integer to integer. */
 #define IRCONV_MODEMASK	0x0fff
+#define IRCONV_CONVMASK	0xf000
 #define IRCONV_CSH	12
 /* Number to integer conversion mode. Ordered by strength of the checks. */
 #define IRCONV_TOBIT	(0<<IRCONV_CSH)	/* None. Cache only: TOBIT conv. */
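A note on the lj_ir.h hunk above, which is the heart of the patch: the separate TONUM/TOINT/TOI64 instructions become a single CONV whose op2 operand packs the destination IRType into bits 5-9, the source IRType into bits 0-4, and a check mode above IRCONV_CSH. The following standalone sketch only restates that layout so it can be compiled and tested in isolation; the IRT_* values are demo stand-ins, not the real enum from lj_ir.h:

/* Sketch of the CONV op2 layout from the hunk above; masks copied verbatim. */
#include <stdint.h>
#include <assert.h>

#define IRT_NUM 14u               /* demo stand-in, not the real enum value */
#define IRT_INT 19u               /* demo stand-in, not the real enum value */

#define IRCONV_SRCMASK  0x001fu   /* Source IRType. */
#define IRCONV_DSTMASK  0x03e0u   /* Dest. IRType. */
#define IRCONV_CONVMASK 0xf000u   /* Conversion/check mode. */
#define IRCONV_CSH      12

static uint32_t conv_op2(uint32_t dt, uint32_t st, uint32_t check)
{
  return (dt << 5) | st | (check << IRCONV_CSH);  /* same shape as IRCONV_*_* */
}

int main(void)
{
  uint32_t op2 = conv_op2(IRT_INT, IRT_NUM, 1u);  /* CONV.int.num, ANY mode */
  assert((op2 & IRCONV_SRCMASK) == IRT_NUM);          /* source type */
  assert(((op2 & IRCONV_DSTMASK) >> 5) == IRT_INT);   /* destination type */
  assert(((op2 & IRCONV_CONVMASK) >> IRCONV_CSH) == 1u);  /* check mode */
  return 0;
}

Keeping both types plus the mode in one operand is what lets FOLD and CSE compare whole conversions with a single mask, as the hunks below exploit.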
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -260,9 +260,9 @@ LJFOLDF(simplify_numpow_kx)
 {
   lua_Number n = knumleft;
   if (n == 2.0) {  /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
-    fins->o = IR_TONUM;
+    fins->o = IR_CONV;
     fins->op1 = fins->op2;
-    fins->op2 = 0;
+    fins->op2 = IRCONV_NUM_INT;
     fins->op2 = (IRRef1)lj_opt_fold(J);
     fins->op1 = (IRRef1)lj_ir_knum_one(J);
     fins->o = IR_LDEXP;
@@ -953,18 +953,19 @@ LJFOLDF(cse_conv)
 }
 
 /* FP conversion narrowing. */
-LJFOLD(TOINT ADD any)
-LJFOLD(TOINT SUB any)
 LJFOLD(TOBIT ADD KNUM)
 LJFOLD(TOBIT SUB KNUM)
-LJFOLD(TOI64 ADD 5)  /* IRTOINT_TRUNCI64 */
-LJFOLD(TOI64 SUB 5)  /* IRTOINT_TRUNCI64 */
+LJFOLD(CONV ADD IRCONV_INT_NUM)
+LJFOLD(CONV SUB IRCONV_INT_NUM)
+LJFOLD(CONV ADD IRCONV_I64_NUM)
+LJFOLD(CONV SUB IRCONV_I64_NUM)
 LJFOLDF(narrow_convert)
 {
   PHIBARRIER(fleft);
   /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
   if (J->chain[IR_LOOP])
     return NEXTFOLD;
+  lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT);
   return lj_opt_narrow_convert(J);
 }
 
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
index f370d59f..4f1d31a4 100644
--- a/src/lj_opt_loop.c
+++ b/src/lj_opt_loop.c
@@ -301,7 +301,7 @@ static void loop_unroll(jit_State *J)
     /* Check all loop-carried dependencies for type instability. */
     if (!irt_sametype(t, irr->t)) {
       if (irt_isnum(t) && irt_isinteger(irr->t))  /* Fix int->num case. */
-	subst[ins] = tref_ref(emitir(IRTN(IR_TONUM), ref, 0));
+	subst[ins] = tref_ref(emitir(IRTN(IR_CONV), ref, IRCONV_NUM_INT));
       else if (!(irt_isinteger(t) && irt_isinteger(irr->t)))
	 lj_trace_err(J, LJ_TRERR_TYPEINS);
     }
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index b6615f32..fb6601e9 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -89,16 +89,17 @@
 /* -- Elimination of narrowing type conversions --------------------------- */
 
 /* Narrowing of index expressions and bit operations is demand-driven. The
-** trace recorder emits a narrowing type conversion (TOINT or TOBIT) in
-** all of these cases (e.g. array indexing or string indexing). FOLD
+** trace recorder emits a narrowing type conversion (CONV.int.num or TOBIT)
+** in all of these cases (e.g. array indexing or string indexing). FOLD
 ** already takes care of eliminating simple redundant conversions like
-** TOINT(TONUM(x)) ==> x.
+** CONV.int.num(CONV.num.int(x)) ==> x.
 **
 ** But the surrounding code is FP-heavy and all arithmetic operations are
 ** performed on FP numbers. Consider a common example such as 'x=t[i+1]',
 ** with 'i' already an integer (due to induction variable narrowing). The
-** index expression would be recorded as TOINT(ADD(TONUM(i), 1)), which is
-** clearly suboptimal.
+** index expression would be recorded as
+**   CONV.int.num(ADD(CONV.num.int(i), 1))
+** which is clearly suboptimal.
 **
 ** One can do better by recursively backpropagating the narrowing type
 ** conversion across FP arithmetic operations. This turns FP ops into
@@ -106,9 +107,10 @@
 ** the conversion they also need to check for overflow. Currently only ADD
 ** and SUB are supported.
 **
-** The above example can be rewritten as ADDOV(TOINT(TONUM(i)), 1) and
-** then into ADDOV(i, 1) after folding of the conversions. The original FP
-** ops remain in the IR and are eliminated by DCE since all references to
+** The above example can be rewritten as
+**   ADDOV(CONV.int.num(CONV.num.int(i)), 1)
+** and then into ADDOV(i, 1) after folding of the conversions. The original
+** FP ops remain in the IR and are eliminated by DCE since all references to
 ** them are gone.
 **
 ** Special care has to be taken to avoid narrowing across an operation
@@ -173,6 +175,7 @@
 enum {
   NARROW_REF,		/* Push ref. */
   NARROW_CONV,		/* Push conversion of ref. */
+  NARROW_SEXT,		/* Push sign-extension of ref. */
   NARROW_INT		/* Push KINT ref. The next code holds an int32_t. */
 };
 
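The new NARROW_SEXT code above extends the tiny postfix stack machine used by the narrowing engine: narrow_conv_backprop (further down) compiles the FP expression into NARROW_* codes, and narrow_conv_emit replays them against an operand stack. The toy below mirrors only that replay loop; the ToyIns encoding and all names are invented for the illustration and are not LuaJIT code:

#include <stdint.h>
#include <assert.h>
#include <stdio.h>

enum { TOY_REF, TOY_INT, TOY_ADD };          /* stand-ins for NARROW_* */

typedef struct { int op; int32_t k; } ToyIns;

static int32_t toy_replay(const ToyIns *code, int n, const int32_t *refs)
{
  int32_t stack[8];
  int sp = 0, i;
  for (i = 0; i < n; i++) {
    if (code[i].op == TOY_REF)
      stack[sp++] = refs[code[i].k];         /* push an existing value */
    else if (code[i].op == TOY_INT)
      stack[sp++] = code[i].k;               /* push an integer constant */
    else {                                   /* TOY_ADD: pop 2, push 1 */
      sp--;
      stack[sp-1] += stack[sp];
    }
  }
  assert(sp == 1);                           /* exactly one result left */
  return stack[0];
}

int main(void)
{
  /* Postfix program for i+1 after backpropagation: REF i, INT 1, ADD. */
  const ToyIns code[3] = { {TOY_REF, 0}, {TOY_INT, 1}, {TOY_ADD, 0} };
  const int32_t refs[1] = { 41 };
  printf("%d\n", toy_replay(code, 3, refs)); /* prints 42 */
  return 0;
}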
@@ -188,7 +191,8 @@ typedef struct NarrowConv {
   NarrowIns *sp;	/* Current stack pointer. */
   NarrowIns *maxsp;	/* Maximum stack pointer minus redzone. */
   int lim;		/* Limit on the number of emitted conversions. */
-  IRRef mode;		/* Conversion mode (IRTOINT_*). */
+  IRRef mode;		/* Conversion mode (IRCONV_*). */
+  IRType t;		/* Destination type: IRT_INT or IRT_I64. */
   NarrowIns stack[NARROW_MAX_STACK];  /* Stack holding stack-machine code. */
 } NarrowConv;
 
@@ -198,7 +202,9 @@ static BPropEntry *narrow_bpc_get(jit_State *J, IRRef1 key, IRRef mode)
   ptrdiff_t i;
   for (i = 0; i < BPROP_SLOTS; i++) {
     BPropEntry *bp = &J->bpropcache[i];
-    if (bp->key == key && bp->mode <= mode)  /* Stronger checks are ok, too. */
+    /* Stronger checks are ok, too. */
+    if (bp->key == key && bp->mode >= mode &&
+	((bp->mode ^ mode) & IRCONV_MODEMASK) == 0)
       return bp;
   }
   return NULL;
@@ -223,16 +229,16 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
   IRRef cref;
 
   /* Check the easy cases first. */
-  if (ir->o == IR_TONUM) {  /* Undo inverse conversion. */
-    *nc->sp++ = NARROWINS(NARROW_REF, ir->op1);
-    if (nc->mode == IRTOINT_TRUNCI64) {
-      *nc->sp++ = NARROWINS(NARROW_REF, IRTOINT_SEXT64);
-      *nc->sp++ = NARROWINS(IRT(IR_TOI64, IRT_I64), 0);
-    }
+  if (ir->o == IR_CONV && (ir->op2 & IRCONV_SRCMASK) == IRT_INT) {
+    if (nc->t == IRT_I64)
+      *nc->sp++ = NARROWINS(NARROW_SEXT, ir->op1);  /* Reduce to sign-ext. */
+    else
+      *nc->sp++ = NARROWINS(NARROW_REF, ir->op1);  /* Undo conversion. */
     return 0;
   } else if (ir->o == IR_KNUM) {  /* Narrow FP constant. */
     lua_Number n = ir_knum(ir)->n;
-    if (nc->mode == IRTOINT_TOBIT) {  /* Allows a wider range of constants. */
+    if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) {
+      /* Allows a wider range of constants. */
       int64_t k64 = (int64_t)n;
       if (n == cast_num(k64)) {  /* Only if constant doesn't lose precision. */
	 *nc->sp++ = NARROWINS(NARROW_INT, 0);
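The rewritten narrow_bpc_get above is the subtle part of the mode change: a cached conversion may substitute for a requested one only if the typed part of the mode matches exactly, while the check portion may be stronger, since the check modes are ordered by strength (IRCONV_TOBIT < IRCONV_ANY < IRCONV_INDEX < IRCONV_CHECK, per the lj_ir.h comment). A self-contained restatement of that predicate follows; the packed type value is a demo, not the LuaJIT API:

#include <stdint.h>
#include <assert.h>

#define IRCONV_MODEMASK 0x0fffu   /* typed part: src/dst type bits */
#define IRCONV_CSH      12        /* check mode lives above this shift */
#define MODE(types, check) ((uint32_t)(types) | ((uint32_t)(check) << IRCONV_CSH))

/* Same rule as narrow_bpc_get: exact type match, check at least as
** strong (0=TOBIT, 1=ANY, 2=INDEX, 3=CHECK). */
static int bpc_hit(uint32_t cached, uint32_t wanted)
{
  return cached >= wanted && ((cached ^ wanted) & IRCONV_MODEMASK) == 0;
}

int main(void)
{
  uint32_t int_num = 0x26eu;  /* demo type bits for CONV.int.num */
  assert(bpc_hit(MODE(int_num, 3), MODE(int_num, 2)));     /* CHECK serves INDEX */
  assert(!bpc_hit(MODE(int_num, 1), MODE(int_num, 2)));    /* ANY cannot */
  assert(!bpc_hit(MODE(int_num+1, 3), MODE(int_num, 2)));  /* type mismatch */
  return 0;
}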
@@ -251,36 +257,46 @@
   }
 
   /* Try to CSE the conversion. Stronger checks are ok, too. */
-  for (cref = J->chain[fins->o]; cref > ref; cref = IR(cref)->prev)
-    if (IR(cref)->op1 == ref &&
-	irt_isguard(IR(cref)->t) >= irt_isguard(fins->t)) {
+  cref = J->chain[fins->o];
+  while (cref > ref) {
+    IRIns *cr = IR(cref);
+    if (cr->op1 == ref &&
+	(fins->o == IR_TOBIT ||
+	 ((cr->op2 & IRCONV_MODEMASK) == (nc->mode & IRCONV_MODEMASK) &&
+	  irt_isguard(cr->t) >= irt_isguard(fins->t)))) {
       *nc->sp++ = NARROWINS(NARROW_REF, cref);
       return 0;  /* Already there, no additional conversion needed. */
     }
+    cref = cr->prev;
+  }
 
   /* Backpropagate across ADD/SUB. */
   if (ir->o == IR_ADD || ir->o == IR_SUB) {
     /* Try cache lookup first. */
     IRRef mode = nc->mode;
     BPropEntry *bp;
-    if (mode == IRTOINT_INDEX && depth > 0)
-      mode = IRTOINT_CHECK;  /* Inner conversions need a stronger check. */
+    /* Inner conversions need a stronger check. */
+    if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX && depth > 0)
+      mode += IRCONV_CHECK-IRCONV_INDEX;
     bp = narrow_bpc_get(nc->J, (IRRef1)ref, mode);
     if (bp) {
       *nc->sp++ = NARROWINS(NARROW_REF, bp->val);
-      if (mode == IRTOINT_TRUNCI64 && mode != bp->mode) {
-	*nc->sp++ = NARROWINS(NARROW_REF, IRTOINT_SEXT64);
-	*nc->sp++ = NARROWINS(IRT(IR_TOI64, IRT_I64), 0);
-      }
       return 0;
+    } else if (nc->t == IRT_I64) {
+      /* Try sign-extending from an existing (checked) conversion to int. */
+      mode = (IRT_INT<<5)|IRT_NUM|IRCONV_INDEX;
+      bp = narrow_bpc_get(nc->J, (IRRef1)ref, mode);
+      if (bp) {
+	*nc->sp++ = NARROWINS(NARROW_SEXT, bp->val);
+	return 0;
+      }
     }
     if (++depth < NARROW_MAX_BACKPROP && nc->sp < nc->maxsp) {
       NarrowIns *savesp = nc->sp;
       int count = narrow_conv_backprop(nc, ir->op1, depth);
       count += narrow_conv_backprop(nc, ir->op2, depth);
       if (count <= nc->lim) {  /* Limit total number of conversions. */
-	IRType t = mode == IRTOINT_TRUNCI64 ? IRT_I64 : IRT_INT;
-	*nc->sp++ = NARROWINS(IRT(ir->o, t), ref);
+	*nc->sp++ = NARROWINS(IRT(ir->o, nc->t), ref);
	 return count;
       }
       nc->sp = savesp;  /* Too many conversions, need to backtrack. */
@@ -309,9 +325,12 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
       *sp++ = ref;
     } else if (op == NARROW_CONV) {
       *sp++ = emitir_raw(convot, ref, convop2);  /* Raw emit avoids a loop. */
+    } else if (op == NARROW_SEXT) {
+      *sp++ = emitir(IRT(IR_CONV, IRT_I64), ref,
+		     (IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
     } else if (op == NARROW_INT) {
       lua_assert(next < last);
-      *sp++ = nc->mode == IRTOINT_TRUNCI64 ?
+      *sp++ = nc->t == IRT_I64 ?
	       lj_ir_kint64(J, (int64_t)(int32_t)*next++) :
	       lj_ir_kint(J, *next++);
     } else {  /* Regular IROpT. Pops two operands and pushes one result. */
@@ -319,12 +338,12 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
       lua_assert(sp >= nc->stack+2);
       sp--;
       /* Omit some overflow checks for array indexing. See comments above. */
-      if (mode == IRTOINT_INDEX) {
+      if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) {
	 if (next == last && irref_isk(narrow_ref(sp[0])) &&
	     (uint32_t)IR(narrow_ref(sp[0]))->i + 0x40000000 < 0x80000000)
	   guardot = 0;
-	else
-	  mode = IRTOINT_CHECK;  /* Otherwise cache a stronger check. */
+	else  /* Otherwise cache a stronger check. */
+	  mode += IRCONV_CHECK-IRCONV_INDEX;
       }
       sp[-1] = emitir(op+guardot, sp[-1], sp[0]);
       /* Add to cache. */
@@ -344,8 +363,9 @@ TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J)
   nc.J = J;
   nc.sp = nc.stack;
   nc.maxsp = &nc.stack[NARROW_MAX_STACK-4];
+  nc.t = irt_type(fins->t);
   if (fins->o == IR_TOBIT) {
-    nc.mode = IRTOINT_TOBIT;  /* Used only in the backpropagation cache. */
+    nc.mode = IRCONV_TOBIT;  /* Used only in the backpropagation cache. */
     nc.lim = 2;  /* TOBIT can use a more optimistic rule. */
   } else {
     nc.mode = fins->op2;
@@ -401,7 +421,8 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc)
   if (!tref_isinteger(rc)) {
     if (tref_isstr(rc))
       rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
-    rc = emitir(IRTGI(IR_TOINT), rc, IRTOINT_CHECK);  /* Guarded TOINT! */
+    /* Guarded conversion to integer! */
+    rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK);
   }
   if (!tref_isk(rc)) {  /* Range guard: -65536 <= i <= 65536 */
     tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536-2147483647-1));
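The hunk above ends at the range guard of lj_opt_narrow_pow: the constant 65536-2147483647-1 biases the exponent by INT_MIN+65536, so a single guarded comparison, emitted by the recorder right after this line but outside the hunk's context, can enforce -65536 <= i <= 65536. The arithmetic of that bias trick, restated as standalone C under that assumption:

#include <stdint.h>
#include <assert.h>

/* One-compare range check for -65536 <= i <= 65536. Biasing maps the
** valid range onto [0, 131072], which one unsigned compare verifies;
** the IR does the equivalent with a signed compare after adding INT_MIN. */
static int pow_exp_ok(int32_t i)
{
  return (uint32_t)i + 65536u <= 131072u;
}

int main(void)
{
  assert(pow_exp_ok(0) && pow_exp_ok(-65536) && pow_exp_ok(65536));
  assert(!pow_exp_ok(-65537) && !pow_exp_ok(65537) && !pow_exp_ok(INT32_MIN));
  return 0;
}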
*/ } @@ -199,7 +199,7 @@ static void canonicalize_slots(jit_State *J) if (tref_isinteger(tr)) { IRIns *ir = IR(tref_ref(tr)); if (!(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_READONLY))) - J->slot[s] = emitir(IRTN(IR_TONUM), tr, 0); + J->slot[s] = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT); } } } @@ -869,7 +869,7 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix) if ((MSize)k < LJ_MAX_ASIZE && n == cast_num(k)) { TRef asizeref, ikey = key; if (!tref_isinteger(ikey)) - ikey = emitir(IRTGI(IR_TOINT), ikey, IRTOINT_INDEX); + ikey = emitir(IRTGI(IR_CONV), ikey, IRCONV_INT_NUM|IRCONV_INDEX); asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); if ((MSize)k < t->asize) { /* Currently an array key? */ TRef arrayref; @@ -898,7 +898,7 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix) /* Otherwise the key is located in the hash part. */ if (tref_isinteger(key)) /* Hash keys are based on numbers, not ints. */ - ix->key = key = emitir(IRTN(IR_TONUM), key, 0); + ix->key = key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT); if (tref_isk(key)) { /* Optimize lookup of constant hash keys. */ MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val); @@ -1024,7 +1024,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */ TRef key = ix->key; if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */ - key = emitir(IRTN(IR_TONUM), key, 0); + key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT); xref = emitir(IRT(IR_NEWREF, IRT_P32), ix->tab, key); keybarrier = 0; /* NEWREF already takes care of the key barrier. */ } @@ -1046,7 +1046,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) keybarrier = 0; /* Previous non-nil value kept the key alive. */ } if (tref_isinteger(ix->val)) /* Convert int to number before storing. */ - ix->val = emitir(IRTN(IR_TONUM), ix->val, 0); + ix->val = emitir(IRTN(IR_CONV), ix->val, IRCONV_NUM_INT); emitir(IRT(loadop+IRDELTA_L2S, tref_type(ix->val)), xref, ix->val); if (keybarrier || tref_isgcv(ix->val)) emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0); @@ -1100,7 +1100,7 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val) return res; } else { /* Upvalue store. */ if (tref_isinteger(val)) /* Convert int to number before storing. */ - val = emitir(IRTN(IR_TONUM), val, 0); + val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); emitir(IRT(IR_USTORE, tref_type(val)), uref, val); if (needbarrier && tref_isgcv(val)) emitir(IRT(IR_OBAR, IRT_NIL), uref, val); @@ -1254,7 +1254,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) ptrdiff_t idx = lj_ffrecord_select_mode(J, tridx, &J->L->base[dst-1]); if (idx < 0) goto nyivarg; if (idx != 0 && !tref_isinteger(tridx)) - tridx = emitir(IRTGI(IR_TOINT), tridx, IRTOINT_INDEX); + tridx = emitir(IRTGI(IR_CONV), tridx, IRCONV_INT_NUM|IRCONV_INDEX); if (idx != 0 && tref_isk(tridx)) { emitir(IRTGI(idx <= nvararg ? IR_GE : IR_LT), fr, lj_ir_kint(J, frofs+8*(int32_t)idx)); @@ -1418,10 +1418,10 @@ void lj_record_ins(jit_State *J) if (ta != tc) { /* Widen mixed number/int comparisons to number/number comparison. */ if (ta == IRT_INT && tc == IRT_NUM) { - ra = emitir(IRTN(IR_TONUM), ra, 0); + ra = emitir(IRTN(IR_CONV), ra, IRCONV_NUM_INT); ta = IRT_NUM; } else if (ta == IRT_NUM && tc == IRT_INT) { - rc = emitir(IRTN(IR_TONUM), rc, 0); + rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); } else if (!((ta == IRT_FALSE || ta == IRT_TRUE) && (tc == IRT_FALSE || tc == IRT_TRUE))) { break; /* Interpreter will throw for two different types. */