Refactoring of conversion ops, part 4: use CONV instead of TOINT/TONUM.

Also narrow CONV.int.num and CONV.i64.num.
Mike Pall 2010-12-31 03:56:30 +01:00
parent 1f26961092
commit 1716540c55
8 changed files with 89 additions and 61 deletions
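The core of the refactor: instead of one IR opcode per conversion (TONUM, TOINT, TOI64), a single CONV opcode whose second operand packs source and destination types plus mode flags. A minimal standalone sketch of that packing (illustrative type values, not LuaJIT's headers):

#include <stdio.h>

/* Placeholder type codes; only the field layout mirrors the patch:
** op2 = (destination type << 5) | source type. */
enum { TY_NUM = 14, TY_INT = 19 };
#define CONV_MODE(dst, src) (((dst) << 5) | (src))

int main(void)
{
  unsigned op2 = CONV_MODE(TY_NUM, TY_INT);  /* CONV.num.int, like IRCONV_NUM_INT. */
  printf("dst=%u src=%u\n", op2 >> 5, op2 & 0x1f);  /* dst=14 src=19 */
  return 0;
}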

src/lj_crecord.c View File

@@ -165,7 +165,7 @@ static void crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp)
/* fallthrough */
case CCX(I, F):
if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi;
sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_TRUNC);
sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_TRUNC|IRCONV_ANY);
goto xstore;
case CCX(I, P):
case CCX(I, A):
@@ -241,7 +241,7 @@ static void crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp)
if (st == IRT_CDATA) goto err_nyi;
/* The signed conversion is cheaper. x64 really has 47 bit pointers. */
sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32,
st, IRCONV_TRUNC);
st, IRCONV_TRUNC|IRCONV_ANY);
goto xstore;
/* Destination is an array. */
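The "47 bit pointers" remark deserves a quick illustration: canonical x64 user-space addresses fit in the low 47 bits, so they survive a signed 64 bit FP truncation, which is the cheaper instruction. A standalone check (mine, under that assumption):

#include <stdint.h>
#include <assert.h>

int main(void)
{
  /* A plausible user-space address below 2^47; exactly representable
  ** as a double since it is a power of two. */
  double d = (double)((uint64_t)1 << 46);
  int64_t p = (int64_t)d;  /* Signed truncation: one cvttsd2si on x64. */
  assert((uint64_t)p == ((uint64_t)1 << 46));
  return 0;
}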
@@ -401,11 +401,13 @@ void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd)
if (tref_isnumber(idx)) {
/* The size of a ptrdiff_t is target-specific. */
#if LJ_64
idx = emitir(IRT(IR_TOI64, IRT_INTP), idx,
tref_isinteger(idx) ? IRTOINT_SEXT64 : IRTOINT_TRUNCI64);
if (tref_isnum(idx))
idx = emitconv(idx, IRT_I64, IRT_NUM, IRCONV_TRUNC|IRCONV_ANY);
else
idx = emitconv(idx, IRT_I64, IRT_INT, IRCONV_SEXT);
#else
if (!tref_isinteger(idx))
idx = emitir(IRT(IR_TOINT, IRT_INTP), idx, IRTOINT_ANY);
if (tref_isnum(idx))
idx = emitconv(idx, IRT_INT, IRT_NUM, IRCONV_TRUNC|IRCONV_ANY);
#endif
if (ctype_ispointer(ct->info)) {
CTSize sz = lj_ctype_size(cts, (sid = ctype_cid(ct->info)));

src/lj_ir.c View File

@@ -393,7 +393,7 @@ TRef LJ_FASTCALL lj_ir_tonum(jit_State *J, TRef tr)
{
if (!tref_isnum(tr)) {
if (tref_isinteger(tr))
tr = emitir(IRTN(IR_TONUM), tr, 0);
tr = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT);
else if (tref_isstr(tr))
tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
else
@@ -434,7 +434,7 @@ TRef LJ_FASTCALL lj_ir_toint(jit_State *J, TRef tr)
tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
else if (!tref_isnum(tr))
lj_trace_err(J, LJ_TRERR_BADTYPE);
tr = emitir(IRTI(IR_TOINT), tr, IRTOINT_ANY);
tr = emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY);
}
return tr;
}

src/lj_ir.h View File

@@ -222,9 +222,12 @@ IRFLDEF(FLENUM)
/* CONV mode, stored in op2. */
#define IRCONV_SRCMASK 0x001f /* Source IRType. */
#define IRCONV_DSTMASK 0x03e0 /* Dest. IRType (also in ir->t). */
#define IRCONV_NUM_INT ((IRT_NUM<<5)|IRT_INT)
#define IRCONV_INT_NUM ((IRT_INT<<5)|IRT_NUM)
#define IRCONV_TRUNC 0x0400 /* Truncate number to integer. */
#define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */
#define IRCONV_MODEMASK 0x0fff
#define IRCONV_CONVMASK 0xf000
#define IRCONV_CSH 12
/* Number to integer conversion mode. Ordered by strength of the checks. */
#define IRCONV_TOBIT (0<<IRCONV_CSH) /* None. Cache only: TOBIT conv. */
@@ -232,6 +235,7 @@ IRFLDEF(FLENUM)
#define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */
#define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */
/* C call info for CALL* instructions. */
typedef struct CCallInfo {
ASMFunction func; /* Function pointer. */

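To make the bit layout above concrete, here is a small decoder (mine; the masks match the patch, the IRT_* values are placeholders):

#include <stdio.h>

#define IRCONV_SRCMASK  0x001f  /* Bits 0-4: source IRType. */
#define IRCONV_DSTMASK  0x03e0  /* Bits 5-9: destination IRType. */
#define IRCONV_TRUNC    0x0400
#define IRCONV_SEXT     0x0800
#define IRCONV_CONVMASK 0xf000  /* Bits 12-15: number->int check mode. */
#define IRCONV_CSH      12

int main(void)
{
  unsigned irt_num = 14, irt_int = 19;        /* Placeholder type codes. */
  unsigned op2 = (irt_int << 5) | irt_num     /* IRCONV_INT_NUM ... */
               | (2u << IRCONV_CSH);          /* ... plus IRCONV_INDEX. */
  printf("src=%u dst=%u mode=%u trunc=%d sext=%d\n",
         op2 & IRCONV_SRCMASK, (op2 & IRCONV_DSTMASK) >> 5,
         (op2 & IRCONV_CONVMASK) >> IRCONV_CSH,
         (op2 & IRCONV_TRUNC) != 0, (op2 & IRCONV_SEXT) != 0);
  return 0;
}

The check modes being ordered by strength is what lets the backpropagation cache in lj_opt_narrow.c (below) reuse an entry recorded with stronger checks.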
src/lj_jit.h View File

@@ -205,7 +205,7 @@ typedef struct HotPenalty {
typedef struct BPropEntry {
IRRef1 key; /* Key: original reference. */
IRRef1 val; /* Value: reference after conversion. */
IRRef mode; /* Mode for this entry (currently IRTOINT_*). */
IRRef mode; /* Mode for this entry (currently IRCONV_*). */
} BPropEntry;
/* Number of slots for the backpropagation cache. Must be a power of 2. */

src/lj_opt_fold.c View File

@@ -796,9 +796,9 @@ LJFOLDF(simplify_powi_kx)
{
lua_Number n = knumleft;
if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
fins->o = IR_TONUM;
fins->o = IR_CONV;
fins->op1 = fins->op2;
fins->op2 = 0;
fins->op2 = IRCONV_NUM_INT;
fins->op2 = (IRRef1)lj_opt_fold(J);
fins->op1 = (IRRef1)lj_ir_knum_one(J);
fins->o = IR_LDEXP;
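The rewrite above leans on an exact identity: for integral exponents, 2.0^i == ldexp(1.0, i). The fold first morphs the instruction into CONV.num.int(i), folds that, then reuses the result as the exponent operand of LDEXP. A quick standalone check of the identity (assuming a correct libm):

#include <math.h>
#include <assert.h>

int main(void)
{
  int i;
  /* Powers of two in this range are exact doubles, so == is safe. */
  for (i = -10; i <= 10; i++)
    assert(pow(2.0, (double)i) == ldexp(1.0, i));
  return 0;
}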
@@ -953,18 +953,19 @@ LJFOLDF(cse_conv)
}
/* FP conversion narrowing. */
LJFOLD(TOINT ADD any)
LJFOLD(TOINT SUB any)
LJFOLD(TOBIT ADD KNUM)
LJFOLD(TOBIT SUB KNUM)
LJFOLD(TOI64 ADD 5) /* IRTOINT_TRUNCI64 */
LJFOLD(TOI64 SUB 5) /* IRTOINT_TRUNCI64 */
LJFOLD(CONV ADD IRCONV_INT_NUM)
LJFOLD(CONV SUB IRCONV_INT_NUM)
LJFOLD(CONV ADD IRCONV_I64_NUM)
LJFOLD(CONV SUB IRCONV_I64_NUM)
LJFOLDF(narrow_convert)
{
PHIBARRIER(fleft);
/* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
if (J->chain[IR_LOOP])
return NEXTFOLD;
lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT);
return lj_opt_narrow_convert(J);
}

src/lj_opt_loop.c View File

@@ -301,7 +301,7 @@ static void loop_unroll(jit_State *J)
/* Check all loop-carried dependencies for type instability. */
if (!irt_sametype(t, irr->t)) {
if (irt_isnum(t) && irt_isinteger(irr->t)) /* Fix int->num case. */
subst[ins] = tref_ref(emitir(IRTN(IR_TONUM), ref, 0));
subst[ins] = tref_ref(emitir(IRTN(IR_CONV), ref, IRCONV_NUM_INT));
else if (!(irt_isinteger(t) && irt_isinteger(irr->t)))
lj_trace_err(J, LJ_TRERR_TYPEINS);
}

src/lj_opt_narrow.c View File

@@ -89,16 +89,17 @@
/* -- Elimination of narrowing type conversions --------------------------- */
/* Narrowing of index expressions and bit operations is demand-driven. The
** trace recorder emits a narrowing type conversion (TOINT or TOBIT) in
** all of these cases (e.g. array indexing or string indexing). FOLD
** trace recorder emits a narrowing type conversion (CONV.int.num or TOBIT)
** in all of these cases (e.g. array indexing or string indexing). FOLD
** already takes care of eliminating simple redundant conversions like
** TOINT(TONUM(x)) ==> x.
** CONV.int.num(CONV.num.int(x)) ==> x.
**
** But the surrounding code is FP-heavy and all arithmetic operations are
** performed on FP numbers. Consider a common example such as 'x=t[i+1]',
** with 'i' already an integer (due to induction variable narrowing). The
** index expression would be recorded as TOINT(ADD(TONUM(i), 1)), which is
** clearly suboptimal.
** index expression would be recorded as
** CONV.int.num(ADD(CONV.num.int(i), 1))
** which is clearly suboptimal.
**
** One can do better by recursively backpropagating the narrowing type
** conversion across FP arithmetic operations. This turns FP ops into
@@ -106,9 +107,10 @@
** the conversion they also need to check for overflow. Currently only ADD
** and SUB are supported.
**
** The above example can be rewritten as ADDOV(TOINT(TONUM(i)), 1) and
** then into ADDOV(i, 1) after folding of the conversions. The original FP
** ops remain in the IR and are eliminated by DCE since all references to
** The above example can be rewritten as
** ADDOV(CONV.int.num(CONV.num.int(i)), 1)
** and then into ADDOV(i, 1) after folding of the conversions. The original
** FP ops remain in the IR and are eliminated by DCE since all references to
** them are gone.
**
** Special care has to be taken to avoid narrowing across an operation
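For reference, the ADDOV in the rewritten expression above is an overflow-checked integer add: once FP arithmetic is narrowed, wrap-around must exit the trace rather than produce a wrong index. A standalone sketch of the required semantics (mine, not LuaJIT's implementation):

#include <stdint.h>
#include <assert.h>

/* Overflow-checked 32 bit add: returns 0 (guard failure, i.e. trace
** exit) if the result does not fit in int32_t, 1 otherwise. */
static int add_ov(int32_t a, int32_t b, int32_t *res)
{
  int64_t r = (int64_t)a + (int64_t)b;
  if (r != (int64_t)(int32_t)r) return 0;
  *res = (int32_t)r;
  return 1;
}

int main(void)
{
  int32_t r;
  assert(add_ov(41, 1, &r) && r == 42);
  assert(!add_ov(INT32_MAX, 1, &r));  /* Would wrap: must exit instead. */
  return 0;
}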
@@ -173,6 +175,7 @@
enum {
NARROW_REF, /* Push ref. */
NARROW_CONV, /* Push conversion of ref. */
NARROW_SEXT, /* Push sign-extension of ref. */
NARROW_INT /* Push KINT ref. The next code holds an int32_t. */
};
@@ -188,7 +191,8 @@ typedef struct NarrowConv {
NarrowIns *sp; /* Current stack pointer. */
NarrowIns *maxsp; /* Maximum stack pointer minus redzone. */
int lim; /* Limit on the number of emitted conversions. */
IRRef mode; /* Conversion mode (IRTOINT_*). */
IRRef mode; /* Conversion mode (IRCONV_*). */
IRType t; /* Destination type: IRT_INT or IRT_I64. */
NarrowIns stack[NARROW_MAX_STACK]; /* Stack holding stack-machine code. */
} NarrowConv;
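NarrowIns thus forms a small postfix program: backpropagation pushes refs, constants and conversions, and narrow_conv_emit() (below) replays the stack to emit IR. A toy, self-contained analogue of the replay loop (illustrative only; the real one pushes IR references):

#include <stdio.h>

enum { PUSH_INT, ADD_OP };                /* Toy opcodes, not NARROW_*. */
typedef struct { int op, val; } Ins;

static int replay(const Ins *code, int n)
{
  int stack[16], sp = 0, i;
  for (i = 0; i < n; i++) {
    if (code[i].op == PUSH_INT) {
      stack[sp++] = code[i].val;          /* Push a constant. */
    } else {                              /* ADD pops two, pushes one. */
      sp--;
      stack[sp-1] += stack[sp];
    }
  }
  return stack[0];
}

int main(void)
{
  /* Postfix form of (i + 1) with i = 41: [PUSH 41, PUSH 1, ADD]. */
  Ins code[3] = { {PUSH_INT, 41}, {PUSH_INT, 1}, {ADD_OP, 0} };
  printf("%d\n", replay(code, 3));        /* Prints 42. */
  return 0;
}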
@@ -198,7 +202,9 @@ static BPropEntry *narrow_bpc_get(jit_State *J, IRRef1 key, IRRef mode)
ptrdiff_t i;
for (i = 0; i < BPROP_SLOTS; i++) {
BPropEntry *bp = &J->bpropcache[i];
if (bp->key == key && bp->mode <= mode) /* Stronger checks are ok, too. */
/* Stronger checks are ok, too. */
if (bp->key == key && bp->mode >= mode &&
((bp->mode ^ mode) & IRCONV_MODEMASK) == 0)
return bp;
}
return NULL;
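A worked instance of the new predicate (mine; type codes are placeholders): the low 12 mode bits must match exactly, while a numerically larger check mode in the cache is acceptable because the modes are ordered by strength.

#include <assert.h>

#define MODEMASK 0x0fff
#define CSH      12
#define INDEX    (2u << CSH)
#define CHECK    (3u << CSH)

int main(void)
{
  unsigned int_num = (19u << 5) | 14u;    /* Placeholder IRCONV_INT_NUM. */
  /* A cached CONV.int.num+CHECK entry satisfies an INDEX query... */
  assert((int_num|CHECK) >= (int_num|INDEX) &&
         (((int_num|CHECK) ^ (int_num|INDEX)) & MODEMASK) == 0);
  /* ...but a weaker cached INDEX entry must not satisfy a CHECK query. */
  assert(!((int_num|INDEX) >= (int_num|CHECK)));
  return 0;
}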
@@ -223,16 +229,16 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
IRRef cref;
/* Check the easy cases first. */
if (ir->o == IR_TONUM) { /* Undo inverse conversion. */
*nc->sp++ = NARROWINS(NARROW_REF, ir->op1);
if (nc->mode == IRTOINT_TRUNCI64) {
*nc->sp++ = NARROWINS(NARROW_REF, IRTOINT_SEXT64);
*nc->sp++ = NARROWINS(IRT(IR_TOI64, IRT_I64), 0);
}
if (ir->o == IR_CONV && (ir->op2 & IRCONV_SRCMASK) == IRT_INT) {
if (nc->t == IRT_I64)
*nc->sp++ = NARROWINS(NARROW_SEXT, ir->op1); /* Reduce to sign-ext. */
else
*nc->sp++ = NARROWINS(NARROW_REF, ir->op1); /* Undo conversion. */
return 0;
} else if (ir->o == IR_KNUM) { /* Narrow FP constant. */
lua_Number n = ir_knum(ir)->n;
if (nc->mode == IRTOINT_TOBIT) { /* Allows a wider range of constants. */
if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) {
/* Allows a wider range of constants. */
int64_t k64 = (int64_t)n;
if (n == cast_num(k64)) { /* Only if constant doesn't lose precision. */
*nc->sp++ = NARROWINS(NARROW_INT, 0);
@@ -251,36 +257,46 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
}
/* Try to CSE the conversion. Stronger checks are ok, too. */
for (cref = J->chain[fins->o]; cref > ref; cref = IR(cref)->prev)
if (IR(cref)->op1 == ref &&
irt_isguard(IR(cref)->t) >= irt_isguard(fins->t)) {
cref = J->chain[fins->o];
while (cref > ref) {
IRIns *cr = IR(cref);
if (cr->op1 == ref &&
(fins->o == IR_TOBIT ||
((cr->op2 & IRCONV_MODEMASK) == (nc->mode & IRCONV_MODEMASK) &&
irt_isguard(cr->t) >= irt_isguard(fins->t)))) {
*nc->sp++ = NARROWINS(NARROW_REF, cref);
return 0; /* Already there, no additional conversion needed. */
}
cref = cr->prev;
}
/* Backpropagate across ADD/SUB. */
if (ir->o == IR_ADD || ir->o == IR_SUB) {
/* Try cache lookup first. */
IRRef mode = nc->mode;
BPropEntry *bp;
if (mode == IRTOINT_INDEX && depth > 0)
mode = IRTOINT_CHECK; /* Inner conversions need a stronger check. */
/* Inner conversions need a stronger check. */
if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX && depth > 0)
mode += IRCONV_CHECK-IRCONV_INDEX;
bp = narrow_bpc_get(nc->J, (IRRef1)ref, mode);
if (bp) {
*nc->sp++ = NARROWINS(NARROW_REF, bp->val);
if (mode == IRTOINT_TRUNCI64 && mode != bp->mode) {
*nc->sp++ = NARROWINS(NARROW_REF, IRTOINT_SEXT64);
*nc->sp++ = NARROWINS(IRT(IR_TOI64, IRT_I64), 0);
}
return 0;
} else if (nc->t == IRT_I64) {
/* Try sign-extending from an existing (checked) conversion to int. */
mode = (IRT_INT<<5)|IRT_NUM|IRCONV_INDEX;
bp = narrow_bpc_get(nc->J, (IRRef1)ref, mode);
if (bp) {
*nc->sp++ = NARROWINS(NARROW_SEXT, bp->val);
return 0;
}
}
if (++depth < NARROW_MAX_BACKPROP && nc->sp < nc->maxsp) {
NarrowIns *savesp = nc->sp;
int count = narrow_conv_backprop(nc, ir->op1, depth);
count += narrow_conv_backprop(nc, ir->op2, depth);
if (count <= nc->lim) { /* Limit total number of conversions. */
IRType t = mode == IRTOINT_TRUNCI64 ? IRT_I64 : IRT_INT;
*nc->sp++ = NARROWINS(IRT(ir->o, t), ref);
*nc->sp++ = NARROWINS(IRT(ir->o, nc->t), ref);
return count;
}
nc->sp = savesp; /* Too many conversions, need to backtrack. */
@@ -309,9 +325,12 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
*sp++ = ref;
} else if (op == NARROW_CONV) {
*sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */
} else if (op == NARROW_SEXT) {
*sp++ = emitir(IRT(IR_CONV, IRT_I64), ref,
(IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
} else if (op == NARROW_INT) {
lua_assert(next < last);
*sp++ = nc->mode == IRTOINT_TRUNCI64 ?
*sp++ = nc->t == IRT_I64 ?
lj_ir_kint64(J, (int64_t)(int32_t)*next++) :
lj_ir_kint(J, *next++);
} else { /* Regular IROpT. Pops two operands and pushes one result. */
@@ -319,12 +338,12 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
lua_assert(sp >= nc->stack+2);
sp--;
/* Omit some overflow checks for array indexing. See comments above. */
if (mode == IRTOINT_INDEX) {
if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) {
if (next == last && irref_isk(narrow_ref(sp[0])) &&
(uint32_t)IR(narrow_ref(sp[0]))->i + 0x40000000 < 0x80000000)
guardot = 0;
else
mode = IRTOINT_CHECK; /* Otherwise cache a stronger check. */
else /* Otherwise cache a stronger check. */
mode += IRCONV_CHECK-IRCONV_INDEX;
}
sp[-1] = emitir(op+guardot, sp[-1], sp[0]);
/* Add to cache. */
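The constant test in the INDEX case above packs the two-sided range check |k| < 2^30 into one unsigned compare: biasing by 0x40000000 maps [-2^30, 2^30) onto [0, 2^31). A standalone check (mine):

#include <stdint.h>
#include <assert.h>

static int small_const(int32_t k)
{
  /* Same shape as the test in the hunk above. */
  return (uint32_t)k + 0x40000000u < 0x80000000u;
}

int main(void)
{
  assert(small_const(0) && small_const(-0x40000000) && small_const(0x3fffffff));
  assert(!small_const(0x40000000) && !small_const(-0x40000001));
  return 0;
}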
@@ -344,8 +363,9 @@ TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J)
nc.J = J;
nc.sp = nc.stack;
nc.maxsp = &nc.stack[NARROW_MAX_STACK-4];
nc.t = irt_type(fins->t);
if (fins->o == IR_TOBIT) {
nc.mode = IRTOINT_TOBIT; /* Used only in the backpropagation cache. */
nc.mode = IRCONV_TOBIT; /* Used only in the backpropagation cache. */
nc.lim = 2; /* TOBIT can use a more optimistic rule. */
} else {
nc.mode = fins->op2;
@@ -401,7 +421,8 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc)
if (!tref_isinteger(rc)) {
if (tref_isstr(rc))
rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
rc = emitir(IRTGI(IR_TOINT), rc, IRTOINT_CHECK); /* Guarded TOINT! */
/* Guarded conversion to integer! */
rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK);
}
if (!tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */
tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536-2147483647-1));
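The ADD above begins the range guard -65536 <= i <= 65536 with the same biasing trick at int32 width: adding 65536-2^31 maps the accepted interval onto [INT32_MIN, INT32_MIN+131072], so a single signed compare (the guarded instruction that follows, cut off by this hunk) covers both bounds at once. A standalone sketch (mine; the wrapping add is done in unsigned arithmetic to keep the C defined):

#include <stdint.h>
#include <assert.h>

static int pow_exp_ok(int32_t i)
{
  uint32_t biased = (uint32_t)i + (uint32_t)(65536 - 2147483647 - 1);
  return (int32_t)biased <= (int32_t)(2*65536 - 2147483647 - 1);
}

int main(void)
{
  assert(pow_exp_ok(0) && pow_exp_ok(65536) && pow_exp_ok(-65536));
  assert(!pow_exp_ok(65537) && !pow_exp_ok(-65537));
  return 0;
}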

src/lj_record.c View File

@@ -169,10 +169,10 @@ int lj_record_objcmp(jit_State *J, TRef a, TRef b, cTValue *av, cTValue *bv)
if (ta != tb) {
/* Widen mixed number/int comparisons to number/number comparison. */
if (ta == IRT_INT && tb == IRT_NUM) {
a = emitir(IRTN(IR_TONUM), a, 0);
a = emitir(IRTN(IR_CONV), a, IRCONV_NUM_INT);
ta = IRT_NUM;
} else if (ta == IRT_NUM && tb == IRT_INT) {
b = emitir(IRTN(IR_TONUM), b, 0);
b = emitir(IRTN(IR_CONV), b, IRCONV_NUM_INT);
} else {
return 2; /* Two different types are never equal. */
}
@@ -199,7 +199,7 @@ static void canonicalize_slots(jit_State *J)
if (tref_isinteger(tr)) {
IRIns *ir = IR(tref_ref(tr));
if (!(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_READONLY)))
J->slot[s] = emitir(IRTN(IR_TONUM), tr, 0);
J->slot[s] = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT);
}
}
}
@@ -869,7 +869,7 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix)
if ((MSize)k < LJ_MAX_ASIZE && n == cast_num(k)) {
TRef asizeref, ikey = key;
if (!tref_isinteger(ikey))
ikey = emitir(IRTGI(IR_TOINT), ikey, IRTOINT_INDEX);
ikey = emitir(IRTGI(IR_CONV), ikey, IRCONV_INT_NUM|IRCONV_INDEX);
asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE);
if ((MSize)k < t->asize) { /* Currently an array key? */
TRef arrayref;
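The n == cast_num(k) test above admits a number key to the array part only when the int round-trip is lossless. A minimal standalone version (mine; the real code also bounds k against LJ_MAX_ASIZE before this):

#include <stdint.h>
#include <assert.h>

/* Assumes n is within int32 range, as the surrounding checks ensure. */
static int is_int_key(double n, int32_t *k)
{
  *k = (int32_t)n;
  return n == (double)*k;
}

int main(void)
{
  int32_t k;
  assert(is_int_key(3.0, &k) && k == 3);
  assert(!is_int_key(3.5, &k));  /* Falls back to a hash-part lookup. */
  return 0;
}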
@@ -898,7 +898,7 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix)
/* Otherwise the key is located in the hash part. */
if (tref_isinteger(key)) /* Hash keys are based on numbers, not ints. */
ix->key = key = emitir(IRTN(IR_TONUM), key, 0);
ix->key = key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT);
if (tref_isk(key)) {
/* Optimize lookup of constant hash keys. */
MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val);
@@ -1024,7 +1024,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */
TRef key = ix->key;
if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */
key = emitir(IRTN(IR_TONUM), key, 0);
key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT);
xref = emitir(IRT(IR_NEWREF, IRT_P32), ix->tab, key);
keybarrier = 0; /* NEWREF already takes care of the key barrier. */
}
@@ -1046,7 +1046,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
keybarrier = 0; /* Previous non-nil value kept the key alive. */
}
if (tref_isinteger(ix->val)) /* Convert int to number before storing. */
ix->val = emitir(IRTN(IR_TONUM), ix->val, 0);
ix->val = emitir(IRTN(IR_CONV), ix->val, IRCONV_NUM_INT);
emitir(IRT(loadop+IRDELTA_L2S, tref_type(ix->val)), xref, ix->val);
if (keybarrier || tref_isgcv(ix->val))
emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0);
@@ -1100,7 +1100,7 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val)
return res;
} else { /* Upvalue store. */
if (tref_isinteger(val)) /* Convert int to number before storing. */
val = emitir(IRTN(IR_TONUM), val, 0);
val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
emitir(IRT(IR_USTORE, tref_type(val)), uref, val);
if (needbarrier && tref_isgcv(val))
emitir(IRT(IR_OBAR, IRT_NIL), uref, val);
@@ -1254,7 +1254,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
ptrdiff_t idx = lj_ffrecord_select_mode(J, tridx, &J->L->base[dst-1]);
if (idx < 0) goto nyivarg;
if (idx != 0 && !tref_isinteger(tridx))
tridx = emitir(IRTGI(IR_TOINT), tridx, IRTOINT_INDEX);
tridx = emitir(IRTGI(IR_CONV), tridx, IRCONV_INT_NUM|IRCONV_INDEX);
if (idx != 0 && tref_isk(tridx)) {
emitir(IRTGI(idx <= nvararg ? IR_GE : IR_LT),
fr, lj_ir_kint(J, frofs+8*(int32_t)idx));
@@ -1418,10 +1418,10 @@ void lj_record_ins(jit_State *J)
if (ta != tc) {
/* Widen mixed number/int comparisons to number/number comparison. */
if (ta == IRT_INT && tc == IRT_NUM) {
ra = emitir(IRTN(IR_TONUM), ra, 0);
ra = emitir(IRTN(IR_CONV), ra, IRCONV_NUM_INT);
ta = IRT_NUM;
} else if (ta == IRT_NUM && tc == IRT_INT) {
rc = emitir(IRTN(IR_TONUM), rc, 0);
rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
} else if (!((ta == IRT_FALSE || ta == IRT_TRUE) &&
(tc == IRT_FALSE || tc == IRT_TRUE))) {
break; /* Interpreter will throw for two different types. */