DUALNUM: Handle integer type in JIT compiler.

This commit is contained in:
Mike Pall 2011-03-10 01:57:24 +01:00
parent 3f26e3a89d
commit bfce3c1127
16 changed files with 486 additions and 278 deletions

View File

@ -128,15 +128,16 @@ lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h
lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
lj_dispatch.h lj_traceerr.h lj_dispatch.h lj_traceerr.h lj_vm.h
lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
lj_arch.h lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h \
lj_vm.h
lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_state.h \ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_state.h \
lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h
lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
lj_ffdef.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h
lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \

View File

@ -2059,7 +2059,7 @@ static void asm_href(ASMState *as, IRIns *ir)
} else { } else {
emit_sjcc(as, CC_P, l_next); emit_sjcc(as, CC_P, l_next);
emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n)); emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n));
emit_sjcc(as, CC_A, l_next); emit_sjcc(as, CC_AE, l_next);
/* The type check avoids NaN penalties and complaints from Valgrind. */ /* The type check avoids NaN penalties and complaints from Valgrind. */
#if LJ_64 #if LJ_64
emit_u32(as, LJ_TISNUM); emit_u32(as, LJ_TISNUM);
@ -2388,7 +2388,8 @@ static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
static void asm_ahuvload(ASMState *as, IRIns *ir) static void asm_ahuvload(ASMState *as, IRIns *ir)
{ {
lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t)); lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
(LJ_DUALNUM && irt_isint(ir->t)));
#if LJ_64 #if LJ_64
if (irt_islightud(ir->t)) { if (irt_islightud(ir->t)) {
Reg dest = asm_load_lightud64(as, ir, 1); Reg dest = asm_load_lightud64(as, ir, 1);
@ -2409,8 +2410,9 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
} }
/* Always do the type check, even if the load result is unused. */ /* Always do the type check, even if the load result is unused. */
as->mrm.ofs += 4; as->mrm.ofs += 4;
asm_guardcc(as, irt_isnum(ir->t) ? CC_A : CC_NE); asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE);
if (LJ_64 && irt_isnum(ir->t)) { if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
emit_u32(as, LJ_TISNUM); emit_u32(as, LJ_TISNUM);
emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
} else { } else {
@ -2443,7 +2445,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
if (ra_hasreg(src)) { if (ra_hasreg(src)) {
emit_mrm(as, XO_MOVto, src, RID_MRM); emit_mrm(as, XO_MOVto, src, RID_MRM);
} else if (!irt_ispri(irr->t)) { } else if (!irt_ispri(irr->t)) {
lua_assert(irt_isaddr(ir->t)); lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t)));
emit_i32(as, irr->i); emit_i32(as, irr->i);
emit_mrm(as, XO_MOVmi, 0, RID_MRM); emit_mrm(as, XO_MOVmi, 0, RID_MRM);
} }
@ -2460,8 +2462,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
Reg base; Reg base;
lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
lua_assert(!irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); lua_assert(LJ_DUALNUM ||
if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t)) { !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
Reg left = ra_scratch(as, RSET_FPR); Reg left = ra_scratch(as, RSET_FPR);
asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */
base = ra_alloc1(as, REF_BASE, RSET_GPR); base = ra_alloc1(as, REF_BASE, RSET_GPR);
@ -2481,12 +2484,14 @@ static void asm_sload(ASMState *as, IRIns *ir)
Reg dest = ra_dest(as, ir, allow); Reg dest = ra_dest(as, ir, allow);
base = ra_alloc1(as, REF_BASE, RSET_GPR); base = ra_alloc1(as, REF_BASE, RSET_GPR);
lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
if ((ir->op2 & IRSLOAD_CONVERT)) if ((ir->op2 & IRSLOAD_CONVERT)) {
emit_rmro(as, XO_CVTSD2SI, dest, base, ofs); t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */
else if (irt_isnum(t)) emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs);
} else if (irt_isnum(t)) {
emit_rmro(as, XMM_MOVRM(as), dest, base, ofs); emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
else } else {
emit_rmro(as, XO_MOV, dest, base, ofs); emit_rmro(as, XO_MOV, dest, base, ofs);
}
} else { } else {
if (!(ir->op2 & IRSLOAD_TYPECHECK)) if (!(ir->op2 & IRSLOAD_TYPECHECK))
return; /* No type check: avoid base alloc. */ return; /* No type check: avoid base alloc. */
@ -2494,8 +2499,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
} }
if ((ir->op2 & IRSLOAD_TYPECHECK)) { if ((ir->op2 & IRSLOAD_TYPECHECK)) {
/* Need type check, even if the load result is unused. */ /* Need type check, even if the load result is unused. */
asm_guardcc(as, irt_isnum(t) ? CC_A : CC_NE); asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE);
if (LJ_64 && irt_isnum(t)) { if (LJ_64 && irt_type(t) >= IRT_NUM) {
lua_assert(irt_isinteger(t) || irt_isnum(t));
emit_u32(as, LJ_TISNUM); emit_u32(as, LJ_TISNUM);
emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
} else { } else {
@ -3408,7 +3414,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
Reg src = ra_alloc1(as, ref, RSET_FPR); Reg src = ra_alloc1(as, ref, RSET_FPR);
emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs);
} else { } else {
lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t)); lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) ||
(LJ_DUALNUM && irt_isinteger(ir->t)));
if (!irref_isk(ref)) { if (!irref_isk(ref)) {
Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs); emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs);

View File

@ -185,6 +185,8 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
(sinfo & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); (sinfo & CTF_UNSIGNED) ? 0 : IRCONV_SEXT);
else if (dsize < 8 && ssize == 8) /* Truncate from 64 bit integer. */ else if (dsize < 8 && ssize == 8) /* Truncate from 64 bit integer. */
sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, 0); sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, 0);
else if (ssize <= 4)
sp = lj_opt_narrow_toint(J, sp);
xstore: xstore:
if (dt == IRT_I64 || dt == IRT_U64) lj_needsplit(J); if (dt == IRT_I64 || dt == IRT_U64) lj_needsplit(J);
if (dp == 0) return sp; if (dp == 0) return sp;
@ -355,10 +357,10 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, TValue *sval)
CType *s; CType *s;
if (LJ_LIKELY(tref_isinteger(sp))) { if (LJ_LIKELY(tref_isinteger(sp))) {
sid = CTID_INT32; sid = CTID_INT32;
svisnz = (void *)(intptr_t)(numV(sval) != 0); svisnz = (void *)(intptr_t)(tvisint(sval)?(intV(sval)!=0):!tviszero(sval));
} else if (tref_isnum(sp)) { } else if (tref_isnum(sp)) {
sid = CTID_DOUBLE; sid = CTID_DOUBLE;
svisnz = (void *)(intptr_t)(numV(sval) != 0); svisnz = (void *)(intptr_t)(tvisint(sval)?(intV(sval)!=0):!tviszero(sval));
} else if (tref_isbool(sp)) { } else if (tref_isbool(sp)) {
sp = lj_ir_kint(J, tref_istrue(sp) ? 1 : 0); sp = lj_ir_kint(J, tref_istrue(sp) ? 1 : 0);
sid = CTID_BOOL; sid = CTID_BOOL;
@ -443,16 +445,16 @@ static CTypeID crec_constructor(jit_State *J, GCcdata *cd, TRef tr)
static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz) static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz)
{ {
IRIns *ir = IR(tref_ref(tr)); IRIns *ir = IR(tref_ref(tr));
if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && irref_isk(ir->op2) &&
ir->o == IR_ADD && irref_isk(ir->op2)) { (ir->o == IR_ADD || ir->o == IR_ADDOV || ir->o == IR_SUBOV)) {
IRIns *irk = IR(ir->op2); IRIns *irk = IR(ir->op2);
tr = ir->op1; ptrdiff_t k;
#if LJ_64 if (LJ_64 && irk->o == IR_KINT64)
if (irk->o == IR_KINT64) k = (ptrdiff_t)ir_kint64(irk)->u64 * sz;
*ofsp += (ptrdiff_t)ir_kint64(irk)->u64 * sz;
else else
#endif k = (ptrdiff_t)irk->i * sz;
*ofsp += (ptrdiff_t)irk->i * sz; if (ir->o == IR_SUBOV) *ofsp -= k; else *ofsp += k;
tr = ir->op1; /* Not a TRef, but the caller doesn't care. */
} }
return tr; return tr;
} }
@ -477,16 +479,7 @@ void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd)
idx = J->base[1]; idx = J->base[1];
if (tref_isnumber(idx)) { if (tref_isnumber(idx)) {
/* The size of a ptrdiff_t is target-specific. */ idx = lj_opt_narrow_cindex(J, idx);
#if LJ_64
if (tref_isnum(idx))
idx = emitconv(idx, IRT_I64, IRT_NUM, IRCONV_TRUNC|IRCONV_ANY);
else
idx = emitconv(idx, IRT_I64, IRT_INT, IRCONV_SEXT);
#else
if (tref_isnum(idx))
idx = emitconv(idx, IRT_INT, IRT_NUM, IRCONV_TRUNC|IRCONV_ANY);
#endif
integer_key: integer_key:
if (ctype_ispointer(ct->info)) { if (ctype_ispointer(ct->info)) {
CTSize sz; CTSize sz;
@ -635,7 +628,7 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
TRef sp, dp; TRef sp, dp;
TValue tv; TValue tv;
TValue *sval = &tv; TValue *sval = &tv;
setnumV(&tv, 0); setintV(&tv, 0);
if (!gcref(df->name)) continue; /* Ignore unnamed fields. */ if (!gcref(df->name)) continue; /* Ignore unnamed fields. */
dc = ctype_rawchild(cts, df); /* Field type. */ dc = ctype_rawchild(cts, df); /* Field type. */
if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info))) if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info)))

View File

@ -63,9 +63,9 @@ typedef void (LJ_FASTCALL *RecordFunc)(jit_State *J, RecordFFData *rd);
/* Get runtime value of int argument. */ /* Get runtime value of int argument. */
static int32_t argv2int(jit_State *J, TValue *o) static int32_t argv2int(jit_State *J, TValue *o)
{ {
if (!tvisnum(o) && !(tvisstr(o) && lj_str_tonum(strV(o), o))) if (!tvisnumber(o) && !(tvisstr(o) && lj_str_tonumber(strV(o), o)))
lj_trace_err(J, LJ_TRERR_BADTYPE); lj_trace_err(J, LJ_TRERR_BADTYPE);
return lj_num2bit(numV(o)); return tvisint(o) ? intV(o) : lj_num2int(numV(o));
} }
/* Get runtime value of string argument. */ /* Get runtime value of string argument. */
@ -75,8 +75,11 @@ static GCstr *argv2str(jit_State *J, TValue *o)
return strV(o); return strV(o);
} else { } else {
GCstr *s; GCstr *s;
if (!tvisnum(o)) if (!tvisnumber(o))
lj_trace_err(J, LJ_TRERR_BADTYPE); lj_trace_err(J, LJ_TRERR_BADTYPE);
if (tvisint(o))
s = lj_str_fromint(J->L, intV(o));
else
s = lj_str_fromnum(J->L, &o->n); s = lj_str_fromnum(J->L, &o->n);
setstrV(J->L, o, s); setstrV(J->L, o, s);
return s; return s;
@ -128,7 +131,7 @@ static void LJ_FASTCALL recff_type(jit_State *J, RecordFFData *rd)
{ {
/* Arguments already specialized. Result is a constant string. Neat, huh? */ /* Arguments already specialized. Result is a constant string. Neat, huh? */
uint32_t t; uint32_t t;
if (tvisnum(&rd->argv[0])) if (tvisnumber(&rd->argv[0]))
t = ~LJ_TNUMX; t = ~LJ_TNUMX;
else if (LJ_64 && tvislightud(&rd->argv[0])) else if (LJ_64 && tvislightud(&rd->argv[0]))
t = ~LJ_TLIGHTUD; t = ~LJ_TLIGHTUD;
@ -255,7 +258,7 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd)
TRef tr = J->base[0]; TRef tr = J->base[0];
TRef base = J->base[1]; TRef base = J->base[1];
if (tr && base) { if (tr && base) {
base = lj_ir_toint(J, base); base = lj_opt_narrow_toint(J, base);
if (!tref_isk(base) || IR(tref_ref(base))->i != 10) if (!tref_isk(base) || IR(tref_ref(base))->i != 10)
recff_nyiu(J); recff_nyiu(J);
} }
@ -332,12 +335,12 @@ static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd)
RecordIndex ix; RecordIndex ix;
ix.tab = J->base[0]; ix.tab = J->base[0];
if (tref_istab(ix.tab)) { if (tref_istab(ix.tab)) {
if (!tvisnum(&rd->argv[1])) /* No support for string coercion. */ if (!tvisnumber(&rd->argv[1])) /* No support for string coercion. */
lj_trace_err(J, LJ_TRERR_BADTYPE); lj_trace_err(J, LJ_TRERR_BADTYPE);
setnumV(&ix.keyv, numV(&rd->argv[1])+(lua_Number)1); setintV(&ix.keyv, numberVint(&rd->argv[1])+1);
settabV(J->L, &ix.tabv, tabV(&rd->argv[0])); settabV(J->L, &ix.tabv, tabV(&rd->argv[0]));
ix.val = 0; ix.idxchain = 0; ix.val = 0; ix.idxchain = 0;
ix.key = lj_ir_toint(J, J->base[1]); ix.key = lj_opt_narrow_toint(J, J->base[1]);
J->base[0] = ix.key = emitir(IRTI(IR_ADD), ix.key, lj_ir_kint(J, 1)); J->base[0] = ix.key = emitir(IRTI(IR_ADD), ix.key, lj_ir_kint(J, 1));
J->base[1] = lj_record_idx(J, &ix); J->base[1] = lj_record_idx(J, &ix);
rd->nres = tref_isnil(J->base[1]) ? 0 : 2; rd->nres = tref_isnil(J->base[1]) ? 0 : 2;
@ -525,26 +528,26 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd)
/* Record unary bit.tobit, bit.bnot, bit.bswap. */ /* Record unary bit.tobit, bit.bnot, bit.bswap. */
static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd) static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd)
{ {
TRef tr = lj_ir_tobit(J, J->base[0]); TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0); J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0);
} }
/* Record N-ary bit.band, bit.bor, bit.bxor. */ /* Record N-ary bit.band, bit.bor, bit.bxor. */
static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd)
{ {
TRef tr = lj_ir_tobit(J, J->base[0]); TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
uint32_t op = rd->data; uint32_t op = rd->data;
BCReg i; BCReg i;
for (i = 1; J->base[i] != 0; i++) for (i = 1; J->base[i] != 0; i++)
tr = emitir(IRTI(op), tr, lj_ir_tobit(J, J->base[i])); tr = emitir(IRTI(op), tr, lj_opt_narrow_tobit(J, J->base[i]));
J->base[0] = tr; J->base[0] = tr;
} }
/* Record bit shifts. */ /* Record bit shifts. */
static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd)
{ {
TRef tr = lj_ir_tobit(J, J->base[0]); TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
TRef tsh = lj_ir_tobit(J, J->base[1]); TRef tsh = lj_opt_narrow_tobit(J, J->base[1]);
if (!(rd->data < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && if (!(rd->data < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
!tref_isk(tsh)) !tref_isk(tsh))
tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31));
@ -570,25 +573,25 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
int32_t start, end; int32_t start, end;
if (rd->data) { /* string.sub(str, start [,end]) */ if (rd->data) { /* string.sub(str, start [,end]) */
start = argv2int(J, &rd->argv[1]); start = argv2int(J, &rd->argv[1]);
trstart = lj_ir_toint(J, J->base[1]); trstart = lj_opt_narrow_toint(J, J->base[1]);
trend = J->base[2]; trend = J->base[2];
if (tref_isnil(trend)) { if (tref_isnil(trend)) {
trend = lj_ir_kint(J, -1); trend = lj_ir_kint(J, -1);
end = -1; end = -1;
} else { } else {
trend = lj_ir_toint(J, trend); trend = lj_opt_narrow_toint(J, trend);
end = argv2int(J, &rd->argv[2]); end = argv2int(J, &rd->argv[2]);
} }
} else { /* string.byte(str [,start [,end]]) */ } else { /* string.byte(str [,start [,end]]) */
if (J->base[1]) { if (J->base[1]) {
start = argv2int(J, &rd->argv[1]); start = argv2int(J, &rd->argv[1]);
trstart = lj_ir_toint(J, J->base[1]); trstart = lj_opt_narrow_toint(J, J->base[1]);
trend = J->base[2]; trend = J->base[2];
if (tref_isnil(trend)) { if (tref_isnil(trend)) {
trend = trstart; trend = trstart;
end = start; end = start;
} else { } else {
trend = lj_ir_toint(J, trend); trend = lj_opt_narrow_toint(J, trend);
end = argv2int(J, &rd->argv[2]); end = argv2int(J, &rd->argv[2]);
} }
} else { } else {

View File

@ -426,32 +426,6 @@ TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr)
return tr; return tr;
} }
/* Convert from number or string to bitop operand (overflow wrapped). */
TRef LJ_FASTCALL lj_ir_tobit(jit_State *J, TRef tr)
{
if (!tref_isinteger(tr)) {
if (tref_isstr(tr))
tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
else if (!tref_isnum(tr))
lj_trace_err(J, LJ_TRERR_BADTYPE);
tr = emitir(IRTI(IR_TOBIT), tr, lj_ir_knum_tobit(J));
}
return tr;
}
/* Convert from number or string to integer (overflow undefined). */
TRef LJ_FASTCALL lj_ir_toint(jit_State *J, TRef tr)
{
if (!tref_isinteger(tr)) {
if (tref_isstr(tr))
tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
else if (!tref_isnum(tr))
lj_trace_err(J, LJ_TRERR_BADTYPE);
tr = emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY);
}
return tr;
}
/* -- Miscellaneous IR ops ------------------------------------------------ */ /* -- Miscellaneous IR ops ------------------------------------------------ */
/* Evaluate numeric comparison. */ /* Evaluate numeric comparison. */

View File

@ -124,7 +124,7 @@
_(XBAR, S , ___, ___) \ _(XBAR, S , ___, ___) \
\ \
/* Type conversions. */ \ /* Type conversions. */ \
_(CONV, N , ref, lit) \ _(CONV, NW, ref, lit) \
_(TOBIT, N , ref, ref) \ _(TOBIT, N , ref, ref) \
_(TOSTR, N , ref, ___) \ _(TOSTR, N , ref, ___) \
_(STRTO, N , ref, ___) \ _(STRTO, N , ref, ___) \
@ -345,8 +345,8 @@ typedef enum {
#define IRM_AW (IRM_A|IRM_W) #define IRM_AW (IRM_A|IRM_W)
#define IRM_LW (IRM_L|IRM_W) #define IRM_LW (IRM_L|IRM_W)
#define irm_op1(m) (cast(IRMode, (m)&3)) #define irm_op1(m) ((IRMode)((m)&3))
#define irm_op2(m) (cast(IRMode, ((m)>>2)&3)) #define irm_op2(m) ((IRMode)(((m)>>2)&3))
#define irm_iscomm(m) ((m) & IRM_C) #define irm_iscomm(m) ((m) & IRM_C)
#define irm_kind(m) ((m) & IRM_S) #define irm_kind(m) ((m) & IRM_S)
@ -401,8 +401,8 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
#define IRTG(o, t) (IRT((o), IRT_GUARD|(t))) #define IRTG(o, t) (IRT((o), IRT_GUARD|(t)))
#define IRTGI(o) (IRT((o), IRT_GUARD|IRT_INT)) #define IRTGI(o) (IRT((o), IRT_GUARD|IRT_INT))
#define irt_t(t) (cast(IRType, (t).irt)) #define irt_t(t) ((IRType)(t).irt)
#define irt_type(t) (cast(IRType, (t).irt & IRT_TYPE)) #define irt_type(t) ((IRType)((t).irt & IRT_TYPE))
#define irt_sametype(t1, t2) ((((t1).irt ^ (t2).irt) & IRT_TYPE) == 0) #define irt_sametype(t1, t2) ((((t1).irt ^ (t2).irt) & IRT_TYPE) == 0)
#define irt_typerange(t, first, last) \ #define irt_typerange(t, first, last) \
((uint32_t)((t).irt & IRT_TYPE) - (uint32_t)(first) <= (uint32_t)(last-first)) ((uint32_t)((t).irt & IRT_TYPE) - (uint32_t)(first) <= (uint32_t)(last-first))
@ -441,18 +441,30 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
static LJ_AINLINE IRType itype2irt(const TValue *tv) static LJ_AINLINE IRType itype2irt(const TValue *tv)
{ {
if (tvisnum(tv)) if (tvisint(tv))
return IRT_INT;
else if (tvisnum(tv))
return IRT_NUM; return IRT_NUM;
#if LJ_64 #if LJ_64
else if (tvislightud(tv)) else if (tvislightud(tv))
return IRT_LIGHTUD; return IRT_LIGHTUD;
#endif #endif
else else
return cast(IRType, ~itype(tv)); return (IRType)~itype(tv);
} }
#define irt_toitype(t) \ static LJ_AINLINE uint32_t irt_toitype_(IRType t)
check_exp(!(LJ_64 && irt_islightud((t))), ~(uint32_t)irt_type((t))) {
lua_assert(!LJ_64 || t != IRT_LIGHTUD);
if (LJ_DUALNUM && t > IRT_NUM) {
return LJ_TISNUM;
} else {
lua_assert(t <= IRT_NUM);
return ~(uint32_t)t;
}
}
#define irt_toitype(t) irt_toitype_(irt_type((t)))
#define irt_isguard(t) ((t).irt & IRT_GUARD) #define irt_isguard(t) ((t).irt & IRT_GUARD)
#define irt_ismarked(t) ((t).irt & IRT_MARK) #define irt_ismarked(t) ((t).irt & IRT_MARK)

View File

@ -84,8 +84,6 @@ LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir);
/* Convert IR operand types. */ /* Convert IR operand types. */
LJ_FUNC TRef LJ_FASTCALL lj_ir_tonum(jit_State *J, TRef tr); LJ_FUNC TRef LJ_FASTCALL lj_ir_tonum(jit_State *J, TRef tr);
LJ_FUNC TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr); LJ_FUNC TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr);
LJ_FUNC TRef LJ_FASTCALL lj_ir_tobit(jit_State *J, TRef tr);
LJ_FUNC TRef LJ_FASTCALL lj_ir_toint(jit_State *J, TRef tr);
/* Miscellaneous IR ops. */ /* Miscellaneous IR ops. */
LJ_FUNC int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op); LJ_FUNC int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op);
@ -134,9 +132,17 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J);
/* Narrowing. */ /* Narrowing. */
LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef key);
LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_toint(jit_State *J, TRef tr);
LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr);
#if LJ_HASFFI
LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef key);
#endif
LJ_FUNC TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
TValue *vb, TValue *vc, IROp op);
LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc); LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc);
LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc); LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc);
LJ_FUNC IRType lj_opt_narrow_forl(cTValue *forbase); LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase);
/* Optimization passes. */ /* Optimization passes. */
LJ_FUNC void lj_opt_dce(jit_State *J); LJ_FUNC void lj_opt_dce(jit_State *J);

View File

@ -393,13 +393,27 @@ void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o)
lj_err_msg(L, LJ_ERR_FORLIM); lj_err_msg(L, LJ_ERR_FORLIM);
if (!(tvisnumber(o+2) || (tvisstr(o+2) && lj_str_tonumber(strV(o+2), o+2)))) if (!(tvisnumber(o+2) || (tvisstr(o+2) && lj_str_tonumber(strV(o+2), o+2))))
lj_err_msg(L, LJ_ERR_FORSTEP); lj_err_msg(L, LJ_ERR_FORSTEP);
#if LJ_DUALNUM if (LJ_DUALNUM) {
/* Ensure all slots are integers or all slots are numbers. */ /* Ensure all slots are integers or all slots are numbers. */
if (!(tvisint(o) && tvisint(o+1) && tvisint(o+2))) { int32_t k[3];
int nint = 0;
ptrdiff_t i;
for (i = 0; i <= 2; i++) {
if (tvisint(o+i)) {
k[i] = intV(o+i); nint++;
} else {
k[i] = lj_num2int(numV(o+i)); nint += ((lua_Number)k[i] == numV(o+i));
}
}
if (nint == 3) { /* Narrow to integers. */
setintV(o, k[0]);
setintV(o+1, k[1]);
setintV(o+2, k[2]);
} else if (nint != 0) { /* Widen to numbers. */
if (tvisint(o)) setnumV(o, (lua_Number)intV(o)); if (tvisint(o)) setnumV(o, (lua_Number)intV(o));
if (tvisint(o+1)) setnumV(o+1, (lua_Number)intV(o+1)); if (tvisint(o+1)) setnumV(o+1, (lua_Number)intV(o+1));
if (tvisint(o+2)) setnumV(o+2, (lua_Number)intV(o+2)); if (tvisint(o+2)) setnumV(o+2, (lua_Number)intV(o+2));
} }
#endif }
} }

View File

@ -29,6 +29,6 @@ LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne);
LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins); LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins);
LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op);
LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top);
LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *base); LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o);
#endif #endif

View File

@ -325,8 +325,6 @@ typedef struct GCproto {
#define proto_kgc(pt, idx) \ #define proto_kgc(pt, idx) \
check_exp((uintptr_t)(intptr_t)(idx) >= (uintptr_t)-(intptr_t)(pt)->sizekgc, \ check_exp((uintptr_t)(intptr_t)(idx) >= (uintptr_t)-(intptr_t)(pt)->sizekgc, \
gcref(mref((pt)->k, GCRef)[(idx)])) gcref(mref((pt)->k, GCRef)[(idx)]))
#define proto_knum(pt, idx) \
check_exp((uintptr_t)(idx) < (pt)->sizekn, mref((pt)->k, lua_Number)[(idx)])
#define proto_knumtv(pt, idx) \ #define proto_knumtv(pt, idx) \
check_exp((uintptr_t)(idx) < (pt)->sizekn, &mref((pt)->k, TValue)[(idx)]) check_exp((uintptr_t)(idx) < (pt)->sizekn, &mref((pt)->k, TValue)[(idx)])
#define proto_bc(pt) ((BCIns *)((char *)(pt) + sizeof(GCproto))) #define proto_bc(pt) ((BCIns *)((char *)(pt) + sizeof(GCproto)))

View File

@ -558,7 +558,10 @@ LJFOLD(CONV KINT IRCONV_I64_INT)
LJFOLD(CONV KINT IRCONV_U64_INT) LJFOLD(CONV KINT IRCONV_U64_INT)
LJFOLDF(kfold_conv_kint_i64) LJFOLDF(kfold_conv_kint_i64)
{ {
if ((fins->op2 & IRCONV_SEXT))
return INT64FOLD((uint64_t)(int64_t)fleft->i); return INT64FOLD((uint64_t)(int64_t)fleft->i);
else
return INT64FOLD((uint64_t)(int64_t)(uint32_t)fleft->i);
} }
LJFOLD(CONV KINT64 IRCONV_NUM_I64) LJFOLD(CONV KINT64 IRCONV_NUM_I64)

View File

@ -300,8 +300,11 @@ static void loop_unroll(jit_State *J)
} }
/* Check all loop-carried dependencies for type instability. */ /* Check all loop-carried dependencies for type instability. */
if (!irt_sametype(t, irr->t)) { if (!irt_sametype(t, irr->t)) {
if (irt_isnum(t) && irt_isinteger(irr->t)) /* Fix int->num case. */ if (irt_isnum(t) && irt_isinteger(irr->t)) /* Fix int->num. */
subst[ins] = tref_ref(emitir(IRTN(IR_CONV), ref, IRCONV_NUM_INT)); subst[ins] = tref_ref(emitir(IRTN(IR_CONV), ref, IRCONV_NUM_INT));
else if (irt_isnum(irr->t) && irt_isinteger(t)) /* Fix num->int. */
subst[ins] = tref_ref(emitir(IRTGI(IR_CONV), ref,
IRCONV_INT_NUM|IRCONV_CHECK));
else if (!(irt_isinteger(t) && irt_isinteger(irr->t))) else if (!(irt_isinteger(t) && irt_isinteger(irr->t)))
lj_trace_err(J, LJ_TRERR_TYPEINS); lj_trace_err(J, LJ_TRERR_TYPEINS);
} }
@ -355,8 +358,8 @@ int lj_opt_loop(jit_State *J)
int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt);
if (LJ_UNLIKELY(errcode)) { if (LJ_UNLIKELY(errcode)) {
lua_State *L = J->L; lua_State *L = J->L;
if (errcode == LUA_ERRRUN && tvisnum(L->top-1)) { /* Trace error? */ if (errcode == LUA_ERRRUN && tvisnumber(L->top-1)) { /* Trace error? */
int32_t e = lj_num2int(numV(L->top-1)); int32_t e = numberVint(L->top-1);
switch ((TraceError)e) { switch ((TraceError)e) {
case LJ_TRERR_TYPEINS: /* Type instability. */ case LJ_TRERR_TYPEINS: /* Type instability. */
case LJ_TRERR_GFAIL: /* Guard would always fail. */ case LJ_TRERR_GFAIL: /* Guard would always fail. */

View File

@ -1,5 +1,6 @@
/* /*
** NARROW: Narrowing of numbers to integers (double to int32_t). ** NARROW: Narrowing of numbers to integers (double to int32_t).
** STRIPOV: Stripping of overflow checks.
** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h
*/ */
@ -16,6 +17,7 @@
#include "lj_jit.h" #include "lj_jit.h"
#include "lj_iropt.h" #include "lj_iropt.h"
#include "lj_trace.h" #include "lj_trace.h"
#include "lj_vm.h"
/* Rationale for narrowing optimizations: /* Rationale for narrowing optimizations:
** **
@ -57,24 +59,34 @@
** **
** A better solution is to keep all numbers as FP values and only narrow ** A better solution is to keep all numbers as FP values and only narrow
** when it's beneficial to do so. LuaJIT uses predictive narrowing for ** when it's beneficial to do so. LuaJIT uses predictive narrowing for
** induction variables and demand-driven narrowing for index expressions ** induction variables and demand-driven narrowing for index expressions,
** and bit operations. Additionally it can eliminate or hoists most of the ** integer arguments and bit operations. Additionally it can eliminate or
** resulting overflow checks. Regular arithmetic computations are never ** hoist most of the resulting overflow checks. Regular arithmetic
** narrowed to integers. ** computations are never narrowed to integers.
** **
** The integer type in the IR has convenient wrap-around semantics and ** The integer type in the IR has convenient wrap-around semantics and
** ignores overflow. Extra operations have been added for ** ignores overflow. Extra operations have been added for
** overflow-checking arithmetic (ADDOV/SUBOV) instead of an extra type. ** overflow-checking arithmetic (ADDOV/SUBOV) instead of an extra type.
** Apart from reducing overall complexity of the compiler, this also ** Apart from reducing overall complexity of the compiler, this also
** nicely solves the problem where you want to apply algebraic ** nicely solves the problem where you want to apply algebraic
** simplifications to ADD, but not to ADDOV. And the assembler can use lea ** simplifications to ADD, but not to ADDOV. And the x86/x64 assembler can
** instead of an add for integer ADD, but not for ADDOV (lea does not ** use lea instead of an add for integer ADD, but not for ADDOV (lea does
** affect the flags, but it helps to avoid register moves). ** not affect the flags, but it helps to avoid register moves).
** **
** Note that all of the above has to be reconsidered if LuaJIT is to be **
** ported to architectures with slow FP operations or with no hardware FPU ** All of the above has to be reconsidered for architectures with slow FP
** at all. In the latter case an integer-only port may be the best overall ** operations or without a hardware FPU. The dual-number mode of LuaJIT
** solution (if this still meets user demands). ** addresses this issue. Arithmetic operations are performed on integers
** as far as possible and overflow checks are added as needed.
**
** This implies that narrowing for integer arguments and bit operations
** should also strip overflow checks, e.g. replace ADDOV with ADD. The
** original overflow guards are weak and can be eliminated by DCE, if
** there's no other use.
**
** A slight twist is that it's usually beneficial to use overflow-checked
** integer arithmetic if all inputs are already integers. This is the only
** change that affects the single-number mode, too.
*/ */
/* Some local macros to save typing. Undef'd at the end. */ /* Some local macros to save typing. Undef'd at the end. */
@ -94,10 +106,10 @@
** already takes care of eliminating simple redundant conversions like ** already takes care of eliminating simple redundant conversions like
** CONV.int.num(CONV.num.int(x)) ==> x. ** CONV.int.num(CONV.num.int(x)) ==> x.
** **
** But the surrounding code is FP-heavy and all arithmetic operations are ** But the surrounding code is FP-heavy and arithmetic operations are
** performed on FP numbers. Consider a common example such as 'x=t[i+1]', ** performed on FP numbers (for the single-number mode). Consider a common
** with 'i' already an integer (due to induction variable narrowing). The ** example such as 'x=t[i+1]', with 'i' already an integer (due to induction
** index expression would be recorded as ** variable narrowing). The index expression would be recorded as
** CONV.int.num(ADD(CONV.num.int(i), 1)) ** CONV.int.num(ADD(CONV.num.int(i), 1))
** which is clearly suboptimal. ** which is clearly suboptimal.
** **
@ -113,6 +125,9 @@
** FP ops remain in the IR and are eliminated by DCE since all references to ** FP ops remain in the IR and are eliminated by DCE since all references to
** them are gone. ** them are gone.
** **
** [In dual-number mode the trace recorder already emits ADDOV etc., but
** this can be further reduced. See below.]
**
** Special care has to be taken to avoid narrowing across an operation ** Special care has to be taken to avoid narrowing across an operation
** which is potentially operating on non-integral operands. One obvious ** which is potentially operating on non-integral operands. One obvious
** case is when an expression contains a non-integral constant, but ends ** case is when an expression contains a non-integral constant, but ends
@ -221,6 +236,26 @@ static void narrow_bpc_set(jit_State *J, IRRef1 key, IRRef1 val, IRRef mode)
bp->mode = mode; bp->mode = mode;
} }
/* Backpropagate overflow stripping.
**
** Walks the operand tree below 'ref' and pushes narrowing instructions onto
** the stack in 'nc'. ADDOV/SUBOV (and MULOV for IRCONV_ANY conversions) are
** replaced by their unchecked ADD/SUB/MUL counterparts. Any other
** instruction is pushed as a plain reference.
**
** The stack limit is re-checked after every recursive step. If the limit is
** hit while processing either operand, all entries pushed for this subtree
** are discarded and the original overflow-checked instruction is referenced
** instead. Without this, a deep chain of overflow-checked ops could push
** roughly two entries per level past nc->maxsp.
*/
static void narrow_stripov_backprop(NarrowConv *nc, IRRef ref, int depth)
{
  jit_State *J = nc->J;
  IRIns *ir = IR(ref);
  if (ir->o == IR_ADDOV || ir->o == IR_SUBOV ||
      (ir->o == IR_MULOV && (nc->mode & IRCONV_CONVMASK) == IRCONV_ANY)) {
    BPropEntry *bp = narrow_bpc_get(nc->J, ref, IRCONV_TOBIT);
    if (bp) {
      ref = bp->val;  /* Reuse the cached, already stripped instruction. */
    } else if (++depth < NARROW_MAX_BACKPROP && nc->sp < nc->maxsp) {
      NarrowIns *savesp = nc->sp;  /* Remember where this subtree starts. */
      narrow_stripov_backprop(nc, ir->op1, depth);
      if (nc->sp < nc->maxsp) {
        narrow_stripov_backprop(nc, ir->op2, depth);
        if (nc->sp < nc->maxsp) {
          /* Both operands fit: push the op without the overflow check. */
          *nc->sp++ = NARROWINS(IRT(ir->o - IR_ADDOV + IR_ADD, IRT_INT), ref);
          return;
        }
      }
      nc->sp = savesp;  /* Path too deep, need to backtrack. */
    }
  }
  /* NOTE(review): this push is unchecked, as in the original. It relies on
  ** the caller leaving slack above nc->maxsp -- confirm at the setup site.
  */
  *nc->sp++ = NARROWINS(NARROW_REF, ref);
}
/* Backpropagate narrowing conversion. Return number of needed conversions. */ /* Backpropagate narrowing conversion. Return number of needed conversions. */
static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth) static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
{ {
@ -230,24 +265,26 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
/* Check the easy cases first. */ /* Check the easy cases first. */
if (ir->o == IR_CONV && (ir->op2 & IRCONV_SRCMASK) == IRT_INT) { if (ir->o == IR_CONV && (ir->op2 & IRCONV_SRCMASK) == IRT_INT) {
if (nc->t == IRT_I64) if ((nc->mode & IRCONV_CONVMASK) <= IRCONV_ANY)
*nc->sp++ = NARROWINS(NARROW_SEXT, ir->op1); /* Reduce to sign-ext. */ narrow_stripov_backprop(nc, ir->op1, depth+1);
else else
*nc->sp++ = NARROWINS(NARROW_REF, ir->op1); /* Undo conversion. */ *nc->sp++ = NARROWINS(NARROW_REF, ir->op1); /* Undo conversion. */
if (nc->t == IRT_I64)
*nc->sp++ = NARROWINS(NARROW_SEXT, 0); /* Sign-extend integer. */
return 0; return 0;
} else if (ir->o == IR_KNUM) { /* Narrow FP constant. */ } else if (ir->o == IR_KNUM) { /* Narrow FP constant. */
lua_Number n = ir_knum(ir)->n; lua_Number n = ir_knum(ir)->n;
if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) { if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) {
/* Allows a wider range of constants. */ /* Allows a wider range of constants. */
int64_t k64 = (int64_t)n; int64_t k64 = (int64_t)n;
if (n == cast_num(k64)) { /* Only if constant doesn't lose precision. */ if (n == (lua_Number)k64) { /* Only if const doesn't lose precision. */
*nc->sp++ = NARROWINS(NARROW_INT, 0); *nc->sp++ = NARROWINS(NARROW_INT, 0);
*nc->sp++ = (NarrowIns)k64; /* But always truncate to 32 bits. */ *nc->sp++ = (NarrowIns)k64; /* But always truncate to 32 bits. */
return 0; return 0;
} }
} else { } else {
int32_t k = lj_num2int(n); int32_t k = lj_num2int(n);
if (n == cast_num(k)) { /* Only if constant is really an integer. */ if (n == (lua_Number)k) { /* Only if constant is really an integer. */
*nc->sp++ = NARROWINS(NARROW_INT, 0); *nc->sp++ = NARROWINS(NARROW_INT, 0);
*nc->sp++ = (NarrowIns)k; *nc->sp++ = (NarrowIns)k;
return 0; return 0;
@ -287,7 +324,8 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
mode = (IRT_INT<<5)|IRT_NUM|IRCONV_INDEX; mode = (IRT_INT<<5)|IRT_NUM|IRCONV_INDEX;
bp = narrow_bpc_get(nc->J, (IRRef1)ref, mode); bp = narrow_bpc_get(nc->J, (IRRef1)ref, mode);
if (bp) { if (bp) {
*nc->sp++ = NARROWINS(NARROW_SEXT, bp->val); *nc->sp++ = NARROWINS(NARROW_REF, bp->val);
*nc->sp++ = NARROWINS(NARROW_SEXT, 0);
return 0; return 0;
} }
} }
@ -326,7 +364,8 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
} else if (op == NARROW_CONV) { } else if (op == NARROW_CONV) {
*sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */
} else if (op == NARROW_SEXT) { } else if (op == NARROW_SEXT) {
*sp++ = emitir(IRT(IR_CONV, IRT_I64), ref, lua_assert(sp >= nc->stack+1);
sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1],
(IRT_I64<<5)|IRT_INT|IRCONV_SEXT); (IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
} else if (op == NARROW_INT) { } else if (op == NARROW_INT) {
lua_assert(next < last); lua_assert(next < last);
@ -340,7 +379,7 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
/* Omit some overflow checks for array indexing. See comments above. */ /* Omit some overflow checks for array indexing. See comments above. */
if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) { if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) {
if (next == last && irref_isk(narrow_ref(sp[0])) && if (next == last && irref_isk(narrow_ref(sp[0])) &&
(uint32_t)IR(narrow_ref(sp[0]))->i + 0x40000000 < 0x80000000) (uint32_t)IR(narrow_ref(sp[0]))->i + 0x40000000u < 0x80000000u)
guardot = 0; guardot = 0;
else /* Otherwise cache a stronger check. */ else /* Otherwise cache a stronger check. */
mode += IRCONV_CHECK-IRCONV_INDEX; mode += IRCONV_CHECK-IRCONV_INDEX;
@ -377,12 +416,123 @@ TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J)
return NEXTFOLD; return NEXTFOLD;
} }
/* -- Narrowing of implicit conversions ----------------------------------- */
/* Recursively strip overflow checks.
**
** If 'tr' refers to an overflow-checked op in the range IR_ADDOV..lastop,
** its operands are stripped recursively and the unchecked counterpart is
** emitted with the destination type encoded in 'mode'. Results are cached
** per (ref, mode). Any other instruction is returned unchanged, except on
** 64 bit targets with IRCONV_SEXT, where a non-64 bit value gets a
** conversion to IRT_INTP.
*/
static TRef narrow_stripov(jit_State *J, TRef tr, int lastop, IRRef mode)
{
  IRRef ref = tref_ref(tr);
  IRIns *ir = IR(ref);
  int op = ir->o;
  BPropEntry *bp;
  IRRef lhs, rhs;
  if (op < IR_ADDOV || op > lastop) {
    /* Not a strippable op: at most widen the value. */
    if (LJ_64 && (mode & IRCONV_SEXT) && !irt_is64(ir->t))
      tr = emitir(IRT(IR_CONV, IRT_INTP), tr, mode);
    return tr;
  }
  bp = narrow_bpc_get(J, ref, mode);
  if (bp)  /* Already stripped before with the same mode. */
    return TREF(bp->val, irt_t(IR(bp->val)->t));
  /* Fetch both operands up front, since the IR may be reallocated. */
  lhs = ir->op1;
  rhs = ir->op2;
  lhs = narrow_stripov(J, lhs, lastop, mode);
  rhs = narrow_stripov(J, rhs, lastop, mode);
  tr = emitir(IRT(op - IR_ADDOV + IR_ADD,
                  ((mode & IRCONV_DSTMASK) >> IRCONV_DSH)), lhs, rhs);
  narrow_bpc_set(J, ref, tref_ref(tr), mode);
  return tr;
}
/* Narrow array index.
**
** FP numbers get a guarded conversion to integer in index mode, which may
** itself be narrowed later. For integers, an ADDOV/SUBOV with a constant
** second operand in the range -2^30..2^30-1 is re-emitted as a plain
** ADD/SUB. All other integer references are returned unchanged.
*/
TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr)
{
  lua_assert(tref_isnumber(tr));
  if (!tref_isnum(tr)) {
    /* Omit some overflow checks for array indexing. See comments above. */
    IRIns *ir = IR(tref_ref(tr));
    int isaddsubov = (ir->o == IR_ADDOV || ir->o == IR_SUBOV);
    if (isaddsubov && irref_isk(ir->op2) &&
        (uint32_t)IR(ir->op2)->i + 0x40000000u < 0x80000000u)
      return emitir(IRTI(ir->o - IR_ADDOV + IR_ADD), ir->op1, ir->op2);
    return tr;
  }
  /* Conversion may be narrowed, too. See above. */
  return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX);
}
/* Narrow conversion to integer operand (overflow undefined).
**
** Strings are first converted with a guarded STRTO. FP numbers get an
** IRCONV_ANY conversion to integer. Integers have their overflow checks
** stripped. Any other type aborts the trace with LJ_TRERR_BADTYPE.
*/
TRef LJ_FASTCALL lj_opt_narrow_toint(jit_State *J, TRef tr)
{
  /*
  ** Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV.
  ** Use IRCONV_TOBIT for the cache entries, since the semantics are the same.
  */
  const IRRef stripmode = (IRT_INT<<5)|IRT_INT|IRCONV_TOBIT;
  if (tref_isstr(tr)) {
    tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
  }
  if (tref_isnum(tr)) {
    /* Conversion may be narrowed, too. See above. */
    return emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY);
  }
  if (!tref_isinteger(tr)) {
    lj_trace_err(J, LJ_TRERR_BADTYPE);
  }
  return narrow_stripov(J, tr, IR_MULOV, stripmode);
}
/* Narrow conversion to bitop operand (overflow wrapped).
**
** Strings are first converted with a guarded STRTO. FP numbers are
** converted with TOBIT. Integers have their ADDOV/SUBOV checks stripped.
** Any other type aborts the trace with LJ_TRERR_BADTYPE.
*/
TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr)
{
  /*
  ** Wrapped overflow semantics allow stripping of ADDOV and SUBOV.
  ** MULOV cannot be stripped due to precision widening.
  */
  const IRRef stripmode = (IRT_INT<<5)|IRT_INT|IRCONV_TOBIT;
  if (tref_isstr(tr)) {
    tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
  }
  if (tref_isnum(tr)) {
    /* Conversion may be narrowed, too. See above. */
    return emitir(IRTI(IR_TOBIT), tr, lj_ir_knum_tobit(J));
  }
  if (!tref_isinteger(tr)) {
    lj_trace_err(J, LJ_TRERR_BADTYPE);
  }
  return narrow_stripov(J, tr, IR_SUBOV, stripmode);
}
#if LJ_HASFFI
/* Narrow C array index (overflow undefined).
**
** FP numbers get a truncating IRCONV_ANY conversion to IRT_INTP. Integers
** have their overflow checks stripped, using a sign-extending mode on
** 64 bit targets.
*/
TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr)
{
  IRRef stripmode;
  lua_assert(tref_isnumber(tr));
  if (tref_isnum(tr)) {
    return emitir(IRTI(IR_CONV), tr,
                  (IRT_INTP<<5)|IRT_NUM|IRCONV_TRUNC|IRCONV_ANY);
  }
  /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */
  stripmode = LJ_64 ? ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT) :
                      ((IRT_INTP<<5)|IRT_INT|IRCONV_TOBIT);
  return narrow_stripov(J, tr, IR_MULOV, stripmode);
}
#endif
/* -- Narrowing of arithmetic operators ----------------------------------- */ /* -- Narrowing of arithmetic operators ----------------------------------- */
/* Check whether a number fits into an int32_t (-0 is ok, too). */ /* Check whether a number fits into an int32_t (-0 is ok, too). */
static int numisint(lua_Number n) static int numisint(lua_Number n)
{ {
return (n == cast_num(lj_num2int(n))); return (n == (lua_Number)lj_num2int(n));
}
/* Narrowing of arithmetic operations.
**
** rb/rc are the recorded operands, vb/vc point to their runtime values and
** op is the IR arithmetic opcode. If both operands are integer-typed and
** the result computed from the runtime values fits into an int32_t, the
** overflow-checked integer variant of the op is emitted as a guard.
** Otherwise both operands are converted to FP numbers as needed and the
** plain FP op is emitted.
*/
TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
TValue *vb, TValue *vc, IROp op)
{
/* String operands: emit a guarded STRTO and convert the runtime value. */
if (tref_isstr(rb)) {
rb = emitir(IRTG(IR_STRTO, IRT_NUM), rb, 0);
/* NOTE(review): the result of lj_str_tonum is not checked here. vb is only
** read below if both operands are integer-typed, which a STRTO result
** (IRT_NUM) is not.
*/
lj_str_tonum(strV(vb), vb);
}
if (tref_isstr(rc)) {
rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
lj_str_tonum(strV(vc), vc);
}
/* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */
/* The type checks come first, so the runtime values are only folded for
** integer-typed operands.
*/
if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) &&
tref_isinteger(rb) && tref_isinteger(rc) &&
numisint(lj_vm_foldarith(numberVnum(vb), numberVnum(vc),
(int)op - (int)IR_ADD)))
return emitir(IRTGI((int)op - (int)IR_ADD + (int)IR_ADDOV), rb, rc);
/* Fallback: widen any non-FP operand and emit the FP operation. */
if (!tref_isnum(rb)) rb = emitir(IRTN(IR_CONV), rb, IRCONV_NUM_INT);
if (!tref_isnum(rc)) rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
return emitir(IRTN(op), rb, rc);
} }
/* Narrowing of modulo operator. */ /* Narrowing of modulo operator. */
@ -409,16 +559,15 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc)
/* Narrowing of power operator or math.pow. */ /* Narrowing of power operator or math.pow. */
TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc)
{ {
lua_Number n;
if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc)) if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc))
lj_trace_err(J, LJ_TRERR_BADTYPE); lj_trace_err(J, LJ_TRERR_BADTYPE);
n = numV(vc);
/* Narrowing must be unconditional to preserve (-x)^i semantics. */ /* Narrowing must be unconditional to preserve (-x)^i semantics. */
if (numisint(n)) { if (tvisint(vc) || numisint(numV(vc))) {
int checkrange = 0; int checkrange = 0;
/* Split pow is faster for bigger exponents. But do this only for (+k)^i. */ /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */
if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) { if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) {
if (!(n >= -65536.0 && n <= 65536.0)) goto split_pow; int32_t k = numberVint(vc);
if (!(k >= -65536 && k <= 65536)) goto split_pow;
checkrange = 1; checkrange = 1;
} }
if (!tref_isinteger(rc)) { if (!tref_isinteger(rc)) {
@ -448,20 +597,28 @@ split_pow:
/* -- Predictive narrowing of induction variables ------------------------- */ /* -- Predictive narrowing of induction variables ------------------------- */
/* Narrow the FORL index type by looking at the runtime values. */ /* Narrow a single runtime value. */
IRType lj_opt_narrow_forl(cTValue *forbase) static int narrow_forl(jit_State *J, cTValue *o)
{ {
lua_assert(tvisnum(&forbase[FORL_IDX]) && if (tvisint(o)) return 1;
tvisnum(&forbase[FORL_STOP]) && if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return numisint(numV(o));
tvisnum(&forbase[FORL_STEP])); return 0;
}
/* Narrow the FORL index type by looking at the runtime values. */
IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv)
{
lua_assert(tvisnumber(&tv[FORL_IDX]) &&
tvisnumber(&tv[FORL_STOP]) &&
tvisnumber(&tv[FORL_STEP]));
/* Narrow only if the runtime values of start/stop/step are all integers. */ /* Narrow only if the runtime values of start/stop/step are all integers. */
if (numisint(numV(&forbase[FORL_IDX])) && if (narrow_forl(J, &tv[FORL_IDX]) &&
numisint(numV(&forbase[FORL_STOP])) && narrow_forl(J, &tv[FORL_STOP]) &&
numisint(numV(&forbase[FORL_STEP]))) { narrow_forl(J, &tv[FORL_STEP])) {
/* And if the loop index can't possibly overflow. */ /* And if the loop index can't possibly overflow. */
lua_Number step = numV(&forbase[FORL_STEP]); lua_Number step = numberVnum(&tv[FORL_STEP]);
lua_Number sum = numV(&forbase[FORL_STOP]) + step; lua_Number sum = numberVnum(&tv[FORL_STOP]) + step;
if (0 <= step ? sum <= 2147483647.0 : sum >= -2147483648.0) if (0 <= step ? (sum <= 2147483647.0) : (sum >= -2147483648.0))
return IRT_INT; return IRT_INT;
} }
return IRT_NUM; return IRT_NUM;

View File

@ -13,6 +13,7 @@
#include "lj_err.h" #include "lj_err.h"
#include "lj_str.h" #include "lj_str.h"
#include "lj_tab.h" #include "lj_tab.h"
#include "lj_meta.h"
#include "lj_frame.h" #include "lj_frame.h"
#include "lj_bc.h" #include "lj_bc.h"
#include "lj_ff.h" #include "lj_ff.h"
@ -102,7 +103,7 @@ static void rec_check_slots(jit_State *J)
lua_assert((J->slot[s+1] & TREF_FRAME)); lua_assert((J->slot[s+1] & TREF_FRAME));
depth++; depth++;
} else { } else {
if (tvisnum(tv)) if (tvisnumber(tv))
lua_assert(tref_isnumber(tr)); /* Could be IRT_INT etc., too. */ lua_assert(tref_isnumber(tr)); /* Could be IRT_INT etc., too. */
else else
lua_assert(itype2irt(tv) == tref_type(tr)); lua_assert(itype2irt(tv) == tref_type(tr));
@ -197,6 +198,7 @@ typedef enum {
static void canonicalize_slots(jit_State *J) static void canonicalize_slots(jit_State *J)
{ {
BCReg s; BCReg s;
if (LJ_DUALNUM) return;
for (s = J->baseslot+J->maxslot-1; s >= 1; s--) { for (s = J->baseslot+J->maxslot-1; s >= 1; s--) {
TRef tr = J->slot[s]; TRef tr = J->slot[s];
if (tref_isinteger(tr)) { if (tref_isinteger(tr)) {
@ -254,16 +256,16 @@ static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t)
} }
if (op == BC_KSHORT) { if (op == BC_KSHORT) {
int32_t k = (int32_t)(int16_t)bc_d(ins); int32_t k = (int32_t)(int16_t)bc_d(ins);
return t == IRT_INT ? lj_ir_kint(J, k) : lj_ir_knum(J, cast_num(k)); return t == IRT_INT ? lj_ir_kint(J, k) : lj_ir_knum(J, (lua_Number)k);
} else { } else {
lua_Number n = proto_knum(J->pt, bc_d(ins)); cTValue *tv = proto_knumtv(J->pt, bc_d(ins));
if (t == IRT_INT) { if (t == IRT_INT) {
int32_t k = lj_num2int(n); int32_t k = numberVint(tv);
if (n == cast_num(k)) /* -0 is ok here. */ if (tvisint(tv) || numV(tv) == (lua_Number)k) /* -0 is ok here. */
return lj_ir_kint(J, k); return lj_ir_kint(J, k);
return 0; /* Type mismatch. */ return 0; /* Type mismatch. */
} else { } else {
return lj_ir_knum(J, n); return lj_ir_knum(J, numberVnum(tv));
} }
} }
} }
@ -273,41 +275,47 @@ static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t)
return 0; /* No assignment to this slot found? */ return 0; /* No assignment to this slot found? */
} }
/* Load and optionally convert a FORI argument from a slot.
**
** IRSLOAD_CONVERT is added to the load mode if the runtime value in the
** slot is an integer but a non-integer type is requested, or vice versa.
** The load is guarded if the mode asks for a type check, or if a
** conversion to integer is needed and the upper bits of the mode
** (mode >> 16) are zero.
*/
static TRef fori_load(jit_State *J, BCReg slot, IRType t, int mode)
{
  int wantint = (t == IRT_INT);
  int conv = (tvisint(&J->L->base[slot]) != wantint) ? IRSLOAD_CONVERT : 0;
  int guarded = (mode & IRSLOAD_TYPECHECK) ||
                (conv && wantint && !(mode >> 16));
  return sloadt(J, (int32_t)slot, t + (guarded ? IRT_GUARD : 0), mode + conv);
}
/* Peek before FORI to find a const initializer. Otherwise load from slot. */ /* Peek before FORI to find a const initializer. Otherwise load from slot. */
static TRef fori_arg(jit_State *J, const BCIns *fori, BCReg slot, IRType t) static TRef fori_arg(jit_State *J, const BCIns *fori, BCReg slot,
IRType t, int mode)
{ {
TRef tr = J->base[slot]; TRef tr = J->base[slot];
if (!tr) { if (!tr) {
tr = find_kinit(J, fori, slot, t); tr = find_kinit(J, fori, slot, t);
if (!tr) if (!tr)
tr = sloadt(J, (int32_t)slot, tr = fori_load(J, slot, t, mode);
t == IRT_INT ? (IRT_INT|IRT_GUARD) : t,
t == IRT_INT ? (IRSLOAD_CONVERT|IRSLOAD_READONLY|IRSLOAD_INHERIT) :
(IRSLOAD_READONLY|IRSLOAD_INHERIT));
} }
return tr; return tr;
} }
/* In-place coercion of FORI arguments. */ /* Return the direction of the FOR loop iterator.
static lua_Number for_coerce(jit_State *J, TValue *o)
{
if (!tvisnum(o) && !(tvisstr(o) && lj_str_tonum(strV(o), o)))
lj_trace_err(J, LJ_TRERR_BADTYPE);
return numV(o);
}
/* Simulate the runtime behavior of the FOR loop iterator.
** It's important to exactly reproduce the semantics of the interpreter. ** It's important to exactly reproduce the semantics of the interpreter.
*/ */
static LoopEvent for_iter(jit_State *J, IROp *op, BCReg ra, int isforl) static int rec_for_direction(cTValue *o)
{ {
TValue *forbase = &J->L->base[ra]; return (tvisint(o) ? intV(o) : (int32_t)o->u32.hi) >= 0;
lua_Number stopv = for_coerce(J, &forbase[FORL_STOP]); }
lua_Number idxv = for_coerce(J, &forbase[FORL_IDX]);
lua_Number stepv = for_coerce(J, &forbase[FORL_STEP]); /* Simulate the runtime behavior of the FOR loop iterator. */
static LoopEvent rec_for_iter(IROp *op, cTValue *o, int isforl)
{
lua_Number stopv = numberVnum(&o[FORL_STOP]);
lua_Number idxv = numberVnum(&o[FORL_IDX]);
lua_Number stepv = numberVnum(&o[FORL_STEP]);
if (isforl) if (isforl)
idxv += stepv; idxv += stepv;
if ((int32_t)forbase[FORL_STEP].u32.hi >= 0) { if (rec_for_direction(&o[FORL_STEP])) {
if (idxv <= stopv) { *op = IR_LE; return LOOPEV_ENTER; } if (idxv <= stopv) { *op = IR_LE; return LOOPEV_ENTER; }
*op = IR_GT; return LOOPEV_LEAVE; *op = IR_GT; return LOOPEV_LEAVE;
} else { } else {
@ -316,44 +324,123 @@ static LoopEvent for_iter(jit_State *J, IROp *op, BCReg ra, int isforl)
} }
} }
/* Record checks for FOR loop overflow and step direction.
**
** t is the loop index type, dir is non-zero for a non-negative step, and
** stop/step are the recorded loop limit and increment. Nothing is emitted
** if both stop and step are constants, or if step is constant and the
** index is not narrowed to an integer.
*/
static void rec_for_check(jit_State *J, IRType t, int dir, TRef stop, TRef step)
{
  const int narrowed = (t == IRT_INT);
  if (tref_isk(step)) {
    if (narrowed && !tref_isk(stop)) {
      /* Constant step: optimize overflow check to a range check for stop. */
      int32_t kstep = IR(tref_ref(step))->i;
      int32_t limit = (int32_t)(dir ? 0x7fffffff : 0x80000000) - kstep;
      emitir(IRTGI(dir ? IR_LE : IR_GE), stop, lj_ir_kint(J, limit));
    }
    return;
  }
  {
    /* Non-constant step: need a guard for the direction. */
    TRef zero = narrowed ? lj_ir_kint(J, 0) : lj_ir_knum_zero(J);
    emitir(IRTG(dir ? IR_GE : IR_LT, t), step, zero);
  }
  /* Add hoistable overflow checks for a narrowed FORL index. */
  if (!narrowed)
    return;
  if (!tref_isk(stop)) {
    /* Stop+step variable: need full overflow check. */
    TRef sum = emitir(IRTGI(IR_ADDOV), step, stop);
    emitir(IRTI(IR_USE), sum, 0);  /* ADDOV is weak. Avoid dead result. */
    return;
  }
  {
    /* Constant stop: optimize check away or to a range check for step. */
    int32_t kstop = IR(tref_ref(stop))->i;
    if (dir && kstop > 0) {
      emitir(IRTGI(IR_LE), step, lj_ir_kint(J, (int32_t)0x7fffffff-kstop));
    } else if (!dir && kstop < 0) {
      emitir(IRTGI(IR_GE), step, lj_ir_kint(J, (int32_t)0x80000000-kstop));
    }
  }
}
/* Record a FORL instruction.
**
** Determines the loop index type, obtains stop and step via fori_arg() and
** fills in the scalar evolution entry *scev (type, direction, stop, step,
** start and idx references). With init != 0 the direction and overflow
** checks are emitted via rec_for_check() and the index is only loaded.
** With init == 0 the index is additionally incremented by step and stored
** back to the index slot. In both cases the FORL_EXT slot receives the
** index and J->maxslot is set to just past it.
*/
static void rec_for_loop(jit_State *J, const BCIns *fori, ScEvEntry *scev,
int init)
{
BCReg ra = bc_a(*fori);
cTValue *tv = &J->L->base[ra];
TRef idx = J->base[ra+FORL_IDX];
/* Reuse the type of an already recorded index. Otherwise predict it from
** the runtime values, but only at loop setup or in dual-number mode.
*/
IRType t = idx ? tref_type(idx) :
(init || LJ_DUALNUM) ? lj_opt_narrow_forl(J, tv) : IRT_NUM;
/* Loads are read-only, except in dual-number mode when the runtime type of
** the value at tv does not match the chosen index type.
*/
int mode = IRSLOAD_INHERIT +
((!LJ_DUALNUM || tvisint(tv) == (t == IRT_INT)) ? IRSLOAD_READONLY : 0);
TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode);
TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode);
int tc, dir = rec_for_direction(&tv[FORL_STEP]);
lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI);
scev->t.irt = t;
scev->dir = dir;
scev->stop = tref_ref(stop);
scev->step = tref_ref(step);
if (init)
rec_for_check(J, t, dir, stop, step);
/* A constant initializer for the index, if one is found (0 otherwise). */
scev->start = tref_ref(find_kinit(J, fori, ra+FORL_IDX, IRT_INT));
/* Dual-number mode needs a type check on the index load, unless start is
** known and both stop and step are constants.
*/
tc = (LJ_DUALNUM &&
!(scev->start && irref_isk(scev->stop) && irref_isk(scev->step))) ?
IRSLOAD_TYPECHECK : 0;
if (tc) {
/* Keep the loaded stop and step in their slots. */
J->base[ra+FORL_STOP] = stop;
J->base[ra+FORL_STEP] = step;
}
/* NOTE(review): the mode below uses J->scev.start, not scev->start. These
** differ when the caller passes a local ScEvEntry -- confirm intended.
*/
if (!idx)
idx = fori_load(J, ra+FORL_IDX, t,
IRSLOAD_INHERIT + tc + (J->scev.start << 16));
if (!init)
J->base[ra+FORL_IDX] = idx = emitir(IRT(IR_ADD, t), idx, step);
J->base[ra+FORL_EXT] = idx;
scev->idx = tref_ref(idx);
J->maxslot = ra+FORL_EXT+1;
}
/* Record FORL/JFORL or FORI/JFORI. */ /* Record FORL/JFORL or FORI/JFORI. */
static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
{ {
BCReg ra = bc_a(*fori); BCReg ra = bc_a(*fori);
IROp op; TValue *tv = &J->L->base[ra];
LoopEvent ev = for_iter(J, &op, ra, isforl);
TRef *tr = &J->base[ra]; TRef *tr = &J->base[ra];
TRef idx, stop; IROp op;
LoopEvent ev;
TRef stop;
IRType t; IRType t;
if (isforl) { /* Handle FORL/JFORL opcodes. */ if (isforl) { /* Handle FORL/JFORL opcodes. */
TRef step; TRef idx = tr[FORL_IDX];
idx = tr[FORL_IDX];
if (tref_ref(idx) == J->scev.idx) { if (tref_ref(idx) == J->scev.idx) {
t = J->scev.t.irt; t = J->scev.t.irt;
stop = J->scev.stop; stop = J->scev.stop;
step = J->scev.step; idx = emitir(IRT(IR_ADD, t), idx, J->scev.step);
tr[FORL_EXT] = tr[FORL_IDX] = idx;
} else { } else {
if (!idx) idx = sloadt(J, (int32_t)(ra+FORL_IDX), IRT_NUM, 0); ScEvEntry scev;
t = tref_type(idx); rec_for_loop(J, fori, &scev, 0);
stop = fori_arg(J, fori, ra+FORL_STOP, t); t = scev.t.irt;
step = fori_arg(J, fori, ra+FORL_STEP, t); stop = scev.stop;
} }
tr[FORL_IDX] = idx = emitir(IRT(IR_ADD, t), idx, step);
} else { /* Handle FORI/JFORI opcodes. */ } else { /* Handle FORI/JFORI opcodes. */
BCReg i; BCReg i;
t = IRT_NUM; lj_meta_for(J->L, tv);
t = lj_opt_narrow_forl(J, tv);
for (i = FORL_IDX; i <= FORL_STEP; i++) { for (i = FORL_IDX; i <= FORL_STEP; i++) {
lua_assert(J->base[ra+i] != 0); /* Assumes the slots are already set. */ lua_assert(tref_isnumber_str(tr[i]));
tr[i] = lj_ir_tonum(J, J->base[ra+i]); if (tref_isstr(tr[i]))
tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0);
if (t == IRT_INT) {
if (!tref_isinteger(tr[i]))
tr[i] = emitir(IRTI(IR_CONV), tr[i], IRCONV_INT_NUM|IRCONV_CHECK);
} else {
if (!tref_isnum(tr[i]))
tr[i] = emitir(IRTN(IR_CONV), tr[i], IRCONV_NUM_INT);
} }
idx = tr[FORL_IDX]; }
tr[FORL_EXT] = tr[FORL_IDX];
stop = tr[FORL_STOP]; stop = tr[FORL_STOP];
if (!tref_isk(tr[FORL_STEP])) /* Non-const step: need direction guard. */ rec_for_check(J, t, rec_for_direction(&tv[FORL_STEP]), stop, tr[FORL_STEP]);
emitir(IRTG(((op-IR_LT)>>1)+IR_LT, IRT_NUM),
tr[FORL_STEP], lj_ir_knum_zero(J));
} }
tr[FORL_EXT] = idx; ev = rec_for_iter(&op, tv, isforl);
if (ev == LOOPEV_LEAVE) { if (ev == LOOPEV_LEAVE) {
J->maxslot = ra+FORL_EXT+1; J->maxslot = ra+FORL_EXT+1;
J->pc = fori+1; J->pc = fori+1;
@ -363,7 +450,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
} }
lj_snap_add(J); lj_snap_add(J);
emitir(IRTG(op, t), idx, stop); emitir(IRTG(op, t), tr[FORL_IDX], stop);
if (ev == LOOPEV_LEAVE) { if (ev == LOOPEV_LEAVE) {
J->maxslot = ra; J->maxslot = ra;
@ -870,7 +957,7 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize)
if (ref == J->scev.idx) { if (ref == J->scev.idx) {
int32_t stop; int32_t stop;
lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD); lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD);
stop = lj_num2int(numV(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP])); stop = numberVint(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]);
/* Runtime value for stop of loop is within bounds? */ /* Runtime value for stop of loop is within bounds? */
if ((int64_t)stop + ofs < (int64_t)asize) { if ((int64_t)stop + ofs < (int64_t)asize) {
/* Emit invariant bounds check for stop. */ /* Emit invariant bounds check for stop. */
@ -897,15 +984,12 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix)
/* Integer keys are looked up in the array part first. */ /* Integer keys are looked up in the array part first. */
key = ix->key; key = ix->key;
if (tref_isnumber(key)) { if (tref_isnumber(key)) {
lua_Number n = numV(&ix->keyv); int32_t k = numberVint(&ix->keyv);
int32_t k = lj_num2int(n); if (!tvisint(&ix->keyv) && numV(&ix->keyv) != (lua_Number)k)
lua_assert(tvisnum(&ix->keyv)); k = LJ_MAX_ASIZE;
/* Potential array key? */ if ((MSize)k < LJ_MAX_ASIZE) { /* Potential array key? */
if ((MSize)k < LJ_MAX_ASIZE && n == cast_num(k)) { TRef ikey = lj_opt_narrow_index(J, key);
TRef asizeref, ikey = key; TRef asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE);
if (!tref_isinteger(ikey))
ikey = emitir(IRTGI(IR_CONV), ikey, IRCONV_INT_NUM|IRCONV_INDEX);
asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE);
if ((MSize)k < t->asize) { /* Currently an array key? */ if ((MSize)k < t->asize) { /* Currently an array key? */
TRef arrayref; TRef arrayref;
rec_idx_abc(J, asizeref, ikey, t->asize); rec_idx_abc(J, asizeref, ikey, t->asize);
@ -1081,7 +1165,8 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
} else { } else {
keybarrier = 0; /* Previous non-nil value kept the key alive. */ keybarrier = 0; /* Previous non-nil value kept the key alive. */
} }
if (tref_isinteger(ix->val)) /* Convert int to number before storing. */ /* Convert int to number before storing. */
if (!LJ_DUALNUM && tref_isinteger(ix->val))
ix->val = emitir(IRTN(IR_CONV), ix->val, IRCONV_NUM_INT); ix->val = emitir(IRTN(IR_CONV), ix->val, IRCONV_NUM_INT);
emitir(IRT(loadop+IRDELTA_L2S, tref_type(ix->val)), xref, ix->val); emitir(IRT(loadop+IRDELTA_L2S, tref_type(ix->val)), xref, ix->val);
if (keybarrier || tref_isgcv(ix->val)) if (keybarrier || tref_isgcv(ix->val))
@ -1135,7 +1220,8 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val)
if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitive refs. */ if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitive refs. */
return res; return res;
} else { /* Upvalue store. */ } else { /* Upvalue store. */
if (tref_isinteger(val)) /* Convert int to number before storing. */ /* Convert int to number before storing. */
if (!LJ_DUALNUM && tref_isinteger(val))
val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
emitir(IRT(IR_USTORE, tref_type(val)), uref, val); emitir(IRT(IR_USTORE, tref_type(val)), uref, val);
if (needbarrier && tref_isgcv(val)) if (needbarrier && tref_isgcv(val))
@ -1455,16 +1541,15 @@ void lj_record_ins(jit_State *J)
case BCMnone: rb = 0; rc = bc_d(ins); break; /* Upgrade rc to 'rd'. */ case BCMnone: rb = 0; rc = bc_d(ins); break; /* Upgrade rc to 'rd'. */
case BCMvar: case BCMvar:
copyTV(J->L, rbv, &lbase[rb]); ix.tab = rb = getslot(J, rb); break; copyTV(J->L, rbv, &lbase[rb]); ix.tab = rb = getslot(J, rb); break;
case BCMnum: { lua_Number n = proto_knum(J->pt, rb);
setnumV(rbv, n); ix.tab = rb = lj_ir_knumint(J, n); } break;
default: break; /* Handled later. */ default: break; /* Handled later. */
} }
switch (bcmode_c(op)) { switch (bcmode_c(op)) {
case BCMvar: case BCMvar:
copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break; copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break;
case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break; case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break;
case BCMnum: { lua_Number n = proto_knum(J->pt, rc); case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc);
setnumV(rcv, n); ix.key = rc = lj_ir_knumint(J, n); } break; copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) :
lj_ir_knumint(J, numV(tv)); } break;
case BCMstr: { GCstr *s = gco2str(proto_kgc(J->pt, ~(ptrdiff_t)rc)); case BCMstr: { GCstr *s = gco2str(proto_kgc(J->pt, ~(ptrdiff_t)rc));
setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break; setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break;
default: break; /* Handled later. */ default: break; /* Handled later. */
@ -1502,9 +1587,11 @@ void lj_record_ins(jit_State *J)
irop = (int)op - (int)BC_ISLT + (int)IR_LT; irop = (int)op - (int)BC_ISLT + (int)IR_LT;
if (ta == IRT_NUM) { if (ta == IRT_NUM) {
if ((irop & 1)) irop ^= 4; /* ISGE/ISGT are unordered. */ if ((irop & 1)) irop ^= 4; /* ISGE/ISGT are unordered. */
if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 5; if (!lj_ir_numcmp(numberVnum(rav), numberVnum(rcv), (IROp)irop))
irop ^= 5;
} else if (ta == IRT_INT) { } else if (ta == IRT_INT) {
if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 1; if (!lj_ir_numcmp(numberVnum(rav), numberVnum(rcv), (IROp)irop))
irop ^= 1;
} else if (ta == IRT_STR) { } else if (ta == IRT_STR) {
if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1; if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1;
ra = lj_ir_call(J, IRCALL_lj_str_cmp, ra, rc); ra = lj_ir_call(J, IRCALL_lj_str_cmp, ra, rc);
@ -1599,13 +1686,11 @@ void lj_record_ins(jit_State *J)
case BC_ADDVN: case BC_SUBVN: case BC_MULVN: case BC_DIVVN: case BC_ADDVN: case BC_SUBVN: case BC_MULVN: case BC_DIVVN:
case BC_ADDVV: case BC_SUBVV: case BC_MULVV: case BC_DIVVV: { case BC_ADDVV: case BC_SUBVV: case BC_MULVV: case BC_DIVVV: {
MMS mm = bcmode_mm(op); MMS mm = bcmode_mm(op);
if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) { if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
rb = lj_ir_tonum(J, rb); rc = lj_opt_narrow_arith(J, rb, rc, &ix.tabv, &ix.keyv,
rc = lj_ir_tonum(J, rc); (int)mm - (int)MM_add + (int)IR_ADD);
rc = emitir(IRTN((int)mm - (int)MM_add + (int)IR_ADD), rb, rc); else
} else {
rc = rec_mm_arith(J, &ix, mm); rc = rec_mm_arith(J, &ix, mm);
}
break; break;
} }
@ -1827,59 +1912,6 @@ void lj_record_ins(jit_State *J)
/* -- Recording setup ----------------------------------------------------- */ /* -- Recording setup ----------------------------------------------------- */
/* Setup recording for a FORL loop. */
static void rec_setup_forl(jit_State *J, const BCIns *fori)
{
BCReg ra = bc_a(*fori);
cTValue *forbase = &J->L->base[ra];
IRType t = (J->flags & JIT_F_OPT_NARROW) ? lj_opt_narrow_forl(forbase)
: IRT_NUM;
TRef start;
TRef stop = fori_arg(J, fori, ra+FORL_STOP, t);
TRef step = fori_arg(J, fori, ra+FORL_STEP, t);
int dir = (0 <= numV(&forbase[FORL_STEP]));
lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI);
J->scev.t.irt = t;
J->scev.dir = dir;
J->scev.stop = tref_ref(stop);
J->scev.step = tref_ref(step);
if (!tref_isk(step)) {
/* Non-constant step: need a guard for the direction. */
TRef zero = (t == IRT_INT) ? lj_ir_kint(J, 0) : lj_ir_knum_zero(J);
emitir(IRTG(dir ? IR_GE : IR_LT, t), step, zero);
/* Add hoistable overflow checks for a narrowed FORL index. */
if (t == IRT_INT) {
if (tref_isk(stop)) {
/* Constant stop: optimize check away or to a range check for step. */
int32_t k = IR(tref_ref(stop))->i;
if (dir) {
if (k > 0)
emitir(IRTGI(IR_LE), step, lj_ir_kint(J, (int32_t)0x7fffffff-k));
} else {
if (k < 0)
emitir(IRTGI(IR_GE), step, lj_ir_kint(J, (int32_t)0x80000000-k));
}
} else {
/* Stop+step variable: need full overflow check. */
TRef tr = emitir(IRTGI(IR_ADDOV), step, stop);
emitir(IRTI(IR_USE), tr, 0); /* ADDOV is weak. Avoid dead result. */
}
}
} else if (t == IRT_INT && !tref_isk(stop)) {
/* Constant step: optimize overflow check to a range check for stop. */
int32_t k = IR(tref_ref(step))->i;
k = (int32_t)(dir ? 0x7fffffff : 0x80000000) - k;
emitir(IRTGI(dir ? IR_LE : IR_GE), stop, lj_ir_kint(J, k));
}
J->scev.start = tref_ref(find_kinit(J, fori, ra+FORL_IDX, IRT_INT));
start = sloadt(J, (int32_t)(ra+FORL_IDX),
(t == IRT_INT && !J->scev.start) ? (IRT_INT|IRT_GUARD) : t,
t == IRT_INT ? (IRSLOAD_CONVERT|IRSLOAD_INHERIT) : IRSLOAD_INHERIT);
J->base[ra+FORL_EXT] = start;
J->scev.idx = tref_ref(start);
J->maxslot = ra+FORL_EXT+1;
}
/* Setup recording for a root trace started by a hot loop. */ /* Setup recording for a root trace started by a hot loop. */
static const BCIns *rec_setup_root(jit_State *J) static const BCIns *rec_setup_root(jit_State *J)
{ {
@ -2033,7 +2065,7 @@ void lj_record_setup(jit_State *J)
if (J->pc > proto_bc(J->pt) && bc_op(J->pc[-1]) == BC_JFORI && if (J->pc > proto_bc(J->pt) && bc_op(J->pc[-1]) == BC_JFORI &&
bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) { bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) {
lj_snap_add(J); lj_snap_add(J);
rec_setup_forl(J, J->pc-1); rec_for_loop(J, J->pc-1, &J->scev, 1);
goto sidecheck; goto sidecheck;
} }
} else { } else {
@ -2054,7 +2086,7 @@ void lj_record_setup(jit_State *J)
*/ */
lj_snap_add(J); lj_snap_add(J);
if (bc_op(J->cur.startins) == BC_FORL) if (bc_op(J->cur.startins) == BC_FORL)
rec_setup_forl(J, J->pc-1); rec_for_loop(J, J->pc-1, &J->scev, 1);
if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) if (1 + J->pt->framesize >= LJ_MAX_JSLOTS)
lj_trace_err(J, LJ_TRERR_STACKOV); lj_trace_err(J, LJ_TRERR_STACKOV);
} }

View File

@ -68,7 +68,8 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
if (!(ir->op2 & IRSLOAD_INHERIT)) if (!(ir->op2 & IRSLOAD_INHERIT))
continue; continue;
/* No need to restore readonly slots and unmodified non-parent slots. */ /* No need to restore readonly slots and unmodified non-parent slots. */
if ((ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
(ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
sn |= SNAP_NORESTORE; sn |= SNAP_NORESTORE;
} }
map[n++] = sn; map[n++] = sn;

View File

@ -495,8 +495,8 @@ static int trace_abort(jit_State *J)
J->postproc = LJ_POST_NONE; J->postproc = LJ_POST_NONE;
lj_mcode_abort(J); lj_mcode_abort(J);
if (tvisnum(L->top-1)) if (tvisnumber(L->top-1))
e = (TraceError)lj_num2int(numV(L->top-1)); e = (TraceError)numberVint(L->top-1);
if (e == LJ_TRERR_MCODELM) { if (e == LJ_TRERR_MCODELM) {
J->state = LJ_TRACE_ASM; J->state = LJ_TRACE_ASM;
return 1; /* Retry ASM with new MCode area. */ return 1; /* Retry ASM with new MCode area. */
@ -703,8 +703,12 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
setintV(L->top++, J->exitno); setintV(L->top++, J->exitno);
setintV(L->top++, RID_NUM_GPR); setintV(L->top++, RID_NUM_GPR);
setintV(L->top++, RID_NUM_FPR); setintV(L->top++, RID_NUM_FPR);
for (i = 0; i < RID_NUM_GPR; i++) for (i = 0; i < RID_NUM_GPR; i++) {
setnumV(L->top++, cast_num(ex->gpr[i])); if (sizeof(ex->gpr[i]) == sizeof(int32_t))
setintV(L->top++, (int32_t)ex->gpr[i]);
else
setnumV(L->top++, (lua_Number)ex->gpr[i]);
}
for (i = 0; i < RID_NUM_FPR; i++) { for (i = 0; i < RID_NUM_FPR; i++) {
setnumV(L->top, ex->fpr[i]); setnumV(L->top, ex->fpr[i]);
if (LJ_UNLIKELY(tvisnan(L->top))) if (LJ_UNLIKELY(tvisnan(L->top)))