From bfce3c1127fd57fe0c935c92bcf45b4737041edd Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 10 Mar 2011 01:57:24 +0100 Subject: [PATCH] DUALNUM: Handle integer type in JIT compiler. --- src/Makefile.dep | 9 +- src/lj_asm.c | 35 +++--- src/lj_crecord.c | 35 +++--- src/lj_ffrecord.c | 39 +++--- src/lj_ir.c | 26 ---- src/lj_ir.h | 30 +++-- src/lj_iropt.h | 12 +- src/lj_meta.c | 28 +++-- src/lj_meta.h | 2 +- src/lj_obj.h | 2 - src/lj_opt_fold.c | 5 +- src/lj_opt_loop.c | 9 +- src/lj_opt_narrow.c | 235 ++++++++++++++++++++++++++++++------ src/lj_record.c | 282 ++++++++++++++++++++++++-------------------- src/lj_snap.c | 3 +- src/lj_trace.c | 12 +- 16 files changed, 486 insertions(+), 278 deletions(-) diff --git a/src/Makefile.dep b/src/Makefile.dep index 1684ebd7..8458ec78 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -128,15 +128,16 @@ lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ - lj_dispatch.h lj_traceerr.h + lj_dispatch.h lj_traceerr.h lj_vm.h lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ - lj_arch.h + lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h \ + lj_vm.h lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_state.h \ lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \ - lj_ffdef.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ + lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ + lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ diff --git a/src/lj_asm.c b/src/lj_asm.c index 5f3c5fab..d395010d 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -2059,7 +2059,7 @@ static void asm_href(ASMState *as, IRIns *ir) } else { emit_sjcc(as, CC_P, l_next); emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n)); - emit_sjcc(as, CC_A, l_next); + emit_sjcc(as, CC_AE, l_next); /* The type check avoids NaN penalties and complaints from Valgrind. */ #if LJ_64 emit_u32(as, LJ_TISNUM); @@ -2388,7 +2388,8 @@ static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) static void asm_ahuvload(ASMState *as, IRIns *ir) { - lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t)); + lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || + (LJ_DUALNUM && irt_isint(ir->t))); #if LJ_64 if (irt_islightud(ir->t)) { Reg dest = asm_load_lightud64(as, ir, 1); @@ -2409,8 +2410,9 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) } /* Always do the type check, even if the load result is unused. */ as->mrm.ofs += 4; - asm_guardcc(as, irt_isnum(ir->t) ? CC_A : CC_NE); - if (LJ_64 && irt_isnum(ir->t)) { + asm_guardcc(as, irt_isnum(ir->t) ? 
CC_AE : CC_NE); + if (LJ_64 && irt_type(ir->t) >= IRT_NUM) { + lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); emit_u32(as, LJ_TISNUM); emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); } else { @@ -2443,7 +2445,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir) if (ra_hasreg(src)) { emit_mrm(as, XO_MOVto, src, RID_MRM); } else if (!irt_ispri(irr->t)) { - lua_assert(irt_isaddr(ir->t)); + lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t))); emit_i32(as, irr->i); emit_mrm(as, XO_MOVmi, 0, RID_MRM); } @@ -2460,8 +2462,9 @@ static void asm_sload(ASMState *as, IRIns *ir) Reg base; lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); - lua_assert(!irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); - if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t)) { + lua_assert(LJ_DUALNUM || + !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); + if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { Reg left = ra_scratch(as, RSET_FPR); asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ base = ra_alloc1(as, REF_BASE, RSET_GPR); @@ -2481,12 +2484,14 @@ static void asm_sload(ASMState *as, IRIns *ir) Reg dest = ra_dest(as, ir, allow); base = ra_alloc1(as, REF_BASE, RSET_GPR); lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); - if ((ir->op2 & IRSLOAD_CONVERT)) - emit_rmro(as, XO_CVTSD2SI, dest, base, ofs); - else if (irt_isnum(t)) + if ((ir->op2 & IRSLOAD_CONVERT)) { + t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ + emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs); + } else if (irt_isnum(t)) { emit_rmro(as, XMM_MOVRM(as), dest, base, ofs); - else + } else { emit_rmro(as, XO_MOV, dest, base, ofs); + } } else { if (!(ir->op2 & IRSLOAD_TYPECHECK)) return; /* No type check: avoid base alloc. */ @@ -2494,8 +2499,9 @@ static void asm_sload(ASMState *as, IRIns *ir) } if ((ir->op2 & IRSLOAD_TYPECHECK)) { /* Need type check, even if the load result is unused. */ - asm_guardcc(as, irt_isnum(t) ? CC_A : CC_NE); - if (LJ_64 && irt_isnum(t)) { + asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE); + if (LJ_64 && irt_type(t) >= IRT_NUM) { + lua_assert(irt_isinteger(t) || irt_isnum(t)); emit_u32(as, LJ_TISNUM); emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); } else { @@ -3408,7 +3414,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) Reg src = ra_alloc1(as, ref, RSET_FPR); emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); } else { - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t)); + lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || + (LJ_DUALNUM && irt_isinteger(ir->t))); if (!irref_isk(ref)) { Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs); diff --git a/src/lj_crecord.c b/src/lj_crecord.c index c93cece3..9482cc18 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -185,6 +185,8 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, (sinfo & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); else if (dsize < 8 && ssize == 8) /* Truncate from 64 bit integer. */ sp = emitconv(sp, dsize < 4 ? 
IRT_INT : dt, st, 0); + else if (ssize <= 4) + sp = lj_opt_narrow_toint(J, sp); xstore: if (dt == IRT_I64 || dt == IRT_U64) lj_needsplit(J); if (dp == 0) return sp; @@ -355,10 +357,10 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, TValue *sval) CType *s; if (LJ_LIKELY(tref_isinteger(sp))) { sid = CTID_INT32; - svisnz = (void *)(intptr_t)(numV(sval) != 0); + svisnz = (void *)(intptr_t)(tvisint(sval)?(intV(sval)!=0):!tviszero(sval)); } else if (tref_isnum(sp)) { sid = CTID_DOUBLE; - svisnz = (void *)(intptr_t)(numV(sval) != 0); + svisnz = (void *)(intptr_t)(tvisint(sval)?(intV(sval)!=0):!tviszero(sval)); } else if (tref_isbool(sp)) { sp = lj_ir_kint(J, tref_istrue(sp) ? 1 : 0); sid = CTID_BOOL; @@ -443,16 +445,16 @@ static CTypeID crec_constructor(jit_State *J, GCcdata *cd, TRef tr) static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz) { IRIns *ir = IR(tref_ref(tr)); - if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && - ir->o == IR_ADD && irref_isk(ir->op2)) { + if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && irref_isk(ir->op2) && + (ir->o == IR_ADD || ir->o == IR_ADDOV || ir->o == IR_SUBOV)) { IRIns *irk = IR(ir->op2); - tr = ir->op1; -#if LJ_64 - if (irk->o == IR_KINT64) - *ofsp += (ptrdiff_t)ir_kint64(irk)->u64 * sz; + ptrdiff_t k; + if (LJ_64 && irk->o == IR_KINT64) + k = (ptrdiff_t)ir_kint64(irk)->u64 * sz; else -#endif - *ofsp += (ptrdiff_t)irk->i * sz; + k = (ptrdiff_t)irk->i * sz; + if (ir->o == IR_SUBOV) *ofsp -= k; else *ofsp += k; + tr = ir->op1; /* Not a TRef, but the caller doesn't care. */ } return tr; } @@ -477,16 +479,7 @@ void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd) idx = J->base[1]; if (tref_isnumber(idx)) { - /* The size of a ptrdiff_t is target-specific. */ -#if LJ_64 - if (tref_isnum(idx)) - idx = emitconv(idx, IRT_I64, IRT_NUM, IRCONV_TRUNC|IRCONV_ANY); - else - idx = emitconv(idx, IRT_I64, IRT_INT, IRCONV_SEXT); -#else - if (tref_isnum(idx)) - idx = emitconv(idx, IRT_INT, IRT_NUM, IRCONV_TRUNC|IRCONV_ANY); -#endif + idx = lj_opt_narrow_cindex(J, idx); integer_key: if (ctype_ispointer(ct->info)) { CTSize sz; @@ -635,7 +628,7 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) TRef sp, dp; TValue tv; TValue *sval = &tv; - setnumV(&tv, 0); + setintV(&tv, 0); if (!gcref(df->name)) continue; /* Ignore unnamed fields. */ dc = ctype_rawchild(cts, df); /* Field type. */ if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info))) diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 631321d9..8077bf84 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -63,9 +63,9 @@ typedef void (LJ_FASTCALL *RecordFunc)(jit_State *J, RecordFFData *rd); /* Get runtime value of int argument. */ static int32_t argv2int(jit_State *J, TValue *o) { - if (!tvisnum(o) && !(tvisstr(o) && lj_str_tonum(strV(o), o))) + if (!tvisnumber(o) && !(tvisstr(o) && lj_str_tonumber(strV(o), o))) lj_trace_err(J, LJ_TRERR_BADTYPE); - return lj_num2bit(numV(o)); + return tvisint(o) ? intV(o) : lj_num2int(numV(o)); } /* Get runtime value of string argument. */ @@ -75,9 +75,12 @@ static GCstr *argv2str(jit_State *J, TValue *o) return strV(o); } else { GCstr *s; - if (!tvisnum(o)) + if (!tvisnumber(o)) lj_trace_err(J, LJ_TRERR_BADTYPE); - s = lj_str_fromnum(J->L, &o->n); + if (tvisint(o)) + s = lj_str_fromint(J->L, intV(o)); + else + s = lj_str_fromnum(J->L, &o->n); setstrV(J->L, o, s); return s; } @@ -128,7 +131,7 @@ static void LJ_FASTCALL recff_type(jit_State *J, RecordFFData *rd) { /* Arguments already specialized. 
Result is a constant string. Neat, huh? */ uint32_t t; - if (tvisnum(&rd->argv[0])) + if (tvisnumber(&rd->argv[0])) t = ~LJ_TNUMX; else if (LJ_64 && tvislightud(&rd->argv[0])) t = ~LJ_TLIGHTUD; @@ -255,7 +258,7 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd) TRef tr = J->base[0]; TRef base = J->base[1]; if (tr && base) { - base = lj_ir_toint(J, base); + base = lj_opt_narrow_toint(J, base); if (!tref_isk(base) || IR(tref_ref(base))->i != 10) recff_nyiu(J); } @@ -332,12 +335,12 @@ static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd) RecordIndex ix; ix.tab = J->base[0]; if (tref_istab(ix.tab)) { - if (!tvisnum(&rd->argv[1])) /* No support for string coercion. */ + if (!tvisnumber(&rd->argv[1])) /* No support for string coercion. */ lj_trace_err(J, LJ_TRERR_BADTYPE); - setnumV(&ix.keyv, numV(&rd->argv[1])+(lua_Number)1); + setintV(&ix.keyv, numberVint(&rd->argv[1])+1); settabV(J->L, &ix.tabv, tabV(&rd->argv[0])); ix.val = 0; ix.idxchain = 0; - ix.key = lj_ir_toint(J, J->base[1]); + ix.key = lj_opt_narrow_toint(J, J->base[1]); J->base[0] = ix.key = emitir(IRTI(IR_ADD), ix.key, lj_ir_kint(J, 1)); J->base[1] = lj_record_idx(J, &ix); rd->nres = tref_isnil(J->base[1]) ? 0 : 2; @@ -525,26 +528,26 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd) /* Record unary bit.tobit, bit.bnot, bit.bswap. */ static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd) { - TRef tr = lj_ir_tobit(J, J->base[0]); + TRef tr = lj_opt_narrow_tobit(J, J->base[0]); J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0); } /* Record N-ary bit.band, bit.bor, bit.bxor. */ static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) { - TRef tr = lj_ir_tobit(J, J->base[0]); + TRef tr = lj_opt_narrow_tobit(J, J->base[0]); uint32_t op = rd->data; BCReg i; for (i = 1; J->base[i] != 0; i++) - tr = emitir(IRTI(op), tr, lj_ir_tobit(J, J->base[i])); + tr = emitir(IRTI(op), tr, lj_opt_narrow_tobit(J, J->base[i])); J->base[0] = tr; } /* Record bit shifts. */ static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) { - TRef tr = lj_ir_tobit(J, J->base[0]); - TRef tsh = lj_ir_tobit(J, J->base[1]); + TRef tr = lj_opt_narrow_tobit(J, J->base[0]); + TRef tsh = lj_opt_narrow_tobit(J, J->base[1]); if (!(rd->data < IR_BROL ? 
LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && !tref_isk(tsh)) tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); @@ -570,25 +573,25 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd) int32_t start, end; if (rd->data) { /* string.sub(str, start [,end]) */ start = argv2int(J, &rd->argv[1]); - trstart = lj_ir_toint(J, J->base[1]); + trstart = lj_opt_narrow_toint(J, J->base[1]); trend = J->base[2]; if (tref_isnil(trend)) { trend = lj_ir_kint(J, -1); end = -1; } else { - trend = lj_ir_toint(J, trend); + trend = lj_opt_narrow_toint(J, trend); end = argv2int(J, &rd->argv[2]); } } else { /* string.byte(str, [,start [,end]]) */ if (J->base[1]) { start = argv2int(J, &rd->argv[1]); - trstart = lj_ir_toint(J, J->base[1]); + trstart = lj_opt_narrow_toint(J, J->base[1]); trend = J->base[2]; if (tref_isnil(trend)) { trend = trstart; end = start; } else { - trend = lj_ir_toint(J, trend); + trend = lj_opt_narrow_toint(J, trend); end = argv2int(J, &rd->argv[2]); } } else { diff --git a/src/lj_ir.c b/src/lj_ir.c index 1d57938e..721cfd0f 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c @@ -426,32 +426,6 @@ TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr) return tr; } -/* Convert from number or string to bitop operand (overflow wrapped). */ -TRef LJ_FASTCALL lj_ir_tobit(jit_State *J, TRef tr) -{ - if (!tref_isinteger(tr)) { - if (tref_isstr(tr)) - tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); - else if (!tref_isnum(tr)) - lj_trace_err(J, LJ_TRERR_BADTYPE); - tr = emitir(IRTI(IR_TOBIT), tr, lj_ir_knum_tobit(J)); - } - return tr; -} - -/* Convert from number or string to integer (overflow undefined). */ -TRef LJ_FASTCALL lj_ir_toint(jit_State *J, TRef tr) -{ - if (!tref_isinteger(tr)) { - if (tref_isstr(tr)) - tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); - else if (!tref_isnum(tr)) - lj_trace_err(J, LJ_TRERR_BADTYPE); - tr = emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY); - } - return tr; -} - /* -- Miscellaneous IR ops ------------------------------------------------ */ /* Evaluate numeric comparison. */ diff --git a/src/lj_ir.h b/src/lj_ir.h index 060cf562..c46bbbe0 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -124,7 +124,7 @@ _(XBAR, S , ___, ___) \ \ /* Type conversions. 
*/ \ - _(CONV, N , ref, lit) \ + _(CONV, NW, ref, lit) \ _(TOBIT, N , ref, ref) \ _(TOSTR, N , ref, ___) \ _(STRTO, N , ref, ___) \ @@ -345,8 +345,8 @@ typedef enum { #define IRM_AW (IRM_A|IRM_W) #define IRM_LW (IRM_L|IRM_W) -#define irm_op1(m) (cast(IRMode, (m)&3)) -#define irm_op2(m) (cast(IRMode, ((m)>>2)&3)) +#define irm_op1(m) ((IRMode)((m)&3)) +#define irm_op2(m) ((IRMode)(((m)>>2)&3)) #define irm_iscomm(m) ((m) & IRM_C) #define irm_kind(m) ((m) & IRM_S) @@ -401,8 +401,8 @@ typedef struct IRType1 { uint8_t irt; } IRType1; #define IRTG(o, t) (IRT((o), IRT_GUARD|(t))) #define IRTGI(o) (IRT((o), IRT_GUARD|IRT_INT)) -#define irt_t(t) (cast(IRType, (t).irt)) -#define irt_type(t) (cast(IRType, (t).irt & IRT_TYPE)) +#define irt_t(t) ((IRType)(t).irt) +#define irt_type(t) ((IRType)((t).irt & IRT_TYPE)) #define irt_sametype(t1, t2) ((((t1).irt ^ (t2).irt) & IRT_TYPE) == 0) #define irt_typerange(t, first, last) \ ((uint32_t)((t).irt & IRT_TYPE) - (uint32_t)(first) <= (uint32_t)(last-first)) @@ -441,18 +441,30 @@ typedef struct IRType1 { uint8_t irt; } IRType1; static LJ_AINLINE IRType itype2irt(const TValue *tv) { - if (tvisnum(tv)) + if (tvisint(tv)) + return IRT_INT; + else if (tvisnum(tv)) return IRT_NUM; #if LJ_64 else if (tvislightud(tv)) return IRT_LIGHTUD; #endif else - return cast(IRType, ~itype(tv)); + return (IRType)~itype(tv); } -#define irt_toitype(t) \ - check_exp(!(LJ_64 && irt_islightud((t))), ~(uint32_t)irt_type((t))) +static LJ_AINLINE uint32_t irt_toitype_(IRType t) +{ + lua_assert(!LJ_64 || t != IRT_LIGHTUD); + if (LJ_DUALNUM && t > IRT_NUM) { + return LJ_TISNUM; + } else { + lua_assert(t <= IRT_NUM); + return ~(uint32_t)t; + } +} + +#define irt_toitype(t) irt_toitype_(irt_type((t))) #define irt_isguard(t) ((t).irt & IRT_GUARD) #define irt_ismarked(t) ((t).irt & IRT_MARK) diff --git a/src/lj_iropt.h b/src/lj_iropt.h index db99c118..1c94e91c 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h @@ -84,8 +84,6 @@ LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir); /* Convert IR operand types. */ LJ_FUNC TRef LJ_FASTCALL lj_ir_tonum(jit_State *J, TRef tr); LJ_FUNC TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr); -LJ_FUNC TRef LJ_FASTCALL lj_ir_tobit(jit_State *J, TRef tr); -LJ_FUNC TRef LJ_FASTCALL lj_ir_toint(jit_State *J, TRef tr); /* Miscellaneous IR ops. */ LJ_FUNC int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op); @@ -134,9 +132,17 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J); /* Narrowing. */ LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J); +LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef key); +LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_toint(jit_State *J, TRef tr); +LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr); +#if LJ_HASFFI +LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef key); +#endif +LJ_FUNC TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, + TValue *vb, TValue *vc, IROp op); LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc); LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc); -LJ_FUNC IRType lj_opt_narrow_forl(cTValue *forbase); +LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase); /* Optimization passes. 
*/ LJ_FUNC void lj_opt_dce(jit_State *J); diff --git a/src/lj_meta.c b/src/lj_meta.c index 23f11f58..48cee510 100644 --- a/src/lj_meta.c +++ b/src/lj_meta.c @@ -393,13 +393,27 @@ void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o) lj_err_msg(L, LJ_ERR_FORLIM); if (!(tvisnumber(o+2) || (tvisstr(o+2) && lj_str_tonumber(strV(o+2), o+2)))) lj_err_msg(L, LJ_ERR_FORSTEP); -#if LJ_DUALNUM - /* Ensure all slots are integers or all slots are numbers. */ - if (!(tvisint(o) && tvisint(o+1) && tvisint(o+2))) { - if (tvisint(o)) setnumV(o, (lua_Number)intV(o)); - if (tvisint(o+1)) setnumV(o+1, (lua_Number)intV(o+1)); - if (tvisint(o+2)) setnumV(o+2, (lua_Number)intV(o+2)); + if (LJ_DUALNUM) { + /* Ensure all slots are integers or all slots are numbers. */ + int32_t k[3]; + int nint = 0; + ptrdiff_t i; + for (i = 0; i <= 2; i++) { + if (tvisint(o+i)) { + k[i] = intV(o+i); nint++; + } else { + k[i] = lj_num2int(numV(o+i)); nint += ((lua_Number)k[i] == numV(o+i)); + } + } + if (nint == 3) { /* Narrow to integers. */ + setintV(o, k[0]); + setintV(o+1, k[1]); + setintV(o+2, k[2]); + } else if (nint != 0) { /* Widen to numbers. */ + if (tvisint(o)) setnumV(o, (lua_Number)intV(o)); + if (tvisint(o+1)) setnumV(o+1, (lua_Number)intV(o+1)); + if (tvisint(o+2)) setnumV(o+2, (lua_Number)intV(o+2)); + } } -#endif } diff --git a/src/lj_meta.h b/src/lj_meta.h index 687e6c08..32b3dec3 100644 --- a/src/lj_meta.h +++ b/src/lj_meta.h @@ -29,6 +29,6 @@ LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne); LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins); LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); -LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *base); +LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o); #endif diff --git a/src/lj_obj.h b/src/lj_obj.h index 88289f3e..19a2345f 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h @@ -325,8 +325,6 @@ typedef struct GCproto { #define proto_kgc(pt, idx) \ check_exp((uintptr_t)(intptr_t)(idx) >= (uintptr_t)-(intptr_t)(pt)->sizekgc, \ gcref(mref((pt)->k, GCRef)[(idx)])) -#define proto_knum(pt, idx) \ - check_exp((uintptr_t)(idx) < (pt)->sizekn, mref((pt)->k, lua_Number)[(idx)]) #define proto_knumtv(pt, idx) \ check_exp((uintptr_t)(idx) < (pt)->sizekn, &mref((pt)->k, TValue)[(idx)]) #define proto_bc(pt) ((BCIns *)((char *)(pt) + sizeof(GCproto))) diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 471a4b29..e2d5c517 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -558,7 +558,10 @@ LJFOLD(CONV KINT IRCONV_I64_INT) LJFOLD(CONV KINT IRCONV_U64_INT) LJFOLDF(kfold_conv_kint_i64) { - return INT64FOLD((uint64_t)(int64_t)fleft->i); + if ((fins->op2 & IRCONV_SEXT)) + return INT64FOLD((uint64_t)(int64_t)fleft->i); + else + return INT64FOLD((uint64_t)(int64_t)(uint32_t)fleft->i); } LJFOLD(CONV KINT64 IRCONV_NUM_I64) diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c index 559e579e..6dd06636 100644 --- a/src/lj_opt_loop.c +++ b/src/lj_opt_loop.c @@ -300,8 +300,11 @@ static void loop_unroll(jit_State *J) } /* Check all loop-carried dependencies for type instability. */ if (!irt_sametype(t, irr->t)) { - if (irt_isnum(t) && irt_isinteger(irr->t)) /* Fix int->num case. */ + if (irt_isnum(t) && irt_isinteger(irr->t)) /* Fix int->num. */ subst[ins] = tref_ref(emitir(IRTN(IR_CONV), ref, IRCONV_NUM_INT)); + else if (irt_isnum(irr->t) && irt_isinteger(t)) /* Fix num->int. 
*/ + subst[ins] = tref_ref(emitir(IRTGI(IR_CONV), ref, + IRCONV_INT_NUM|IRCONV_CHECK)); else if (!(irt_isinteger(t) && irt_isinteger(irr->t))) lj_trace_err(J, LJ_TRERR_TYPEINS); } @@ -355,8 +358,8 @@ int lj_opt_loop(jit_State *J) int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); if (LJ_UNLIKELY(errcode)) { lua_State *L = J->L; - if (errcode == LUA_ERRRUN && tvisnum(L->top-1)) { /* Trace error? */ - int32_t e = lj_num2int(numV(L->top-1)); + if (errcode == LUA_ERRRUN && tvisnumber(L->top-1)) { /* Trace error? */ + int32_t e = numberVint(L->top-1); switch ((TraceError)e) { case LJ_TRERR_TYPEINS: /* Type instability. */ case LJ_TRERR_GFAIL: /* Guard would always fail. */ diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index 0a2bb6cd..1727e9b5 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c @@ -1,5 +1,6 @@ /* ** NARROW: Narrowing of numbers to integers (double to int32_t). +** STRIPOV: Stripping of overflow checks. ** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h */ @@ -16,6 +17,7 @@ #include "lj_jit.h" #include "lj_iropt.h" #include "lj_trace.h" +#include "lj_vm.h" /* Rationale for narrowing optimizations: ** @@ -57,24 +59,34 @@ ** ** A better solution is to keep all numbers as FP values and only narrow ** when it's beneficial to do so. LuaJIT uses predictive narrowing for -** induction variables and demand-driven narrowing for index expressions -** and bit operations. Additionally it can eliminate or hoists most of the -** resulting overflow checks. Regular arithmetic computations are never -** narrowed to integers. +** induction variables and demand-driven narrowing for index expressions, +** integer arguments and bit operations. Additionally it can eliminate or +** hoist most of the resulting overflow checks. Regular arithmetic +** computations are never narrowed to integers. ** ** The integer type in the IR has convenient wrap-around semantics and ** ignores overflow. Extra operations have been added for ** overflow-checking arithmetic (ADDOV/SUBOV) instead of an extra type. ** Apart from reducing overall complexity of the compiler, this also ** nicely solves the problem where you want to apply algebraic -** simplifications to ADD, but not to ADDOV. And the assembler can use lea -** instead of an add for integer ADD, but not for ADDOV (lea does not -** affect the flags, but it helps to avoid register moves). +** simplifications to ADD, but not to ADDOV. And the x86/x64 assembler can +** use lea instead of an add for integer ADD, but not for ADDOV (lea does +** not affect the flags, but it helps to avoid register moves). ** -** Note that all of the above has to be reconsidered if LuaJIT is to be -** ported to architectures with slow FP operations or with no hardware FPU -** at all. In the latter case an integer-only port may be the best overall -** solution (if this still meets user demands). +** +** All of the above has to be reconsidered for architectures with slow FP +** operations or without a hardware FPU. The dual-number mode of LuaJIT +** addresses this issue. Arithmetic operations are performed on integers +** as far as possible and overflow checks are added as needed. +** +** This implies that narrowing for integer arguments and bit operations +** should also strip overflow checks, e.g. replace ADDOV with ADD. The +** original overflow guards are weak and can be eliminated by DCE, if +** there's no other use. 
+** +** A slight twist is that it's usually beneficial to use overflow-checked +** integer arithmetics if all inputs are already integers. This is the only +** change that affects the single-number mode, too. */ /* Some local macros to save typing. Undef'd at the end. */ @@ -94,10 +106,10 @@ ** already takes care of eliminating simple redundant conversions like ** CONV.int.num(CONV.num.int(x)) ==> x. ** -** But the surrounding code is FP-heavy and all arithmetic operations are -** performed on FP numbers. Consider a common example such as 'x=t[i+1]', -** with 'i' already an integer (due to induction variable narrowing). The -** index expression would be recorded as +** But the surrounding code is FP-heavy and arithmetic operations are +** performed on FP numbers (for the single-number mode). Consider a common +** example such as 'x=t[i+1]', with 'i' already an integer (due to induction +** variable narrowing). The index expression would be recorded as ** CONV.int.num(ADD(CONV.num.int(i), 1)) ** which is clearly suboptimal. ** @@ -113,6 +125,9 @@ ** FP ops remain in the IR and are eliminated by DCE since all references to ** them are gone. ** +** [In dual-number mode the trace recorder already emits ADDOV etc., but +** this can be further reduced. See below.] +** ** Special care has to be taken to avoid narrowing across an operation ** which is potentially operating on non-integral operands. One obvious ** case is when an expression contains a non-integral constant, but ends @@ -221,6 +236,26 @@ static void narrow_bpc_set(jit_State *J, IRRef1 key, IRRef1 val, IRRef mode) bp->mode = mode; } +/* Backpropagate overflow stripping. */ +static void narrow_stripov_backprop(NarrowConv *nc, IRRef ref, int depth) +{ + jit_State *J = nc->J; + IRIns *ir = IR(ref); + if (ir->o == IR_ADDOV || ir->o == IR_SUBOV || + (ir->o == IR_MULOV && (nc->mode & IRCONV_CONVMASK) == IRCONV_ANY)) { + BPropEntry *bp = narrow_bpc_get(nc->J, ref, IRCONV_TOBIT); + if (bp) { + ref = bp->val; + } else if (++depth < NARROW_MAX_BACKPROP && nc->sp < nc->maxsp) { + narrow_stripov_backprop(nc, ir->op1, depth); + narrow_stripov_backprop(nc, ir->op2, depth); + *nc->sp++ = NARROWINS(IRT(ir->o - IR_ADDOV + IR_ADD, IRT_INT), ref); + return; + } + } + *nc->sp++ = NARROWINS(NARROW_REF, ref); +} + /* Backpropagate narrowing conversion. Return number of needed conversions. */ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth) { @@ -230,24 +265,26 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth) /* Check the easy cases first. */ if (ir->o == IR_CONV && (ir->op2 & IRCONV_SRCMASK) == IRT_INT) { - if (nc->t == IRT_I64) - *nc->sp++ = NARROWINS(NARROW_SEXT, ir->op1); /* Reduce to sign-ext. */ + if ((nc->mode & IRCONV_CONVMASK) <= IRCONV_ANY) + narrow_stripov_backprop(nc, ir->op1, depth+1); else *nc->sp++ = NARROWINS(NARROW_REF, ir->op1); /* Undo conversion. */ + if (nc->t == IRT_I64) + *nc->sp++ = NARROWINS(NARROW_SEXT, 0); /* Sign-extend integer. */ return 0; } else if (ir->o == IR_KNUM) { /* Narrow FP constant. */ lua_Number n = ir_knum(ir)->n; if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) { /* Allows a wider range of constants. */ int64_t k64 = (int64_t)n; - if (n == cast_num(k64)) { /* Only if constant doesn't lose precision. */ + if (n == (lua_Number)k64) { /* Only if const doesn't lose precision. */ *nc->sp++ = NARROWINS(NARROW_INT, 0); *nc->sp++ = (NarrowIns)k64; /* But always truncate to 32 bits. 
*/ return 0; } } else { int32_t k = lj_num2int(n); - if (n == cast_num(k)) { /* Only if constant is really an integer. */ + if (n == (lua_Number)k) { /* Only if constant is really an integer. */ *nc->sp++ = NARROWINS(NARROW_INT, 0); *nc->sp++ = (NarrowIns)k; return 0; @@ -287,7 +324,8 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth) mode = (IRT_INT<<5)|IRT_NUM|IRCONV_INDEX; bp = narrow_bpc_get(nc->J, (IRRef1)ref, mode); if (bp) { - *nc->sp++ = NARROWINS(NARROW_SEXT, bp->val); + *nc->sp++ = NARROWINS(NARROW_REF, bp->val); + *nc->sp++ = NARROWINS(NARROW_SEXT, 0); return 0; } } @@ -326,8 +364,9 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) } else if (op == NARROW_CONV) { *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ } else if (op == NARROW_SEXT) { - *sp++ = emitir(IRT(IR_CONV, IRT_I64), ref, - (IRT_I64<<5)|IRT_INT|IRCONV_SEXT); + lua_assert(sp >= nc->stack+1); + sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1], + (IRT_I64<<5)|IRT_INT|IRCONV_SEXT); } else if (op == NARROW_INT) { lua_assert(next < last); *sp++ = nc->t == IRT_I64 ? @@ -340,7 +379,7 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) /* Omit some overflow checks for array indexing. See comments above. */ if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) { if (next == last && irref_isk(narrow_ref(sp[0])) && - (uint32_t)IR(narrow_ref(sp[0]))->i + 0x40000000 < 0x80000000) + (uint32_t)IR(narrow_ref(sp[0]))->i + 0x40000000u < 0x80000000u) guardot = 0; else /* Otherwise cache a stronger check. */ mode += IRCONV_CHECK-IRCONV_INDEX; @@ -377,12 +416,123 @@ TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J) return NEXTFOLD; } +/* -- Narrowing of implicit conversions ----------------------------------- */ + +/* Recursively strip overflow checks. */ +static TRef narrow_stripov(jit_State *J, TRef tr, int lastop, IRRef mode) +{ + IRRef ref = tref_ref(tr); + IRIns *ir = IR(ref); + int op = ir->o; + if (op >= IR_ADDOV && op <= lastop) { + BPropEntry *bp = narrow_bpc_get(J, ref, mode); + if (bp) { + return TREF(bp->val, irt_t(IR(bp->val)->t)); + } else { + IRRef op1 = ir->op1, op2 = ir->op2; /* The IR may be reallocated. */ + op1 = narrow_stripov(J, op1, lastop, mode); + op2 = narrow_stripov(J, op2, lastop, mode); + tr = emitir(IRT(op - IR_ADDOV + IR_ADD, + ((mode & IRCONV_DSTMASK) >> IRCONV_DSH)), op1, op2); + narrow_bpc_set(J, ref, tref_ref(tr), mode); + } + } else if (LJ_64 && (mode & IRCONV_SEXT) && !irt_is64(ir->t)) { + tr = emitir(IRT(IR_CONV, IRT_INTP), tr, mode); + } + return tr; +} + +/* Narrow array index. */ +TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr) +{ + IRIns *ir; + lua_assert(tref_isnumber(tr)); + if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */ + return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX); + /* Omit some overflow checks for array indexing. See comments above. */ + ir = IR(tref_ref(tr)); + if ((ir->o == IR_ADDOV || ir->o == IR_SUBOV) && irref_isk(ir->op2) && + (uint32_t)IR(ir->op2)->i + 0x40000000u < 0x80000000u) + return emitir(IRTI(ir->o - IR_ADDOV + IR_ADD), ir->op1, ir->op2); + return tr; +} + +/* Narrow conversion to integer operand (overflow undefined). */ +TRef LJ_FASTCALL lj_opt_narrow_toint(jit_State *J, TRef tr) +{ + if (tref_isstr(tr)) + tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); + if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. 
*/ + return emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY); + if (!tref_isinteger(tr)) + lj_trace_err(J, LJ_TRERR_BADTYPE); + /* + ** Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. + ** Use IRCONV_TOBIT for the cache entries, since the semantics are the same. + */ + return narrow_stripov(J, tr, IR_MULOV, (IRT_INT<<5)|IRT_INT|IRCONV_TOBIT); +} + +/* Narrow conversion to bitop operand (overflow wrapped). */ +TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr) +{ + if (tref_isstr(tr)) + tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); + if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */ + return emitir(IRTI(IR_TOBIT), tr, lj_ir_knum_tobit(J)); + if (!tref_isinteger(tr)) + lj_trace_err(J, LJ_TRERR_BADTYPE); + /* + ** Wrapped overflow semantics allow stripping of ADDOV and SUBOV. + ** MULOV cannot be stripped due to precision widening. + */ + return narrow_stripov(J, tr, IR_SUBOV, (IRT_INT<<5)|IRT_INT|IRCONV_TOBIT); +} + +#if LJ_HASFFI +/* Narrow C array index (overflow undefined). */ +TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr) +{ + lua_assert(tref_isnumber(tr)); + if (tref_isnum(tr)) + return emitir(IRTI(IR_CONV), tr, + (IRT_INTP<<5)|IRT_NUM|IRCONV_TRUNC|IRCONV_ANY); + /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */ + return narrow_stripov(J, tr, IR_MULOV, + LJ_64 ? ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT) : + ((IRT_INTP<<5)|IRT_INT|IRCONV_TOBIT)); +} +#endif + /* -- Narrowing of arithmetic operators ----------------------------------- */ /* Check whether a number fits into an int32_t (-0 is ok, too). */ static int numisint(lua_Number n) { - return (n == cast_num(lj_num2int(n))); + return (n == (lua_Number)lj_num2int(n)); +} + +/* Narrowing of arithmetic operations. */ +TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, + TValue *vb, TValue *vc, IROp op) +{ + if (tref_isstr(rb)) { + rb = emitir(IRTG(IR_STRTO, IRT_NUM), rb, 0); + lj_str_tonum(strV(vb), vb); + } + if (tref_isstr(rc)) { + rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0); + lj_str_tonum(strV(vc), vc); + } + /* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */ + if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) && + tref_isinteger(rb) && tref_isinteger(rc) && + numisint(lj_vm_foldarith(numberVnum(vb), numberVnum(vc), + (int)op - (int)IR_ADD))) + return emitir(IRTGI((int)op - (int)IR_ADD + (int)IR_ADDOV), rb, rc); + if (!tref_isnum(rb)) rb = emitir(IRTN(IR_CONV), rb, IRCONV_NUM_INT); + if (!tref_isnum(rc)) rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); + return emitir(IRTN(op), rb, rc); } /* Narrowing of modulo operator. */ @@ -409,16 +559,15 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc) /* Narrowing of power operator or math.pow. */ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) { - lua_Number n; if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc)) lj_trace_err(J, LJ_TRERR_BADTYPE); - n = numV(vc); /* Narrowing must be unconditional to preserve (-x)^i semantics. */ - if (numisint(n)) { + if (tvisint(vc) || numisint(numV(vc))) { int checkrange = 0; /* Split pow is faster for bigger exponents. But do this only for (+k)^i. 
*/ if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) { - if (!(n >= -65536.0 && n <= 65536.0)) goto split_pow; + int32_t k = numberVint(vc); + if (!(k >= -65536 && k <= 65536)) goto split_pow; checkrange = 1; } if (!tref_isinteger(rc)) { @@ -448,20 +597,28 @@ split_pow: /* -- Predictive narrowing of induction variables ------------------------- */ -/* Narrow the FORL index type by looking at the runtime values. */ -IRType lj_opt_narrow_forl(cTValue *forbase) +/* Narrow a single runtime value. */ +static int narrow_forl(jit_State *J, cTValue *o) { - lua_assert(tvisnum(&forbase[FORL_IDX]) && - tvisnum(&forbase[FORL_STOP]) && - tvisnum(&forbase[FORL_STEP])); + if (tvisint(o)) return 1; + if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return numisint(numV(o)); + return 0; +} + +/* Narrow the FORL index type by looking at the runtime values. */ +IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv) +{ + lua_assert(tvisnumber(&tv[FORL_IDX]) && + tvisnumber(&tv[FORL_STOP]) && + tvisnumber(&tv[FORL_STEP])); /* Narrow only if the runtime values of start/stop/step are all integers. */ - if (numisint(numV(&forbase[FORL_IDX])) && - numisint(numV(&forbase[FORL_STOP])) && - numisint(numV(&forbase[FORL_STEP]))) { + if (narrow_forl(J, &tv[FORL_IDX]) && + narrow_forl(J, &tv[FORL_STOP]) && + narrow_forl(J, &tv[FORL_STEP])) { /* And if the loop index can't possibly overflow. */ - lua_Number step = numV(&forbase[FORL_STEP]); - lua_Number sum = numV(&forbase[FORL_STOP]) + step; - if (0 <= step ? sum <= 2147483647.0 : sum >= -2147483648.0) + lua_Number step = numberVnum(&tv[FORL_STEP]); + lua_Number sum = numberVnum(&tv[FORL_STOP]) + step; + if (0 <= step ? (sum <= 2147483647.0) : (sum >= -2147483648.0)) return IRT_INT; } return IRT_NUM; diff --git a/src/lj_record.c b/src/lj_record.c index 2bfd2608..613e458e 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -13,6 +13,7 @@ #include "lj_err.h" #include "lj_str.h" #include "lj_tab.h" +#include "lj_meta.h" #include "lj_frame.h" #include "lj_bc.h" #include "lj_ff.h" @@ -102,7 +103,7 @@ static void rec_check_slots(jit_State *J) lua_assert((J->slot[s+1] & TREF_FRAME)); depth++; } else { - if (tvisnum(tv)) + if (tvisnumber(tv)) lua_assert(tref_isnumber(tr)); /* Could be IRT_INT etc., too. */ else lua_assert(itype2irt(tv) == tref_type(tr)); @@ -197,6 +198,7 @@ typedef enum { static void canonicalize_slots(jit_State *J) { BCReg s; + if (LJ_DUALNUM) return; for (s = J->baseslot+J->maxslot-1; s >= 1; s--) { TRef tr = J->slot[s]; if (tref_isinteger(tr)) { @@ -254,16 +256,16 @@ static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t) } if (op == BC_KSHORT) { int32_t k = (int32_t)(int16_t)bc_d(ins); - return t == IRT_INT ? lj_ir_kint(J, k) : lj_ir_knum(J, cast_num(k)); + return t == IRT_INT ? lj_ir_kint(J, k) : lj_ir_knum(J, (lua_Number)k); } else { - lua_Number n = proto_knum(J->pt, bc_d(ins)); + cTValue *tv = proto_knumtv(J->pt, bc_d(ins)); if (t == IRT_INT) { - int32_t k = lj_num2int(n); - if (n == cast_num(k)) /* -0 is ok here. */ + int32_t k = numberVint(tv); + if (tvisint(tv) || numV(tv) == (lua_Number)k) /* -0 is ok here. */ return lj_ir_kint(J, k); return 0; /* Type mismatch. */ } else { - return lj_ir_knum(J, n); + return lj_ir_knum(J, numberVnum(tv)); } } } @@ -273,41 +275,47 @@ static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t) return 0; /* No assignment to this slot found? */ } +/* Load and optionally convert a FORI argument from a slot. 
*/ +static TRef fori_load(jit_State *J, BCReg slot, IRType t, int mode) +{ + int conv = (tvisint(&J->L->base[slot]) != (t==IRT_INT)) ? IRSLOAD_CONVERT : 0; + return sloadt(J, (int32_t)slot, + t + (((mode & IRSLOAD_TYPECHECK) || + (conv && t == IRT_INT && !(mode >> 16))) ? + IRT_GUARD : 0), + mode + conv); +} + /* Peek before FORI to find a const initializer. Otherwise load from slot. */ -static TRef fori_arg(jit_State *J, const BCIns *fori, BCReg slot, IRType t) +static TRef fori_arg(jit_State *J, const BCIns *fori, BCReg slot, + IRType t, int mode) { TRef tr = J->base[slot]; if (!tr) { tr = find_kinit(J, fori, slot, t); if (!tr) - tr = sloadt(J, (int32_t)slot, - t == IRT_INT ? (IRT_INT|IRT_GUARD) : t, - t == IRT_INT ? (IRSLOAD_CONVERT|IRSLOAD_READONLY|IRSLOAD_INHERIT) : - (IRSLOAD_READONLY|IRSLOAD_INHERIT)); + tr = fori_load(J, slot, t, mode); } return tr; } -/* In-place coercion of FORI arguments. */ -static lua_Number for_coerce(jit_State *J, TValue *o) -{ - if (!tvisnum(o) && !(tvisstr(o) && lj_str_tonum(strV(o), o))) - lj_trace_err(J, LJ_TRERR_BADTYPE); - return numV(o); -} - -/* Simulate the runtime behavior of the FOR loop iterator. +/* Return the direction of the FOR loop iterator. ** It's important to exactly reproduce the semantics of the interpreter. */ -static LoopEvent for_iter(jit_State *J, IROp *op, BCReg ra, int isforl) +static int rec_for_direction(cTValue *o) { - TValue *forbase = &J->L->base[ra]; - lua_Number stopv = for_coerce(J, &forbase[FORL_STOP]); - lua_Number idxv = for_coerce(J, &forbase[FORL_IDX]); - lua_Number stepv = for_coerce(J, &forbase[FORL_STEP]); + return (tvisint(o) ? intV(o) : (int32_t)o->u32.hi) >= 0; +} + +/* Simulate the runtime behavior of the FOR loop iterator. */ +static LoopEvent rec_for_iter(IROp *op, cTValue *o, int isforl) +{ + lua_Number stopv = numberVnum(&o[FORL_STOP]); + lua_Number idxv = numberVnum(&o[FORL_IDX]); + lua_Number stepv = numberVnum(&o[FORL_STEP]); if (isforl) idxv += stepv; - if ((int32_t)forbase[FORL_STEP].u32.hi >= 0) { + if (rec_for_direction(&o[FORL_STEP])) { if (idxv <= stopv) { *op = IR_LE; return LOOPEV_ENTER; } *op = IR_GT; return LOOPEV_LEAVE; } else { @@ -316,44 +324,123 @@ static LoopEvent for_iter(jit_State *J, IROp *op, BCReg ra, int isforl) } } +/* Record checks for FOR loop overflow and step direction. */ +static void rec_for_check(jit_State *J, IRType t, int dir, TRef stop, TRef step) +{ + if (!tref_isk(step)) { + /* Non-constant step: need a guard for the direction. */ + TRef zero = (t == IRT_INT) ? lj_ir_kint(J, 0) : lj_ir_knum_zero(J); + emitir(IRTG(dir ? IR_GE : IR_LT, t), step, zero); + /* Add hoistable overflow checks for a narrowed FORL index. */ + if (t == IRT_INT) { + if (tref_isk(stop)) { + /* Constant stop: optimize check away or to a range check for step. */ + int32_t k = IR(tref_ref(stop))->i; + if (dir) { + if (k > 0) + emitir(IRTGI(IR_LE), step, lj_ir_kint(J, (int32_t)0x7fffffff-k)); + } else { + if (k < 0) + emitir(IRTGI(IR_GE), step, lj_ir_kint(J, (int32_t)0x80000000-k)); + } + } else { + /* Stop+step variable: need full overflow check. */ + TRef tr = emitir(IRTGI(IR_ADDOV), step, stop); + emitir(IRTI(IR_USE), tr, 0); /* ADDOV is weak. Avoid dead result. */ + } + } + } else if (t == IRT_INT && !tref_isk(stop)) { + /* Constant step: optimize overflow check to a range check for stop. */ + int32_t k = IR(tref_ref(step))->i; + k = (int32_t)(dir ? 0x7fffffff : 0x80000000) - k; + emitir(IRTGI(dir ? IR_LE : IR_GE), stop, lj_ir_kint(J, k)); + } +} + +/* Record a FORL instruction. 
*/ +static void rec_for_loop(jit_State *J, const BCIns *fori, ScEvEntry *scev, + int init) +{ + BCReg ra = bc_a(*fori); + cTValue *tv = &J->L->base[ra]; + TRef idx = J->base[ra+FORL_IDX]; + IRType t = idx ? tref_type(idx) : + (init || LJ_DUALNUM) ? lj_opt_narrow_forl(J, tv) : IRT_NUM; + int mode = IRSLOAD_INHERIT + + ((!LJ_DUALNUM || tvisint(tv) == (t == IRT_INT)) ? IRSLOAD_READONLY : 0); + TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode); + TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode); + int tc, dir = rec_for_direction(&tv[FORL_STEP]); + lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI); + scev->t.irt = t; + scev->dir = dir; + scev->stop = tref_ref(stop); + scev->step = tref_ref(step); + if (init) + rec_for_check(J, t, dir, stop, step); + scev->start = tref_ref(find_kinit(J, fori, ra+FORL_IDX, IRT_INT)); + tc = (LJ_DUALNUM && + !(scev->start && irref_isk(scev->stop) && irref_isk(scev->step))) ? + IRSLOAD_TYPECHECK : 0; + if (tc) { + J->base[ra+FORL_STOP] = stop; + J->base[ra+FORL_STEP] = step; + } + if (!idx) + idx = fori_load(J, ra+FORL_IDX, t, + IRSLOAD_INHERIT + tc + (J->scev.start << 16)); + if (!init) + J->base[ra+FORL_IDX] = idx = emitir(IRT(IR_ADD, t), idx, step); + J->base[ra+FORL_EXT] = idx; + scev->idx = tref_ref(idx); + J->maxslot = ra+FORL_EXT+1; +} + /* Record FORL/JFORL or FORI/JFORI. */ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) { BCReg ra = bc_a(*fori); - IROp op; - LoopEvent ev = for_iter(J, &op, ra, isforl); + TValue *tv = &J->L->base[ra]; TRef *tr = &J->base[ra]; - TRef idx, stop; + IROp op; + LoopEvent ev; + TRef stop; IRType t; if (isforl) { /* Handle FORL/JFORL opcodes. */ - TRef step; - idx = tr[FORL_IDX]; + TRef idx = tr[FORL_IDX]; if (tref_ref(idx) == J->scev.idx) { t = J->scev.t.irt; stop = J->scev.stop; - step = J->scev.step; + idx = emitir(IRT(IR_ADD, t), idx, J->scev.step); + tr[FORL_EXT] = tr[FORL_IDX] = idx; } else { - if (!idx) idx = sloadt(J, (int32_t)(ra+FORL_IDX), IRT_NUM, 0); - t = tref_type(idx); - stop = fori_arg(J, fori, ra+FORL_STOP, t); - step = fori_arg(J, fori, ra+FORL_STEP, t); + ScEvEntry scev; + rec_for_loop(J, fori, &scev, 0); + t = scev.t.irt; + stop = scev.stop; } - tr[FORL_IDX] = idx = emitir(IRT(IR_ADD, t), idx, step); } else { /* Handle FORI/JFORI opcodes. */ BCReg i; - t = IRT_NUM; + lj_meta_for(J->L, tv); + t = lj_opt_narrow_forl(J, tv); for (i = FORL_IDX; i <= FORL_STEP; i++) { - lua_assert(J->base[ra+i] != 0); /* Assumes the slots are already set. */ - tr[i] = lj_ir_tonum(J, J->base[ra+i]); + lua_assert(tref_isnumber_str(tr[i])); + if (tref_isstr(tr[i])) + tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0); + if (t == IRT_INT) { + if (!tref_isinteger(tr[i])) + tr[i] = emitir(IRTI(IR_CONV), tr[i], IRCONV_INT_NUM|IRCONV_CHECK); + } else { + if (!tref_isnum(tr[i])) + tr[i] = emitir(IRTN(IR_CONV), tr[i], IRCONV_NUM_INT); + } } - idx = tr[FORL_IDX]; + tr[FORL_EXT] = tr[FORL_IDX]; stop = tr[FORL_STOP]; - if (!tref_isk(tr[FORL_STEP])) /* Non-const step: need direction guard. 
*/ - emitir(IRTG(((op-IR_LT)>>1)+IR_LT, IRT_NUM), - tr[FORL_STEP], lj_ir_knum_zero(J)); + rec_for_check(J, t, rec_for_direction(&tv[FORL_STEP]), stop, tr[FORL_STEP]); } - tr[FORL_EXT] = idx; + ev = rec_for_iter(&op, tv, isforl); if (ev == LOOPEV_LEAVE) { J->maxslot = ra+FORL_EXT+1; J->pc = fori+1; @@ -363,7 +450,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) } lj_snap_add(J); - emitir(IRTG(op, t), idx, stop); + emitir(IRTG(op, t), tr[FORL_IDX], stop); if (ev == LOOPEV_LEAVE) { J->maxslot = ra; @@ -870,7 +957,7 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) if (ref == J->scev.idx) { int32_t stop; lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD); - stop = lj_num2int(numV(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP])); + stop = numberVint(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]); /* Runtime value for stop of loop is within bounds? */ if ((int64_t)stop + ofs < (int64_t)asize) { /* Emit invariant bounds check for stop. */ @@ -897,15 +984,12 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix) /* Integer keys are looked up in the array part first. */ key = ix->key; if (tref_isnumber(key)) { - lua_Number n = numV(&ix->keyv); - int32_t k = lj_num2int(n); - lua_assert(tvisnum(&ix->keyv)); - /* Potential array key? */ - if ((MSize)k < LJ_MAX_ASIZE && n == cast_num(k)) { - TRef asizeref, ikey = key; - if (!tref_isinteger(ikey)) - ikey = emitir(IRTGI(IR_CONV), ikey, IRCONV_INT_NUM|IRCONV_INDEX); - asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); + int32_t k = numberVint(&ix->keyv); + if (!tvisint(&ix->keyv) && numV(&ix->keyv) != (lua_Number)k) + k = LJ_MAX_ASIZE; + if ((MSize)k < LJ_MAX_ASIZE) { /* Potential array key? */ + TRef ikey = lj_opt_narrow_index(J, key); + TRef asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); if ((MSize)k < t->asize) { /* Currently an array key? */ TRef arrayref; rec_idx_abc(J, asizeref, ikey, t->asize); @@ -1081,7 +1165,8 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) } else { keybarrier = 0; /* Previous non-nil value kept the key alive. */ } - if (tref_isinteger(ix->val)) /* Convert int to number before storing. */ + /* Convert int to number before storing. */ + if (!LJ_DUALNUM && tref_isinteger(ix->val)) ix->val = emitir(IRTN(IR_CONV), ix->val, IRCONV_NUM_INT); emitir(IRT(loadop+IRDELTA_L2S, tref_type(ix->val)), xref, ix->val); if (keybarrier || tref_isgcv(ix->val)) @@ -1135,7 +1220,8 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val) if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitive refs. */ return res; } else { /* Upvalue store. */ - if (tref_isinteger(val)) /* Convert int to number before storing. */ + /* Convert int to number before storing. */ + if (!LJ_DUALNUM && tref_isinteger(val)) val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); emitir(IRT(IR_USTORE, tref_type(val)), uref, val); if (needbarrier && tref_isgcv(val)) @@ -1455,16 +1541,15 @@ void lj_record_ins(jit_State *J) case BCMnone: rb = 0; rc = bc_d(ins); break; /* Upgrade rc to 'rd'. */ case BCMvar: copyTV(J->L, rbv, &lbase[rb]); ix.tab = rb = getslot(J, rb); break; - case BCMnum: { lua_Number n = proto_knum(J->pt, rb); - setnumV(rbv, n); ix.tab = rb = lj_ir_knumint(J, n); } break; default: break; /* Handled later. 
*/ } switch (bcmode_c(op)) { case BCMvar: copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break; case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break; - case BCMnum: { lua_Number n = proto_knum(J->pt, rc); - setnumV(rcv, n); ix.key = rc = lj_ir_knumint(J, n); } break; + case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc); + copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) : + lj_ir_knumint(J, numV(tv)); } break; case BCMstr: { GCstr *s = gco2str(proto_kgc(J->pt, ~(ptrdiff_t)rc)); setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break; default: break; /* Handled later. */ @@ -1502,9 +1587,11 @@ void lj_record_ins(jit_State *J) irop = (int)op - (int)BC_ISLT + (int)IR_LT; if (ta == IRT_NUM) { if ((irop & 1)) irop ^= 4; /* ISGE/ISGT are unordered. */ - if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 5; + if (!lj_ir_numcmp(numberVnum(rav), numberVnum(rcv), (IROp)irop)) + irop ^= 5; } else if (ta == IRT_INT) { - if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 1; + if (!lj_ir_numcmp(numberVnum(rav), numberVnum(rcv), (IROp)irop)) + irop ^= 1; } else if (ta == IRT_STR) { if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1; ra = lj_ir_call(J, IRCALL_lj_str_cmp, ra, rc); @@ -1599,13 +1686,11 @@ void lj_record_ins(jit_State *J) case BC_ADDVN: case BC_SUBVN: case BC_MULVN: case BC_DIVVN: case BC_ADDVV: case BC_SUBVV: case BC_MULVV: case BC_DIVVV: { MMS mm = bcmode_mm(op); - if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) { - rb = lj_ir_tonum(J, rb); - rc = lj_ir_tonum(J, rc); - rc = emitir(IRTN((int)mm - (int)MM_add + (int)IR_ADD), rb, rc); - } else { + if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) + rc = lj_opt_narrow_arith(J, rb, rc, &ix.tabv, &ix.keyv, + (int)mm - (int)MM_add + (int)IR_ADD); + else rc = rec_mm_arith(J, &ix, mm); - } break; } @@ -1827,59 +1912,6 @@ void lj_record_ins(jit_State *J) /* -- Recording setup ----------------------------------------------------- */ -/* Setup recording for a FORL loop. */ -static void rec_setup_forl(jit_State *J, const BCIns *fori) -{ - BCReg ra = bc_a(*fori); - cTValue *forbase = &J->L->base[ra]; - IRType t = (J->flags & JIT_F_OPT_NARROW) ? lj_opt_narrow_forl(forbase) - : IRT_NUM; - TRef start; - TRef stop = fori_arg(J, fori, ra+FORL_STOP, t); - TRef step = fori_arg(J, fori, ra+FORL_STEP, t); - int dir = (0 <= numV(&forbase[FORL_STEP])); - lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI); - J->scev.t.irt = t; - J->scev.dir = dir; - J->scev.stop = tref_ref(stop); - J->scev.step = tref_ref(step); - if (!tref_isk(step)) { - /* Non-constant step: need a guard for the direction. */ - TRef zero = (t == IRT_INT) ? lj_ir_kint(J, 0) : lj_ir_knum_zero(J); - emitir(IRTG(dir ? IR_GE : IR_LT, t), step, zero); - /* Add hoistable overflow checks for a narrowed FORL index. */ - if (t == IRT_INT) { - if (tref_isk(stop)) { - /* Constant stop: optimize check away or to a range check for step. */ - int32_t k = IR(tref_ref(stop))->i; - if (dir) { - if (k > 0) - emitir(IRTGI(IR_LE), step, lj_ir_kint(J, (int32_t)0x7fffffff-k)); - } else { - if (k < 0) - emitir(IRTGI(IR_GE), step, lj_ir_kint(J, (int32_t)0x80000000-k)); - } - } else { - /* Stop+step variable: need full overflow check. */ - TRef tr = emitir(IRTGI(IR_ADDOV), step, stop); - emitir(IRTI(IR_USE), tr, 0); /* ADDOV is weak. Avoid dead result. */ - } - } - } else if (t == IRT_INT && !tref_isk(stop)) { - /* Constant step: optimize overflow check to a range check for stop. 
*/ - int32_t k = IR(tref_ref(step))->i; - k = (int32_t)(dir ? 0x7fffffff : 0x80000000) - k; - emitir(IRTGI(dir ? IR_LE : IR_GE), stop, lj_ir_kint(J, k)); - } - J->scev.start = tref_ref(find_kinit(J, fori, ra+FORL_IDX, IRT_INT)); - start = sloadt(J, (int32_t)(ra+FORL_IDX), - (t == IRT_INT && !J->scev.start) ? (IRT_INT|IRT_GUARD) : t, - t == IRT_INT ? (IRSLOAD_CONVERT|IRSLOAD_INHERIT) : IRSLOAD_INHERIT); - J->base[ra+FORL_EXT] = start; - J->scev.idx = tref_ref(start); - J->maxslot = ra+FORL_EXT+1; -} - /* Setup recording for a root trace started by a hot loop. */ static const BCIns *rec_setup_root(jit_State *J) { @@ -2033,7 +2065,7 @@ void lj_record_setup(jit_State *J) if (J->pc > proto_bc(J->pt) && bc_op(J->pc[-1]) == BC_JFORI && bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) { lj_snap_add(J); - rec_setup_forl(J, J->pc-1); + rec_for_loop(J, J->pc-1, &J->scev, 1); goto sidecheck; } } else { @@ -2054,7 +2086,7 @@ void lj_record_setup(jit_State *J) */ lj_snap_add(J); if (bc_op(J->cur.startins) == BC_FORL) - rec_setup_forl(J, J->pc-1); + rec_for_loop(J, J->pc-1, &J->scev, 1); if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) lj_trace_err(J, LJ_TRERR_STACKOV); } diff --git a/src/lj_snap.c b/src/lj_snap.c index 59435b20..70628a0e 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -68,7 +68,8 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) if (!(ir->op2 & IRSLOAD_INHERIT)) continue; /* No need to restore readonly slots and unmodified non-parent slots. */ - if ((ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) + if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) && + (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) sn |= SNAP_NORESTORE; } map[n++] = sn; diff --git a/src/lj_trace.c b/src/lj_trace.c index b67e8f75..69124542 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -495,8 +495,8 @@ static int trace_abort(jit_State *J) J->postproc = LJ_POST_NONE; lj_mcode_abort(J); - if (tvisnum(L->top-1)) - e = (TraceError)lj_num2int(numV(L->top-1)); + if (tvisnumber(L->top-1)) + e = (TraceError)numberVint(L->top-1); if (e == LJ_TRERR_MCODELM) { J->state = LJ_TRACE_ASM; return 1; /* Retry ASM with new MCode area. */ @@ -703,8 +703,12 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) setintV(L->top++, J->exitno); setintV(L->top++, RID_NUM_GPR); setintV(L->top++, RID_NUM_FPR); - for (i = 0; i < RID_NUM_GPR; i++) - setnumV(L->top++, cast_num(ex->gpr[i])); + for (i = 0; i < RID_NUM_GPR; i++) { + if (sizeof(ex->gpr[i]) == sizeof(int32_t)) + setintV(L->top++, (int32_t)ex->gpr[i]); + else + setnumV(L->top++, (lua_Number)ex->gpr[i]); + } for (i = 0; i < RID_NUM_FPR; i++) { setnumV(L->top, ex->fpr[i]); if (LJ_UNLIKELY(tvisnan(L->top)))
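
A note on the narrowing machinery above: everything hinges on two predicates, the exact int32 round-trip test (numisint() and lj_opt_narrow_forl()) and overflow-checked integer arithmetic (the ADDOV/SUBOV guards that lj_opt_narrow_arith() emits and narrow_stripov() strips again when overflow semantics allow it). The following standalone C sketch is not part of the patch; the names numisint32 and addov32 are illustrative, and the explicit range check stands in for lj_num2int()'s well-defined conversion.

#include <stdint.h>
#include <stdio.h>

/* Exact int32 round-trip test, as in numisint() above. A bare C cast of
** an out-of-range double is undefined behavior, hence the range check. */
static int numisint32(double n)
{
  if (!(n >= -2147483648.0 && n < 2147483648.0)) return 0;
  return n == (double)(int32_t)n;  /* -0.0 passes: it narrows to 0. */
}

/* Overflow-checked 32-bit add with ADDOV-like semantics: report failure
** instead of wrapping, which is where a recorded guard would exit. */
static int addov32(int32_t a, int32_t b, int32_t *res)
{
  int64_t s = (int64_t)a + (int64_t)b;
  if (s < INT32_MIN || s > INT32_MAX) return 0;
  *res = (int32_t)s;
  return 1;
}

int main(void)
{
  int32_t r;
  printf("%d %d %d\n", numisint32(3.0), numisint32(3.5), numisint32(-0.0));
  if (addov32(2000000000, 1000000000, &r))
    printf("sum: %d\n", r);
  else
    printf("overflow: guard would leave the trace\n");
  return 0;
}

This prints "1 0 1" followed by the overflow message: 3.0 and -0.0 narrow exactly, 3.5 does not, and the sum exceeds INT32_MAX, which is exactly the condition under which an ADDOV guard fails at runtime.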
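The other half of the FOR-loop strategy is rec_for_check() above: rather than checking idx+step for overflow on every iteration, it emits one hoistable guard on the loop-invariant operands. With a constant stop k > 0 and a positive step, idx <= stop holds while the loop runs, so idx + step <= k + step, and the single guard step <= 0x7fffffff-k bounds every increment (symmetrically, step >= 0x80000000-k for k < 0 and a negative step). A small self-contained sketch of the positive-direction case, under the same int32 assumptions and with an illustrative function name:

#include <stdint.h>
#include <stdio.h>

/* One-shot replacement for a per-iteration overflow check: if this holds
** for the invariant step and constant stop, idx + step cannot exceed
** INT32_MAX for any idx <= stop reached by the loop. */
static int forl_pos_step_ok(int32_t step, int32_t stop)
{
  return stop <= 0 || step <= INT32_MAX - stop;
}

int main(void)
{
  /* With stop = 0x7ffffffe, a step of 1 is provably safe, while a step
  ** of 2 could push the index past INT32_MAX on the final increment. */
  printf("%d %d\n", forl_pos_step_ok(1, 0x7ffffffe),
         forl_pos_step_ok(2, 0x7ffffffe));
  return 0;
}

When stop and step are both constants the recorder emits no guard at all; the form above corresponds to a constant stop with a variable step, and the mirrored range check on stop handles the constant-step case.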