diff --git a/src/lj_asm.c b/src/lj_asm.c index 55bc814e..9cdbcf12 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -1893,9 +1893,8 @@ static void asm_sload(ASMState *as, IRIns *ir) IRType1 t = ir->t; Reg base; lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(!irt_isguard(ir->t) == - !((ir->op2 & IRSLOAD_TYPECHECK) || irt_isint(t))); - if (irt_isint(t)) { + lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); + if (irt_isint(t) && irt_isguard(t)) { Reg left = ra_scratch(as, RSET_FPR); asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ base = ra_alloc1(as, REF_BASE, RSET_GPR); @@ -1904,9 +1903,12 @@ static void asm_sload(ASMState *as, IRIns *ir) } else if (ra_used(ir)) { RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; Reg dest = ra_dest(as, ir, allow); - lua_assert(irt_isnum(ir->t) || irt_isaddr(ir->t)); base = ra_alloc1(as, REF_BASE, RSET_GPR); - emit_movrmro(as, dest, base, ofs); + lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); + if (irt_isint(t)) + emit_rmro(as, XO_CVTSD2SI, dest, base, ofs); /* Int without guard. */ + else + emit_movrmro(as, dest, base, ofs); } else { if (!(ir->op2 & IRSLOAD_TYPECHECK)) return; /* No type check: avoid base alloc. */ diff --git a/src/lj_record.c b/src/lj_record.c index 2709ea01..deb5b2bb 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -249,33 +249,65 @@ nocanon: J->mergesnap = 1; /* In case recording continues. */ } -/* Peek before FORI to find a const initializer, otherwise load from slot. */ -static TRef fori_arg(jit_State *J, const BCIns *pc, BCReg slot, IRType t) +/* Search bytecode backwards for an int/num constant slot initializer. */ +static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t) { - /* A store to slot-1 means there's no conditional assignment for slot. 
*/ - if (bc_a(pc[-1]) == slot-1 && bcmode_a(bc_op(pc[-1])) == BCMdst) { - BCIns ins = pc[0]; - if (bc_a(ins) == slot) { - if (bc_op(ins) == BC_KSHORT) { - int32_t k = (int32_t)(int16_t)bc_d(ins); - if (t == IRT_INT) - return lj_ir_kint(J, k); - else - return lj_ir_knum(J, cast_num(k)); - } else if (bc_op(ins) == BC_KNUM) { - lua_Number n = proto_knum(J->pt, bc_d(ins)); - if (t == IRT_INT) - return lj_ir_kint(J, lj_num2int(n)); - else - return lj_ir_knum(J, n); + /* This algorithm is rather simplistic and assumes quite a bit about + ** how the bytecode is generated. It works fine for FORI initializers, + ** but it won't necessarily work in other cases (e.g. iterator arguments). + ** It doesn't do anything fancy, either (like backpropagating MOVs). + */ + const BCIns *pc, *startpc = proto_bc(J->pt); + for (pc = endpc-1; pc > startpc; pc--) { + BCIns ins = *pc; + BCOp op = bc_op(ins); + /* First try to find the last instruction that stores to this slot. */ + if (bcmode_a(op) == BCMbase && bc_a(ins) <= slot) { + return 0; /* Multiple results, e.g. from a CALL or KNIL. */ + } else if (bcmode_a(op) == BCMdst && bc_a(ins) == slot) { + if (op == BC_KSHORT || op == BC_KNUM) { /* Found const. initializer. */ + /* Now try to verify there's no forward jump across it. */ + const BCIns *kpc = pc; + for ( ; pc > startpc; pc--) + if (bc_op(*pc) == BC_JMP) { + const BCIns *target = pc+bc_j(*pc)+1; + if (target > kpc && target <= endpc) + return 0; /* Conditional assignment. */ + } + if (op == BC_KSHORT) { + int32_t k = (int32_t)(int16_t)bc_d(ins); + return t == IRT_INT ? lj_ir_kint(J, k) : lj_ir_knum(J, cast_num(k)); + } else { + lua_Number n = proto_knum(J->pt, bc_d(ins)); + if (t == IRT_INT) { + int32_t k = lj_num2int(n); + if (n == cast_num(k)) /* -0 is ok here. */ + return lj_ir_kint(J, k); + return 0; /* Type mismatch. */ + } else { + return lj_ir_knum(J, n); + } + } } + return 0; /* Non-constant initializer. 
*/ } } - if (J->base[slot]) - return J->base[slot]; - if (t == IRT_INT) - t |= IRT_GUARD; - return sloadt(J, (int32_t)slot, t, IRSLOAD_READONLY|IRSLOAD_INHERIT); + return 0; /* No assignment to this slot found? */ +} + +/* Peek before FORI to find a const initializer. Otherwise load from slot. */ +static TRef fori_arg(jit_State *J, const BCIns *fori, BCReg slot, IRType t) +{ + TRef tr = find_kinit(J, fori, slot, t); + if (!tr) { + tr = J->base[slot]; /* Reuse TRef already in slot. */ + if (!tr) { + if (t == IRT_INT) + t |= IRT_GUARD; + tr = sloadt(J, (int32_t)slot, t, IRSLOAD_READONLY|IRSLOAD_INHERIT); + } + } + return tr; } /* Simulate the runtime behavior of the FOR loop iterator. @@ -311,8 +343,8 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) idx = tr[FORL_IDX]; if (!idx) idx = sloadt(J, (int32_t)(ra+FORL_IDX), IRT_NUM, 0); t = tref_type(idx); - stop = fori_arg(J, fori-2, ra+FORL_STOP, t); - step = fori_arg(J, fori-1, ra+FORL_STEP, t); + stop = fori_arg(J, fori, ra+FORL_STOP, t); + step = fori_arg(J, fori, ra+FORL_STEP, t); tr[FORL_IDX] = idx = emitir(IRT(IR_ADD, t), idx, step); } else { /* Handle FORI/JFORI opcodes. */ BCReg i; @@ -2134,8 +2166,8 @@ static void rec_setup_forl(jit_State *J, const BCIns *fori) cTValue *forbase = &J->L->base[ra]; IRType t = (J->flags & JIT_F_OPT_NARROW) ? lj_opt_narrow_forl(forbase) : IRT_NUM; - TRef stop = fori_arg(J, fori-2, ra+FORL_STOP, t); - TRef step = fori_arg(J, fori-1, ra+FORL_STEP, t); + TRef stop = fori_arg(J, fori, ra+FORL_STOP, t); + TRef step = fori_arg(J, fori, ra+FORL_STEP, t); int dir = (0 <= numV(&forbase[FORL_STEP])); lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI); if (!tref_isk(step)) { @@ -2165,7 +2197,7 @@ static void rec_setup_forl(jit_State *J, const BCIns *fori) k = (int32_t)(dir ? 0x7fffffff : 0x80000000) - k; emitir(IRTGI(dir ? 
IR_LE : IR_GE), stop, lj_ir_kint(J, k)); } - if (t == IRT_INT) + if (t == IRT_INT && !find_kinit(J, fori, ra+FORL_IDX, IRT_INT)) /* Skip guard for const int init. */ t |= IRT_GUARD; J->base[ra+FORL_EXT] = sloadt(J, (int32_t)(ra+FORL_IDX), t, IRSLOAD_INHERIT); J->maxslot = ra+FORL_EXT+1;