From df65b8b419c12327254dec0df116c62525aaabad Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 3 Feb 2011 04:13:51 +0100 Subject: [PATCH] FFI: Rename IR_CNEWP to IR_CNEWI and use it to box 64 bit integers. Generates smaller IR and DCE eliminates many intermediate boxes. Needs allocation sinking to eliminate the boxes kept alive by PHIs. --- src/lj_asm.c | 67 +++++++++++++++++++++++++++----------------- src/lj_crecord.c | 70 ++++++++++++++++++++++++++++------------------ src/lj_ir.h | 10 ++++--- src/lj_opt_fold.c | 29 +++++++++++++------ src/lj_opt_split.c | 50 ++++++++++++++++++++------------- 5 files changed, 142 insertions(+), 84 deletions(-) diff --git a/src/lj_asm.c b/src/lj_asm.c index 8864c9a3..77b55f0c 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -2518,7 +2518,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) { CTState *cts = ctype_ctsG(J2G(as->J)); CTypeID typeid = (CTypeID)IR(ir->op1)->i; - CTSize sz = (ir->o == IR_CNEWP || ir->op2 == REF_NIL) ? + CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? lj_ctype_size(cts, typeid) : (CTSize)IR(ir->op2)->i; const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; IRRef args[2]; @@ -2529,33 +2529,45 @@ static void asm_cnew(ASMState *as, IRIns *ir) as->gcsteps++; asm_setupresult(as, ir, ci); /* GCcdata * */ - /* Initialize pointer cdata object. */ - if (ir->o == IR_CNEWP) { + /* Initialize immutable cdata object. */ + if (ir->o == IR_CNEWI) { + RegSet allow = (RSET_GPR & ~RSET_SCRATCH); +#if LJ_64 + Reg r64 = sz == 8 ? REX_64 : 0; if (irref_isk(ir->op2)) { IRIns *irk = IR(ir->op2); -#if LJ_64 - if (irk->o == IR_KINT64) { - uint64_t k = ir_k64(irk)->u64; - lua_assert(sz == 8); - if (checki32((int64_t)k)) { - emit_i32(as, (int32_t)k); - emit_rmro(as, XO_MOVmi, REX_64, RID_RET, sizeof(GCcdata)); - } else { - emit_movtomro(as, RID_ECX|REX_64, RID_RET, sizeof(GCcdata)); - emit_loadu64(as, RID_ECX, k); - } + uint64_t k = irk->o == IR_KINT64 ? ir_k64(irk)->u64 : + (uint64_t)(uint32_t)irk->i; + if (sz == 4 || checki32((int64_t)k)) { + emit_i32(as, (int32_t)k); + emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata)); } else { -#endif - lua_assert(sz == 4); - emit_movmroi(as, RID_RET, sizeof(GCcdata), irk->i); -#if LJ_64 + emit_movtomro(as, RID_ECX + r64, RID_RET, sizeof(GCcdata)); + emit_loadu64(as, RID_ECX, k); } -#endif } else { - Reg r = ra_alloc1(as, ir->op2, (RSET_GPR & ~RSET_SCRATCH)); - emit_movtomro(as, r + ((LJ_64 && sz == 8) ? REX_64 : 0), - RID_RET, sizeof(GCcdata)); + Reg r = ra_alloc1(as, ir->op2, allow); + emit_movtomro(as, r + r64, RID_RET, sizeof(GCcdata)); } +#else + int32_t ofs = sizeof(GCcdata); + if (LJ_HASFFI && sz == 8) { + ofs += 4; ir++; + lua_assert(ir->o == IR_HIOP); + } + do { + if (irref_isk(ir->op2)) { + emit_movmroi(as, RID_RET, ofs, IR(ir->op2)->i); + } else { + Reg r = ra_alloc1(as, ir->op2, allow); + emit_movtomro(as, r, RID_RET, ofs); + rset_clear(allow, r); + } + if (!LJ_HASFFI || ofs == sizeof(GCcdata)) break; + ofs -= 4; ir--; + } while (1); +#endif + lua_assert(sz == 4 || (sz == 8 && (LJ_64 || LJ_HASFFI))); } /* Combine initialization of marked, gct and typeid. */ @@ -3289,6 +3301,9 @@ static void asm_hiop(ASMState *as, IRIns *ir) if (!uselo) ra_allocref(as, ir->op1, RID2RSET(RID_RET)); /* Mark call as used. */ break; + case IR_CNEWI: + /* Nothing to do here. Handled by CNEWI itself. */ + break; default: lua_assert(0); break; } #else @@ -4057,7 +4072,7 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_SNEW: asm_snew(as, ir); break; case IR_TNEW: asm_tnew(as, ir); break; case IR_TDUP: asm_tdup(as, ir); break; - case IR_CNEW: case IR_CNEWP: asm_cnew(as, ir); break; + case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; /* Write barriers. */ case IR_TBAR: asm_tbar(as, ir); break; @@ -4164,8 +4179,10 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T) } #if LJ_32 && LJ_HASFFI case IR_HIOP: - if ((ir-1)->o == IR_CALLN) + if ((ir-1)->o == IR_CALLN) { ir->prev = REGSP_HINT(RID_RETHI); + continue; + } break; #endif /* C calls evict all scratch regs and return results in RID_RET. */ @@ -4174,7 +4191,7 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T) if (as->evenspill < 3) /* lj_str_new and lj_tab_newkey need 3 args. */ as->evenspill = 3; #endif - case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWP: case IR_TOSTR: + case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR: ir->prev = REGSP_HINT(RID_RET); if (inloop) as->modset = RSET_SCRATCH; diff --git a/src/lj_crecord.c b/src/lj_crecord.c index cd5c7d49..1ba98ae8 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -137,7 +137,7 @@ static int crec_isnonzero(CType *s, void *p) } } -static void crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, +static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, void *svisnz) { CTSize dsize = d->size, ssize = s->size; @@ -190,6 +190,7 @@ static void crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, #endif xstore: if (dt == IRT_I64 || dt == IRT_U64) lj_needsplit(J); + if (dp == 0) return sp; emitir(IRT(IR_XSTORE, dt), dp, sp); break; case CCX(I, C): @@ -290,6 +291,7 @@ static void crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, lj_trace_err(J, LJ_TRERR_NYICONV); break; } + return 0; } /* -- Convert C type to TValue (load) ------------------------------------- */ @@ -306,21 +308,18 @@ static TRef crec_tv_ct(jit_State *J, CType *s, CTypeID sid, TRef sp) goto err_nyi; /* NYI: copyval of >64 bit integers. */ tr = emitir(IRT(IR_XLOAD, t), sp, 0); if (t == IRT_FLOAT || t == IRT_U32) { /* Keep uint32_t/float as numbers. */ - tr = emitconv(tr, IRT_NUM, t, 0); + return emitconv(tr, IRT_NUM, t, 0); } else if (t == IRT_I64 || t == IRT_U64) { /* Box 64 bit integer. */ - TRef dp = emitir(IRTG(IR_CNEW, IRT_CDATA), lj_ir_kint(J, sid), TREF_NIL); - TRef ptr = emitir(IRT(IR_ADD, IRT_PTR), dp, - lj_ir_kintp(J, sizeof(GCcdata))); - emitir(IRT(IR_XSTORE, t), ptr, tr); + sp = tr; lj_needsplit(J); - return dp; } else if ((sinfo & CTF_BOOL)) { /* Assume not equal to zero. Fixup and emit pending guard later. */ lj_ir_set(J, IRTGI(IR_NE), tr, lj_ir_kint(J, 0)); J->postproc = LJ_POST_FIXGUARD; - tr = TREF_TRUE; + return TREF_TRUE; + } else { + return tr; } - return tr; } else if (ctype_isptr(sinfo)) { IRType t = (LJ_64 && s->size == 8) ? IRT_P64 : IRT_P32; sp = emitir(IRT(IR_XLOAD, t), sp, 0); @@ -345,13 +344,13 @@ static TRef crec_tv_ct(jit_State *J, CType *s, CTypeID sid, TRef sp) err_nyi: lj_trace_err(J, LJ_TRERR_NYICONV); } - /* Box pointer or ref. */ - return emitir(IRTG(IR_CNEWP, IRT_CDATA), lj_ir_kint(J, sid), sp); + /* Box pointer, ref or 64 bit integer. */ + return emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, sid), sp); } /* -- Convert TValue to C type (store) ------------------------------------ */ -static void crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, TValue *sval) +static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, TValue *sval) { CTState *cts = ctype_ctsG(J2G(J)); CTypeID sid = CTID_P_VOID; @@ -402,6 +401,12 @@ static void crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, TValue *sval) } else { goto doconv; /* The pointer value was loaded, don't load number. */ } + + } else if (ctype_isnum(s->info) && s->size == 8) { + IRType t = (s->info & CTF_UNSIGNED) ? IRT_U64 : IRT_I64; + sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_INT64); + lj_needsplit(J); + goto doconv; } else { sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCcdata))); } @@ -418,7 +423,7 @@ static void crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, TValue *sval) s = ctype_get(cts, sid); doconv: if (ctype_isenum(d->info)) d = ctype_child(cts, d); - crec_ct_ct(J, d, s, dp, sp, svisnz); + return crec_ct_ct(J, d, s, dp, sp, svisnz); } /* -- C data metamethods -------------------------------------------------- */ @@ -578,15 +583,18 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) CTState *cts = ctype_ctsG(J2G(J)); CTSize sz; CTInfo info = lj_ctype_info(cts, id, &sz); + CType *d = ctype_raw(cts, id); TRef trid; if (sz == 0 || sz > 64 || (info & CTF_VLA) || ctype_align(info) > CT_MEMALIGN) lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: large/special allocations. */ trid = lj_ir_kint(J, id); - if (ctype_isptr(info)) { - TRef sp = J->base[1] ? J->base[1] : lj_ir_kptr(J, NULL); - J->base[0] = emitir(IRTG(IR_CNEWP, IRT_CDATA), trid, sp); + /* Use special instruction to box pointer or 64 bit integer. */ + if (ctype_isptr(info) || (ctype_isnum(info) && sz == 8)) { + TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) : + ctype_isptr(info) ? lj_ir_kptr(J, NULL) : + (lj_needsplit(J), lj_ir_kint64(J, 0)); + J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp); } else { - CType *d = ctype_raw(cts, id); TRef trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, TREF_NIL); J->base[0] = trcd; if (J->base[1] && !J->base[2] && !lj_cconv_multi_init(d, &rd->argv[1])) { @@ -598,7 +606,7 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) TValue tv; TValue *sval = &tv; MSize i; - setnumV(&tv, 0); + tv.u64 = 0; if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info))) lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init array of aggregates. */ for (i = 1, ofs = 0; ofs < sz; ofs += esize) { @@ -645,11 +653,16 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) } } } else { - TRef sp, dp; + TRef dp; single_init: - sp = J->base[1] ? J->base[1] : lj_ir_kint(J, 0); dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, lj_ir_kintp(J, sizeof(GCcdata))); - crec_ct_tv(J, d, dp, sp, &rd->argv[1]); + if (J->base[1]) { + crec_ct_tv(J, d, dp, J->base[1], &rd->argv[1]); + } else { + TValue tv; + tv.u64 = 0; + crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv); + } } } } @@ -669,7 +682,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm) if (ctype_isnum(s[0]->info) && ctype_isnum(s[1]->info)) { IRType dt; CTypeID id; - TRef tr, dp, ptr; + TRef tr; MSize i; lj_needsplit(J); if (((s[0]->info & CTF_UNSIGNED) && s[0]->size == 8) || @@ -702,10 +715,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm) } else { tr = emitir(IRT(mm+(int)IR_ADD-(int)MM_add, dt), sp[0], sp[1]); } - dp = emitir(IRTG(IR_CNEW, IRT_CDATA), lj_ir_kint(J, id), TREF_NIL); - ptr = emitir(IRT(IR_ADD, IRT_PTR), dp, lj_ir_kintp(J, sizeof(GCcdata))); - emitir(IRT(IR_XSTORE, dt), ptr, tr); - return dp; + return emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); } return 0; } @@ -767,7 +777,7 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm) tr = emitir(IRT(IR_ADD, IRT_PTR), sp[0], tr); id = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|ctype_cid(ctp->info)), CTSIZE_PTR); - return emitir(IRTG(IR_CNEWP, IRT_CDATA), lj_ir_kint(J, id), tr); + return emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); } } @@ -787,6 +797,11 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd) IRType t = (LJ_64 && ct->size == 8) ? IRT_P64 : IRT_P32; if (ctype_isref(ct->info)) ct = ctype_rawchild(cts, ct); tr = emitir(IRT(IR_FLOAD, t), tr, IRFL_CDATA_PTR); + } else if (ctype_isnum(ct->info) && ct->size == 8) { + IRType t = (ct->info & CTF_UNSIGNED) ? IRT_U64 : IRT_I64; + tr = emitir(IRT(IR_FLOAD, t), tr, IRFL_CDATA_INT64); + lj_needsplit(J); + goto ok; } else { tr = emitir(IRT(IR_ADD, IRT_PTR), tr, lj_ir_kintp(J, sizeof(GCcdata))); } @@ -807,6 +822,7 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd) } else if (!tref_isnum(tr)) { goto err_type; } + ok: s[i] = ct; sp[i] = tr; } diff --git a/src/lj_ir.h b/src/lj_ir.h index dfafc5db..bde0ac04 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -113,7 +113,7 @@ _(TNEW, AW, lit, lit) \ _(TDUP, AW, ref, ___) \ _(CNEW, AW, ref, ref) \ - _(CNEWP, NW, ref, ref) /* CSE is ok, not marked as A. */ \ + _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \ \ /* Write barriers. */ \ _(TBAR, S , ref, ___) \ @@ -188,7 +188,9 @@ IRFPMDEF(FPMENUM) _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \ _(UDATA_FILE, sizeof(GCudata)) \ _(CDATA_TYPEID, offsetof(GCcdata, typeid)) \ - _(CDATA_PTR, sizeof(GCcdata)) + _(CDATA_PTR, sizeof(GCcdata)) \ + _(CDATA_INT64, sizeof(GCcdata)) \ + _(CDATA_INT64HI, sizeof(GCcdata) + 4) typedef enum { #define FLENUM(name, ofs) IRFL_##name, @@ -588,12 +590,12 @@ typedef union IRIns { #define ir_kptr(ir) \ check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, mref((ir)->ptr, void)) +LJ_STATIC_ASSERT((int)IRT_GUARD == (int)IRM_W); + /* A store or any other op with a non-weak guard has a side-effect. */ static LJ_AINLINE int ir_sideeff(IRIns *ir) { return (((ir->t.irt | ~IRT_GUARD) & lj_ir_mode[ir->o]) >= IRM_S); } -LJ_STATIC_ASSERT((int)IRT_GUARD == (int)IRM_W); - #endif diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index c3b0a082..28758013 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -154,7 +154,7 @@ typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J); #define gcstep_barrier(J, ref) \ ((ref) < J->chain[IR_LOOP] && \ (J->chain[IR_SNEW] || J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ - J->chain[IR_CNEW] || J->chain[IR_CNEWP] || J->chain[IR_TOSTR])) + J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR])) /* -- Constant folding for FP numbers ------------------------------------- */ @@ -307,7 +307,7 @@ static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) case IR_BOR: k1 |= k2; break; case IR_BXOR: k1 ^= k2; break; #endif - default: lua_assert(0); break; + default: UNUSED(k2); lua_assert(0); break; } return k1; } @@ -1765,18 +1765,28 @@ LJFOLDF(fload_cdata_typeid_kgc) return NEXTFOLD; } -LJFOLD(FLOAD CNEW IRFL_CDATA_TYPEID) -LJFOLD(FLOAD CNEWP IRFL_CDATA_TYPEID) -LJFOLDF(fload_cdata_typeid_cnew) +/* The content of int64 cdata objects is immutable. */ +LJFOLD(FLOAD KGC IRFL_CDATA_INT64) +LJFOLDF(fload_cdata_int64_kgc) { if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) - return fleft->op1; /* No PHI barrier needed. CNEW/CNEWP op1 is const. */ + return INT64FOLD(*(uint64_t *)cdataptr(ir_kcdata(fleft))); return NEXTFOLD; } -/* Pointer cdata objects are immutable. */ -LJFOLD(FLOAD CNEWP IRFL_CDATA_PTR) -LJFOLDF(fload_cdata_ptr_cnew) +LJFOLD(FLOAD CNEW IRFL_CDATA_TYPEID) +LJFOLD(FLOAD CNEWI IRFL_CDATA_TYPEID) +LJFOLDF(fload_cdata_typeid_cnew) +{ + if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) + return fleft->op1; /* No PHI barrier needed. CNEW/CNEWI op1 is const. */ + return NEXTFOLD; +} + +/* Pointer and int64 cdata objects are immutable. */ +LJFOLD(FLOAD CNEWI IRFL_CDATA_PTR) +LJFOLD(FLOAD CNEWI IRFL_CDATA_INT64) +LJFOLDF(fload_cdata_ptr_int64_cnew) { if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) return fleft->op2; /* Fold even across PHI to avoid allocations. */ @@ -1786,6 +1796,7 @@ LJFOLDF(fload_cdata_ptr_cnew) LJFOLD(FLOAD any IRFL_STR_LEN) LJFOLD(FLOAD any IRFL_CDATA_TYPEID) LJFOLD(FLOAD any IRFL_CDATA_PTR) +LJFOLD(FLOAD any IRFL_CDATA_INT64) LJFOLD(VLOAD any any) /* Vararg loads have no corresponding stores. */ LJFOLDX(lj_opt_cse) diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c index f53616b3..90b2b49c 100644 --- a/src/lj_opt_split.c +++ b/src/lj_opt_split.c @@ -159,7 +159,8 @@ static void split_ir(jit_State *J) ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo); hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi); } else { - ir->prev = (IRRef1)ref; /* Identity substitution for loword. */ + ir->prev = ref; /* Identity substitution for loword. */ + hisubst[ref] = 0; } } @@ -168,6 +169,7 @@ static void split_ir(jit_State *J) IRIns *ir = &oir[ref]; IRRef nref = lj_ir_nextins(J); IRIns *nir = IR(nref); + IRRef hi = 0; /* Copy-substitute old instruction to new instruction. */ nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev; @@ -175,10 +177,11 @@ static void split_ir(jit_State *J) ir->prev = nref; /* Loword substitution. */ nir->o = ir->o; nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI); + hisubst[ref] = 0; /* Split 64 bit instructions. */ if (irt_isint64(ir->t)) { - IRRef hi = hisubst[ir->op1]; + IRRef hiref = hisubst[ir->op1]; nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ switch (ir->o) { case IR_ADD: @@ -186,13 +189,13 @@ static void split_ir(jit_State *J) /* Use plain op for hiword if loword cannot produce a carry/borrow. */ if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) { ir->prev = nir->op1; /* Pass through loword. */ - nir->op1 = hi; nir->op2 = hisubst[ir->op2]; + nir->op1 = hiref; nir->op2 = hisubst[ir->op2]; hi = nref; break; } /* fallthrough */ case IR_NEG: - hi = split_emit(J, IRTI(IR_HIOP), hi, hisubst[ir->op2]); + hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]); break; case IR_MUL: hi = split_call64(J, hisubst, oir, ir, IRCALL_lj_carith_mul64); @@ -212,6 +215,13 @@ static void split_ir(jit_State *J) irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : IRCALL_lj_carith_powu64); break; + case IR_FLOAD: + lua_assert(ir->op2 == IRFL_CDATA_INT64); + hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64HI); +#if LJ_BE + ir->prev = hi; hi = nref; +#endif + break; case IR_XLOAD: hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, nir->op1), ir->op2); #if LJ_BE @@ -220,19 +230,18 @@ static void split_ir(jit_State *J) break; case IR_XSTORE: #if LJ_LE - hi = hisubst[ir->op2]; + hiref = hisubst[ir->op2]; #else - hi = nir->op2; nir->op2 = hisubst[ir->op2]; + hiref = nir->op2; nir->op2 = hisubst[ir->op2]; #endif - split_emit(J, IRTI(IR_XSTORE), split_ptr(J, nir->op1), hi); - continue; + split_emit(J, IRTI(IR_XSTORE), split_ptr(J, nir->op1), hiref); + break; case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref); } else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */ /* Drop cast, since assembler doesn't care. */ - hisubst[ref] = hi; goto fwdlo; } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */ IRRef k31 = lj_ir_kint(J, 31); @@ -242,27 +251,26 @@ static void split_ir(jit_State *J) nir->op2 = k31; hi = nref; } else { /* Zero-extend to 64 bit. */ - hisubst[ref] = lj_ir_kint(J, 0); + hi = lj_ir_kint(J, 0); goto fwdlo; } break; } case IR_PHI: { - IRRef hi2; + IRRef hiref2; if ((irref_isk(nir->op1) && irref_isk(nir->op2)) || nir->op1 == nir->op2) J->cur.nins--; /* Drop useless PHIs. */ - hi2 = hisubst[ir->op2]; - if (!((irref_isk(hi) && irref_isk(hi2)) || hi == hi2)) - split_emit(J, IRTI(IR_PHI), hi, hi2); - continue; + hiref2 = hisubst[ir->op2]; + if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2)) + split_emit(J, IRTI(IR_PHI), hiref, hiref2); + break; } default: - lua_assert(ir->o <= IR_NE); - split_emit(J, IRTGI(IR_HIOP), hi, hisubst[ir->op2]); /* Comparisons. */ - continue; + lua_assert(ir->o <= IR_NE); /* Comparisons. */ + split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); + break; } - hisubst[ref] = hi; /* Store hiword substitution. */ } else if (ir->o == IR_CONV) { /* See above, too. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */ @@ -277,9 +285,13 @@ static void split_ir(jit_State *J) nir->op1 = nir->op2 = 0; } } + } else if (ir->o == IR_CNEWI) { + if (hisubst[ir->op2]) + split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]); } else if (ir->o == IR_LOOP) { J->loopref = nref; /* Needed by assembler. */ } + hisubst[ref] = hi; /* Store hiword substitution. */ } /* Add PHI marks. */