diff --git a/src/lj_asm.c b/src/lj_asm.c index 9b394beb..0b3e770a 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -334,7 +334,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref) RA_DBGX((as, "remat $i $r", ir, r)); #if !LJ_SOFTFP if (ir->o == IR_KNUM) { - emit_loadn(as, r, ir_knum(ir)); + emit_loadk64(as, r, ir); } else #endif if (emit_canremat(REF_BASE) && ir->o == IR_BASE) { @@ -695,15 +695,14 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref) if (ra_noreg(left)) { if (irref_isk(lref)) { if (ir->o == IR_KNUM) { - cTValue *tv = ir_knum(ir); /* FP remat needs a load except for +0. Still better than eviction. */ - if (tvispzero(tv) || !(as->freeset & RSET_FPR)) { - emit_loadn(as, dest, tv); + if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) { + emit_loadk64(as, dest, ir); return; } #if LJ_64 } else if (ir->o == IR_KINT64) { - emit_loadu64(as, dest, ir_kint64(ir)->u64); + emit_loadk64(as, dest, ir); return; #endif } else if (ir->o != IR_KPRI) { @@ -1963,8 +1962,14 @@ static void asm_setup_regsp(ASMState *as) ra_setup(as); /* Clear reg/sp for constants. */ - for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) + for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) { ir->prev = REGSP_INIT; + if (irt_is64(ir->t) && ir->o != IR_KNULL) { + /* Make life easier for backends by putting address of constant in i. */ + ir->i = (int32_t)(intptr_t)(ir+1); + ir++; + } + } /* REF_BASE is used for implicit references to the BASE register. */ lastir->prev = REGSP_HINT(RID_BASE); diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 69d1256e..0361a965 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -306,6 +306,16 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow) } } +/* Fuse load of 64 bit IR constant into memory operand. */ +static Reg asm_fuseloadk64(ASMState *as, IRIns *ir) +{ + const uint64_t *k = &ir_k64(ir)->u64; + as->mrm.ofs = ptr2addr(k); + as->mrm.base = RID_NONE; + as->mrm.idx = RID_NONE; + return RID_MRM; +} + /* Fuse load into memory operand. */ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) { @@ -325,19 +335,13 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) if (ir->o == IR_KNUM) { RegSet avail = as->freeset & ~as->modset & RSET_FPR; lua_assert(allow != RSET_EMPTY); - if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ - as->mrm.ofs = ptr2addr(ir_knum(ir)); - as->mrm.base = as->mrm.idx = RID_NONE; - return RID_MRM; - } + if (!(avail & (avail-1))) /* Fuse if less than two regs available. */ + return asm_fuseloadk64(as, ir); } else if (ir->o == IR_KINT64) { RegSet avail = as->freeset & ~as->modset & RSET_GPR; lua_assert(allow != RSET_EMPTY); - if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ - as->mrm.ofs = ptr2addr(ir_kint64(ir)); - as->mrm.base = as->mrm.idx = RID_NONE; - return RID_MRM; - } + if (!(avail & (avail-1))) /* Fuse if less than two regs available. */ + return asm_fuseloadk64(as, ir); } else if (mayfuse(as, ref)) { RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; if (ir->o == IR_SLOAD) { @@ -711,7 +715,7 @@ static void asm_conv(ASMState *as, IRIns *ir) emit_rr(as, XO_CVTSD2SS, dest, dest); emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */ emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */ - emit_loadn(as, bias, k); + emit_rma(as, XO_MOVSD, bias, k); emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR)); return; } else { /* Integer to FP conversion. */ diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h index 47fee5fc..dff9fac4 100644 --- a/src/lj_emit_arm.h +++ b/src/lj_emit_arm.h @@ -219,8 +219,9 @@ static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) #if !LJ_SOFTFP /* Load a number constant into an FPR. */ -static void emit_loadn(ASMState *as, Reg r, cTValue *tv) +static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) { + cTValue *tv = ir_knum(ir); int32_t i; if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) { uint32_t hi = tv->u32.hi; diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h index fdebe94b..29079ea3 100644 --- a/src/lj_emit_mips.h +++ b/src/lj_emit_mips.h @@ -112,8 +112,8 @@ static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow) emit_tsi(as, mi, r, base, i); } -#define emit_loadn(as, r, tv) \ - emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)(tv), RSET_GPR) +#define emit_loadk64(as, r, ir) \ + emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR) /* Get/set global_State fields. */ static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs) diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h index 4eb933ea..5163012a 100644 --- a/src/lj_emit_ppc.h +++ b/src/lj_emit_ppc.h @@ -115,8 +115,8 @@ static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow) emit_tai(as, pi, r, base, i); } -#define emit_loadn(as, r, tv) \ - emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)(tv), RSET_GPR) +#define emit_loadk64(as, r, ir) \ + emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR) /* Get/set global_State fields. */ static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs) diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index cbaf4e85..3d6f13f4 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h @@ -313,13 +313,23 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) } #endif -/* movsd r, [&tv->n] / xorps r, r */ -static void emit_loadn(ASMState *as, Reg r, cTValue *tv) +/* Load 64 bit IR constant into register. */ +static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) { - if (tvispzero(tv)) /* Use xor only for +0. */ - emit_rr(as, XO_XORPS, r, r); - else - emit_rma(as, XO_MOVSD, r, &tv->n); + const uint64_t *k = &ir_k64(ir)->u64; + if (rset_test(RSET_FPR, r)) { + if (*k == 0) { + emit_rr(as, XO_XORPS, r, r); + } else { + emit_rma(as, XO_MOVSD, r, k); + } + } else { + if (*k == 0) { + emit_rr(as, XO_ARITH(XOg_XOR), r, r); + } else { + emit_rma(as, XO_MOV, r | REX_64, k); + } + } } /* -- Emit control-flow instructions -------------------------------------- */ diff --git a/src/lj_gc.c b/src/lj_gc.c index 53f1d974..7c707462 100644 --- a/src/lj_gc.c +++ b/src/lj_gc.c @@ -238,6 +238,8 @@ static void gc_traverse_trace(global_State *g, GCtrace *T) IRIns *ir = &T->ir[ref]; if (ir->o == IR_KGC) gc_markobj(g, ir_kgc(ir)); + if (irt_is64(ir->t) && ir->o != IR_KNULL) + ref++; } if (T->link) gc_marktrace(g, T->link); if (T->nextroot) gc_marktrace(g, T->nextroot); diff --git a/src/lj_ir.c b/src/lj_ir.c index acb39463..124d5791 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c @@ -91,7 +91,7 @@ static void lj_ir_growbot(jit_State *J) IRIns *baseir = J->irbuf + J->irbotlim; MSize szins = J->irtoplim - J->irbotlim; lua_assert(szins != 0); - lua_assert(J->cur.nk == J->irbotlim); + lua_assert(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim); if (J->cur.nins + (szins >> 1) < J->irtoplim) { /* More than half of the buffer is free on top: shift up by a quarter. */ MSize ofs = szins >> 2; @@ -173,6 +173,18 @@ static LJ_AINLINE IRRef ir_nextk(jit_State *J) return ref; } +/* Get ref of next 64 bit IR constant and optionally grow IR. +** Note: this may invalidate all IRIns *! +*/ +static LJ_AINLINE IRRef ir_nextk64(jit_State *J) +{ + IRRef ref = J->cur.nk - 2; + lua_assert(J->state != LJ_TRACE_ASM); + if (LJ_UNLIKELY(ref < J->irbotlim)) lj_ir_growbot(J); + J->cur.nk = ref; + return ref; +} + /* Intern int32_t constant. */ TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k) { @@ -266,19 +278,18 @@ TValue *lj_ir_k64_reserve(jit_State *J) return ir_k64_add(J, kp, 0); /* Set to 0. Final value is set later. */ } -/* Intern 64 bit constant, given by its address. */ -TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv) +/* Intern 64 bit constant, given by its 64 bit pattern. */ +TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64) { IRIns *ir, *cir = J->cur.ir; IRRef ref; IRType t = op == IR_KNUM ? IRT_NUM : IRT_I64; for (ref = J->chain[op]; ref; ref = cir[ref].prev) - if (ir_k64(&cir[ref]) == tv) + if (ir_k64(&cir[ref])->u64 == u64) goto found; - ref = ir_nextk(J); + ref = ir_nextk64(J); ir = IR(ref); - lua_assert(checkptrGC(tv)); - setmref(ir->ptr, tv); + ir[1].tv.u64 = u64; ir->t.irt = t; ir->o = op; ir->prev = J->chain[op]; @@ -290,13 +301,13 @@ found: /* Intern FP constant, given by its 64 bit pattern. */ TRef lj_ir_knum_u64(jit_State *J, uint64_t u64) { - return lj_ir_k64(J, IR_KNUM, lj_ir_k64_find(J, u64)); + return lj_ir_k64(J, IR_KNUM, u64); } /* Intern 64 bit integer constant. */ TRef lj_ir_kint64(jit_State *J, uint64_t u64) { - return lj_ir_k64(J, IR_KINT64, lj_ir_k64_find(J, u64)); + return lj_ir_k64(J, IR_KINT64, u64); } /* Check whether a number is int and return it. -0 is NOT considered an int. */ @@ -367,7 +378,7 @@ TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr) IRRef ref; lua_assert((void *)(uintptr_t)u32ptr(ptr) == ptr); for (ref = J->chain[op]; ref; ref = cir[ref].prev) - if (mref(cir[ref].ptr, void) == ptr) + if (ir_kptr(&cir[ref]) == ptr) goto found; ref = ir_nextk(J); ir = IR(ref); @@ -432,9 +443,8 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir) case IR_KPRI: setpriV(tv, irt_toitype(ir->t)); break; case IR_KINT: setintV(tv, ir->i); break; case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break; - case IR_KPTR: case IR_KKPTR: case IR_KNULL: - setlightudV(tv, mref(ir->ptr, void)); - break; + case IR_KPTR: case IR_KKPTR: setlightudV(tv, ir_kptr(ir)); break; + case IR_KNULL: setlightudV(tv, NULL); break; case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break; #if LJ_HASFFI case IR_KINT64: { diff --git a/src/lj_ir.h b/src/lj_ir.h index 8a655b64..03377ec1 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -522,7 +522,9 @@ typedef uint32_t TRef; ** +-------+-------+---+---+---+---+ ** | op1 | op2 | t | o | r | s | ** +-------+-------+---+---+---+---+ -** | op12/i/gco | ot | prev | (alternative fields in union) +** | op12/i/gco32 | ot | prev | (alternative fields in union) +** +-------+-------+---+---+---+---+ +** | TValue/gco64 | (2nd IR slot for 64 bit constants) ** +---------------+-------+-------+ ** 32 16 16 ** @@ -550,8 +552,9 @@ typedef union IRIns { ) }; int32_t i; /* 32 bit signed integer literal (overlaps op12). */ - GCRef gcr; /* GCobj constant (overlaps op12). */ - MRef ptr; /* Pointer constant (overlaps op12). */ + GCRef gcr; /* GCobj constant (overlaps op12 or entire slot). */ + MRef ptr; /* Pointer constant (overlaps op12 or entire slot). */ + TValue tv; /* TValue constant (overlaps entire slot). */ } IRIns; /* TODO_GC64: major changes required. */ @@ -560,10 +563,10 @@ typedef union IRIns { #define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) #define ir_kfunc(ir) (gco2func(ir_kgc((ir)))) #define ir_kcdata(ir) (gco2cd(ir_kgc((ir)))) -#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, mref((ir)->ptr, cTValue)) -#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, mref((ir)->ptr,cTValue)) +#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv) +#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv) #define ir_k64(ir) \ - check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64, mref((ir)->ptr,cTValue)) + check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64, &(ir)[1].tv) #define ir_kptr(ir) \ check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, mref((ir)->ptr, void)) diff --git a/src/lj_iropt.h b/src/lj_iropt.h index fdc5f0d2..219d391a 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h @@ -41,7 +41,7 @@ LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs); /* Interning of constants. */ LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k); LJ_FUNC void lj_ir_k64_freeall(jit_State *J); -LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv); +LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64); LJ_FUNC TValue *lj_ir_k64_reserve(jit_State *J); LJ_FUNC cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64); LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64); diff --git a/src/lj_jit.h b/src/lj_jit.h index eafbc327..e9ab319e 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -355,8 +355,8 @@ enum { /* Fold state is used to fold instructions on-the-fly. */ typedef struct FoldState { IRIns ins; /* Currently emitted instruction. */ - IRIns left; /* Instruction referenced by left operand. */ - IRIns right; /* Instruction referenced by right operand. */ + IRIns left[2]; /* Instruction referenced by left operand. */ + IRIns right[2]; /* Instruction referenced by right operand. */ } FoldState; /* JIT compiler state. */ diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index c102f2db..73a368ed 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -136,8 +136,8 @@ /* Some local macros to save typing. Undef'd at the end. */ #define IR(ref) (&J->cur.ir[(ref)]) #define fins (&J->fold.ins) -#define fleft (&J->fold.left) -#define fright (&J->fold.right) +#define fleft (J->fold.left) +#define fright (J->fold.right) #define knumleft (ir_knum(fleft)->n) #define knumright (ir_knum(fright)->n) @@ -2393,10 +2393,14 @@ retry: if (fins->op1 >= J->cur.nk) { key += (uint32_t)IR(fins->op1)->o << 10; *fleft = *IR(fins->op1); + if (fins->op1 < REF_TRUE) + fleft[1] = IR(fins->op1)[1]; } if (fins->op2 >= J->cur.nk) { key += (uint32_t)IR(fins->op2)->o; *fright = *IR(fins->op2); + if (fins->op2 < REF_TRUE) + fright[1] = IR(fins->op2)[1]; } else { key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */ } diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index 5549b0d0..92ecbb48 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c @@ -22,8 +22,8 @@ /* Some local macros to save typing. Undef'd at the end. */ #define IR(ref) (&J->cur.ir[(ref)]) #define fins (&J->fold.ins) -#define fleft (&J->fold.left) -#define fright (&J->fold.right) +#define fleft (J->fold.left) +#define fright (J->fold.right) /* ** Caveat #1: return value is not always a TRef -- only use with tref_ref(). diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c index 49e13784..1b775f2d 100644 --- a/src/lj_opt_sink.c +++ b/src/lj_opt_sink.c @@ -220,6 +220,8 @@ static void sink_sweep_ins(jit_State *J) for (ir = IR(J->cur.nk); ir < irbase; ir++) { irt_clearmark(ir->t); ir->prev = REGSP_INIT; + if (irt_is64(ir->t) && ir->o != IR_KNULL) + ir++; } } diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c index 49c9ae47..19818660 100644 --- a/src/lj_opt_split.c +++ b/src/lj_opt_split.c @@ -354,6 +354,8 @@ static void split_ir(jit_State *J) ir->prev = ref; /* Identity substitution for loword. */ hisubst[ref] = 0; } + if (irt_is64(ir->t) && ir->o != IR_KNULL) + ref++; } /* Process old IR instructions. */ diff --git a/src/lj_record.c b/src/lj_record.c index b5fb6649..3b754897 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -61,7 +61,10 @@ static void rec_check_ir(jit_State *J) case IRMref: lua_assert(op1 >= nk); lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break; case IRMlit: break; - case IRMcst: lua_assert(i < REF_BIAS); continue; + case IRMcst: lua_assert(i < REF_BIAS); + if (irt_is64(ir->t) && ir->o != IR_KNULL) + i++; + continue; } switch (irm_op2(mode)) { case IRMnone: lua_assert(op2 == 0); break; diff --git a/src/lj_snap.c b/src/lj_snap.c index 8638d9ed..6199b1f0 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -371,8 +371,8 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir) case IR_KPRI: return TREF_PRI(irt_type(ir->t)); case IR_KINT: return lj_ir_kint(J, ir->i); case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); - case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir)); - case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir)); + case IR_KNUM: case IR_KINT64: + return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64); case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */ default: lua_assert(0); return TREF_NIL; break; } @@ -555,8 +555,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T) if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { uint64_t k = (uint32_t)T->ir[irs->op2].i + ((uint64_t)T->ir[(irs+1)->op2].i << 32); - val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, - lj_ir_k64_find(J, k)); + val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k); } else { val = emitir_raw(IRT(IR_HIOP, t), val, snap_pref(J, T, map, nent, seen, (irs+1)->op2)); @@ -651,7 +650,7 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, uint64_t tmp; if (irref_isk(ref)) { if (ir->o == IR_KNUM || ir->o == IR_KINT64) { - src = mref(ir->ptr, int32_t); + src = (int32_t *)&ir[1]; } else if (sz == 8) { tmp = (uint64_t)(uint32_t)ir->i; src = (int32_t *)&tmp;