From 2868715d80b6ac497a7f08393ec325b60d71df8d Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 23 May 2016 06:01:54 +0200 Subject: [PATCH] x64/LJ_GC64: Add missing backend support and enable JIT compilation. Contributed by Peter Cawley. --- src/lj_arch.h | 2 +- src/lj_asm.c | 11 ++ src/lj_asm_x86.h | 430 ++++++++++++++++++++++++++++++++++++++------ src/lj_emit_x86.h | 121 ++++++++++--- src/lj_ffrecord.c | 5 + src/lj_ir.h | 8 +- src/lj_record.c | 6 + src/lj_snap.c | 9 +- src/lj_target_x86.h | 12 +- src/vm_x64.dasc | 3 +- 10 files changed, 517 insertions(+), 90 deletions(-) diff --git a/src/lj_arch.h b/src/lj_arch.h index 72622a21..3c3c98b1 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -453,7 +453,7 @@ #endif /* Disable or enable the JIT compiler. */ -#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_GC64 +#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) #define LJ_HASJIT 0 #else #define LJ_HASJIT 1 diff --git a/src/lj_asm.c b/src/lj_asm.c index 5dd7ca3a..dba5c178 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -346,6 +346,12 @@ static Reg ra_rematk(ASMState *as, IRRef ref) #if LJ_64 } else if (ir->o == IR_KINT64) { emit_loadu64(as, r, ir_kint64(ir)->u64); +#if LJ_GC64 + } else if (ir->o == IR_KGC) { + emit_loadu64(as, r, (uintptr_t)ir_kgc(ir)); + } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { + emit_loadu64(as, r, (uintptr_t)ir_kptr(ir)); +#endif #endif } else { lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || @@ -1920,8 +1926,12 @@ static void asm_tail_link(ASMState *as) if (bc_isret(bc_op(*retpc))) pc = retpc; } +#if LJ_GC64 + emit_loadu64(as, RID_LPC, u64ptr(pc)); +#else ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); ra_allockreg(as, i32ptr(pc), RID_LPC); +#endif mres = (int32_t)(snap->nslots - baseslot - LJ_FR2); switch (bc_op(*pc)) { case BC_CALLM: case BC_CALLMT: @@ -2314,6 +2324,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T) as->curins = as->T->snap[0].ref; asm_snap_prep(as); /* The GC check is a guard. */ asm_gc_check(as); + as->curins = as->stopins; } ra_evictk(as); if (as->parent) diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 83fe22b2..7d07336a 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -21,12 +21,14 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) } /* Push the high byte of the exitno for each exit stub group. */ *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8); +#if !LJ_GC64 /* Store DISPATCH at original stack slot 0. Account for the two push ops. */ *mxp++ = XI_MOVmi; *mxp++ = MODRM(XM_OFS8, 0, RID_ESP); *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP); *mxp++ = 2*sizeof(void *); *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4; +#endif /* Jump to exit handler which fills in the ExitState. */ *mxp++ = XI_JMP; mxp += 4; *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler); @@ -62,10 +64,14 @@ static void asm_guardcc(ASMState *as, int cc) target = p; cc ^= 1; if (as->realign) { + if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP)) + as->mrm.ofs += 2; /* Fixup RIP offset for pending fused load. */ emit_sjcc(as, cc, target); return; } } + if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP)) + as->mrm.ofs += 6; /* Fixup RIP offset for pending fused load. 
*/ emit_jcc(as, cc, target); } @@ -79,6 +85,15 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) { if (irref_isk(ref)) { IRIns *ir = IR(ref); +#if LJ_GC64 + if (ir->o == IR_KNULL || !irt_is64(ir->t)) { + *k = ir->i; + return 1; + } else if (checki32((int64_t)ir_k64(ir)->u64)) { + *k = (int32_t)ir_k64(ir)->u64; + return 1; + } +#else if (ir->o != IR_KINT64) { *k = ir->i; return 1; @@ -86,6 +101,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) *k = (int32_t)ir_kint64(ir)->u64; return 1; } +#endif } return 0; } @@ -185,9 +201,19 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) if (irref_isk(ir->op1)) { GCfunc *fn = ir_kfunc(IR(ir->op1)); GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; +#if LJ_GC64 + int64_t ofs = dispofs(as, &uv->tv); + if (checki32(ofs) && checki32(ofs+4)) { + as->mrm.ofs = (int32_t)ofs; + as->mrm.base = RID_DISPATCH; + as->mrm.idx = RID_NONE; + return; + } +#else as->mrm.ofs = ptr2addr(&uv->tv); as->mrm.base = as->mrm.idx = RID_NONE; return; +#endif } break; default: @@ -207,17 +233,38 @@ static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF); as->mrm.idx = RID_NONE; if (ir->op1 == REF_NIL) { +#if LJ_GC64 + as->mrm.ofs = (int32_t)ir->op2 - GG_OFS(dispatch); + as->mrm.base = RID_DISPATCH; +#else as->mrm.ofs = (int32_t)ir->op2 + ptr2addr(J2GG(as->J)); as->mrm.base = RID_NONE; +#endif return; } as->mrm.ofs = field_ofs[ir->op2]; if (irref_isk(ir->op1)) { - as->mrm.ofs += IR(ir->op1)->i; + IRIns *op1 = IR(ir->op1); +#if LJ_GC64 + if (ir->op1 == REF_NIL) { + as->mrm.ofs -= GG_OFS(dispatch); + as->mrm.base = RID_DISPATCH; + return; + } else if (op1->o == IR_KPTR || op1->o == IR_KKPTR) { + intptr_t ofs = dispofs(as, ir_kptr(op1)); + if (checki32(as->mrm.ofs + ofs)) { + as->mrm.ofs += (int32_t)ofs; + as->mrm.base = RID_DISPATCH; + return; + } + } +#else + as->mrm.ofs += op1->i; as->mrm.base = RID_NONE; - } else { - as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); + return; +#endif } + as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); } /* Fuse string reference into memory operand. 
*/ @@ -228,7 +275,7 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow) as->mrm.base = as->mrm.idx = RID_NONE; as->mrm.scale = XM_SCALE1; as->mrm.ofs = sizeof(GCstr); - if (irref_isk(ir->op1)) { + if (!LJ_GC64 && irref_isk(ir->op1)) { as->mrm.ofs += IR(ir->op1)->i; } else { Reg r = ra_alloc1(as, ir->op1, allow); @@ -260,10 +307,20 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow) IRIns *ir = IR(ref); as->mrm.idx = RID_NONE; if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { +#if LJ_GC64 + intptr_t ofs = dispofs(as, ir_kptr(ir)); + if (checki32(ofs)) { + as->mrm.ofs = (int32_t)ofs; + as->mrm.base = RID_DISPATCH; + return; + } + } if (0) { +#else as->mrm.ofs = ir->i; as->mrm.base = RID_NONE; } else if (ir->o == IR_STRREF) { asm_fusestrref(as, ir, allow); +#endif } else { as->mrm.ofs = 0; if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) { @@ -310,13 +367,41 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow) static Reg asm_fuseloadk64(ASMState *as, IRIns *ir) { const uint64_t *k = &ir_k64(ir)->u64; - as->mrm.ofs = ptr2addr(k); - as->mrm.base = RID_NONE; + if (!LJ_GC64 || checki32((intptr_t)k)) { + as->mrm.ofs = ptr2addr(k); + as->mrm.base = RID_NONE; +#if LJ_GC64 + } else if (checki32(dispofs(as, k))) { + as->mrm.ofs = (int32_t)dispofs(as, k); + as->mrm.base = RID_DISPATCH; + } else if (checki32(mcpofs(as, k)) && checki32(mcpofs(as, k+1)) && + checki32(mctopofs(as, k)) && checki32(mctopofs(as, k+1))) { + as->mrm.ofs = (int32_t)mcpofs(as, k); + as->mrm.base = RID_RIP; + } else { + if (ir->i) { + lua_assert(*k == *(uint64_t*)(as->mctop - ir->i)); + } else { + while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3; + *(uint64_t*)as->mcbot = *k; + ir->i = (int32_t)(as->mctop - as->mcbot); + as->mcbot += 8; + as->mclim = as->mcbot + MCLIM_REDZONE; + } + as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i); + as->mrm.base = RID_RIP; +#endif + } as->mrm.idx = RID_NONE; return RID_MRM; } -/* Fuse load into memory operand. */ +/* Fuse load into memory operand. +** +** Important caveat: this may emit RIP-relative loads! So don't place any +** code emitters between this function and the use of its result. +** The only permitted exception is asm_guardcc(). +*/ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) { IRIns *ir = IR(ref); @@ -346,7 +431,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; if (ir->o == IR_SLOAD) { if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && - noconflict(as, ref, IR_RETF, 0)) { + noconflict(as, ref, IR_RETF, 0) && + !(LJ_GC64 && irt_isaddr(ir->t))) { as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) + (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 
4 : 0); @@ -361,7 +447,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) return RID_MRM; } } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { - if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0)) { + if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) && + !(LJ_GC64 && irt_isaddr(ir->t))) { asm_fuseahuref(as, ir->op1, xallow); return RID_MRM; } @@ -374,7 +461,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) asm_fusexref(as, ir->op1, xallow); return RID_MRM; } - } else if (ir->o == IR_VLOAD) { + } else if (ir->o == IR_VLOAD && !(LJ_GC64 && irt_isaddr(ir->t))) { asm_fuseahuref(as, ir->op1, xallow); return RID_MRM; } @@ -499,8 +586,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) if (r) { /* Argument is in a register. */ if (r < RID_MAX_GPR && ref < ASMREF_TMP1) { #if LJ_64 - if (ir->o == IR_KINT64) - emit_loadu64(as, r, ir_kint64(ir)->u64); + if (LJ_GC64 ? ir->o != IR_KINT : ir->o == IR_KINT64) + emit_loadu64(as, r, ir_k64(ir)->u64); else #endif emit_loadi(as, r, ir->i); @@ -668,7 +755,7 @@ static void asm_retf(ASMState *as, IRIns *ir) emit_addptr(as, base, -8*delta); asm_guardcc(as, CC_NE); #if LJ_FR2 - emit_rmro(as, XO_CMP, rpc, base, -8); + emit_rmro(as, XO_CMP, rpc|REX_GC64, base, -8); emit_loadu64(as, rpc, u64ptr(pc)); #else emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc)); @@ -696,8 +783,9 @@ static void asm_tobit(ASMState *as, IRIns *ir) Reg tmp = ra_noreg(IR(ir->op1)->r) ? ra_alloc1(as, ir->op1, RSET_FPR) : ra_scratch(as, RSET_FPR); - Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp)); + Reg right; emit_rr(as, XO_MOVDto, tmp, dest); + right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp)); emit_mrm(as, XO_ADDSD, tmp, right); ra_left(as, tmp, ir->op1); } @@ -768,13 +856,12 @@ static void asm_conv(ASMState *as, IRIns *ir) emit_rr(as, op, dest|REX_64, tmp); ra_left(as, tmp, lref); } else { - Reg left = asm_fuseload(as, lref, RSET_FPR); if (LJ_64 && irt_isu32(ir->t)) emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */ emit_mrm(as, op, dest|((LJ_64 && (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0), - left); + asm_fuseload(as, lref, RSET_FPR)); } } } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ @@ -952,6 +1039,24 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir)); } else { /* Otherwise use g->tmptv to hold the TValue. */ +#if LJ_GC64 + if (irref_isk(ref)) { + TValue k; + lj_ir_kvalue(as->J->L, &k, ir); + emit_movmroi(as, dest, 4, k.u32.hi); + emit_movmroi(as, dest, 0, k.u32.lo); + } else { + /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. 
*/ + Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest)); + if (irt_is64(ir->t)) { + emit_u32(as, irt_toitype(ir->t) << 15); + emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4); + } else { + emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15) | 0x7fff); + } + emit_movtomro(as, REX_64IR(ir, src), dest, 0); + } +#else if (!irref_isk(ref)) { Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest)); emit_movtomro(as, REX_64IR(ir, src), dest, 0); @@ -960,6 +1065,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) } if (!(LJ_64 && irt_islightud(ir->t))) emit_movmroi(as, dest, 4, irt_toitype(ir->t)); +#endif emit_loada(as, dest, &J2G(as->J)->tmptv); } } @@ -969,9 +1075,9 @@ static void asm_aref(ASMState *as, IRIns *ir) Reg dest = ra_dest(as, ir, RSET_GPR); asm_fusearef(as, ir, RSET_GPR); if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0)) - emit_mrm(as, XO_LEA, dest, RID_MRM); + emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM); else if (as->mrm.base != dest) - emit_rr(as, XO_MOV, dest, as->mrm.base); + emit_rr(as, XO_MOV, dest|REX_GC64, as->mrm.base); } /* Inlined hash lookup. Specialized for key type and for const keys. @@ -998,7 +1104,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) if (!isk) { rset_clear(allow, tab); key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); - if (!irt_isstr(kt)) + if (LJ_GC64 || !irt_isstr(kt)) tmp = ra_scratch(as, rset_exclude(allow, key)); } @@ -1011,8 +1117,8 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) /* Follow hash chain until the end. */ l_loop = emit_sjcc_label(as, CC_NZ); - emit_rr(as, XO_TEST, dest, dest); - emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next)); + emit_rr(as, XO_TEST, dest|REX_GC64, dest); + emit_rmro(as, XO_MOV, dest|REX_GC64, dest, offsetof(Node, next)); l_next = emit_label(as); /* Type and value comparison. */ @@ -1033,7 +1139,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n)); emit_sjcc(as, CC_AE, l_next); /* The type check avoids NaN penalties and complaints from Valgrind. 
*/ -#if LJ_64 +#if LJ_64 && !LJ_GC64 emit_u32(as, LJ_TISNUM); emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); #else @@ -1041,10 +1147,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); #endif } -#if LJ_64 +#if LJ_64 && !LJ_GC64 } else if (irt_islightud(kt)) { emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64)); -#endif +#elif LJ_GC64 + } else if (irt_isaddr(kt)) { + if (isk) { + TValue k; + k.u64 = ((uint64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; + emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo), + k.u32.lo); + emit_sjcc(as, CC_NE, l_next); + emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi), + k.u32.hi); + } else { + emit_rmro(as, XO_CMP, tmp|REX_64, dest, offsetof(Node, key.u64)); + } + } else { + lua_assert(irt_ispri(kt) && !irt_isnil(kt)); + emit_u32(as, (irt_toitype(kt)<<15)|0x7fff); + emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); +#else } else { if (!irt_ispri(kt)) { lua_assert(irt_isaddr(kt)); @@ -1058,16 +1181,23 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) lua_assert(!irt_isnil(kt)); emit_i8(as, irt_toitype(kt)); emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); +#endif } emit_sfixup(as, l_loop); checkmclim(as); +#if LJ_GC64 + if (!isk && irt_isaddr(kt)) { + emit_rr(as, XO_OR, tmp|REX_64, key); + emit_loadu64(as, tmp, (uint64_t)irt_toitype(kt) << 47); + } +#endif /* Load main position relative to tab->node into dest. */ khash = isk ? ir_khash(irkey) : 1; if (khash == 0) { - emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node)); + emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node)); } else { - emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node)); + emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node)); if ((as->flags & JIT_F_PREFER_IMUL)) { emit_i8(as, sizeof(Node)); emit_rr(as, XO_IMULi8, dest, dest); @@ -1122,11 +1252,11 @@ static void asm_hrefk(ASMState *as, IRIns *ir) if (ra_hasreg(dest)) { if (ofs != 0) { if (dest == node && !(as->flags & JIT_F_LEA_AGU)) - emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs); + emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs); else - emit_rmro(as, XO_LEA, dest, node, ofs); + emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs); } else if (dest != node) { - emit_rr(as, XO_MOV, dest, node); + emit_rr(as, XO_MOV, dest|REX_GC64, node); } } asm_guardcc(as, CC_NE); @@ -1138,13 +1268,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir) lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t)); /* Assumes -0.0 is already canonicalized to +0.0. */ emit_loadu64(as, key, irt_isnum(irkey->t) ? 
ir_knum(irkey)->u64 : +#if LJ_GC64 + ((uint64_t)irt_toitype(irkey->t) << 47) | + (uint64_t)ir_kgc(irkey)); +#else ((uint64_t)irt_toitype(irkey->t) << 32) | (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey))); +#endif } else { lua_assert(!irt_isnil(irkey->t)); +#if LJ_GC64 + emit_i32(as, (irt_toitype(irkey->t)<<15)|0x7fff); + emit_rmro(as, XO_ARITHi, XOg_CMP, node, + ofs + (int32_t)offsetof(Node, key.it)); +#else emit_i8(as, irt_toitype(irkey->t)); emit_rmro(as, XO_ARITHi8, XOg_CMP, node, ofs + (int32_t)offsetof(Node, key.it)); +#endif } #else l_exit = emit_label(as); @@ -1179,20 +1320,21 @@ static void asm_uref(ASMState *as, IRIns *ir) if (irref_isk(ir->op1)) { GCfunc *fn = ir_kfunc(IR(ir->op1)); MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; - emit_rma(as, XO_MOV, dest, v); + emit_rma(as, XO_MOV, dest|REX_GC64, v); } else { Reg uv = ra_scratch(as, RSET_GPR); Reg func = ra_alloc1(as, ir->op1, RSET_GPR); if (ir->o == IR_UREFC) { - emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv)); + emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv)); asm_guardcc(as, CC_NE); emit_i8(as, 1); emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed)); } else { - emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v)); + emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v)); } - emit_rmro(as, XO_MOV, uv, func, - (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); + emit_rmro(as, XO_MOV, uv|REX_GC64, func, + (int32_t)offsetof(GCfuncL, uvptr) + + (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); } } @@ -1210,9 +1352,9 @@ static void asm_strref(ASMState *as, IRIns *ir) if (as->mrm.base == RID_NONE) emit_loadi(as, dest, as->mrm.ofs); else if (as->mrm.base == dest && as->mrm.idx == RID_NONE) - emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs); + emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, as->mrm.ofs); else - emit_mrm(as, XO_LEA, dest, RID_MRM); + emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM); } /* -- Loads and stores ---------------------------------------------------- */ @@ -1281,7 +1423,7 @@ static void asm_fxstore(ASMState *as, IRIns *ir) case IRT_I16: case IRT_U16: xo = XO_MOVtow; break; case IRT_NUM: xo = XO_MOVSDto; break; case IRT_FLOAT: xo = XO_MOVSSto; break; -#if LJ_64 +#if LJ_64 && !LJ_GC64 case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */ #endif default: @@ -1313,7 +1455,7 @@ static void asm_fxstore(ASMState *as, IRIns *ir) #define asm_fstore(as, ir) asm_fxstore(as, ir) #define asm_xstore(as, ir) asm_fxstore(as, ir) -#if LJ_64 +#if LJ_64 && !LJ_GC64 static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) { if (ra_used(ir) || typecheck) { @@ -1335,9 +1477,12 @@ static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) static void asm_ahuvload(ASMState *as, IRIns *ir) { +#if LJ_GC64 + Reg tmp = RID_NONE; +#endif lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isint(ir->t))); -#if LJ_64 +#if LJ_64 && !LJ_GC64 if (irt_islightud(ir->t)) { Reg dest = asm_load_lightud64(as, ir, 1); if (ra_hasreg(dest)) { @@ -1351,20 +1496,64 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) RegSet allow = irt_isnum(ir->t) ? 
RSET_FPR : RSET_GPR; Reg dest = ra_dest(as, ir, allow); asm_fuseahuref(as, ir->op1, RSET_GPR); +#if LJ_GC64 + if (irt_isaddr(ir->t)) { + emit_shifti(as, XOg_SHR|REX_64, dest, 17); + asm_guardcc(as, CC_NE); + emit_i8(as, irt_toitype(ir->t)); + emit_rr(as, XO_ARITHi8, XOg_CMP, dest); + emit_i8(as, XI_O16); + if ((as->flags & JIT_F_BMI2)) { + emit_i8(as, 47); + emit_mrm(as, XV_RORX|VEX_64, dest, RID_MRM); + } else { + emit_shifti(as, XOg_ROR|REX_64, dest, 47); + emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM); + } + return; + } else +#endif emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM); } else { - asm_fuseahuref(as, ir->op1, RSET_GPR); + RegSet gpr = RSET_GPR; +#if LJ_GC64 + if (irt_isaddr(ir->t)) { + tmp = ra_scratch(as, RSET_GPR); + gpr = rset_exclude(gpr, tmp); + } +#endif + asm_fuseahuref(as, ir->op1, gpr); } /* Always do the type check, even if the load result is unused. */ as->mrm.ofs += 4; asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE); if (LJ_64 && irt_type(ir->t) >= IRT_NUM) { lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); +#if LJ_GC64 + emit_u32(as, LJ_TISNUM << 15); +#else emit_u32(as, LJ_TISNUM); +#endif emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); +#if LJ_GC64 + } else if (irt_isaddr(ir->t)) { + as->mrm.ofs -= 4; + emit_i8(as, irt_toitype(ir->t)); + emit_mrm(as, XO_ARITHi8, XOg_CMP, tmp); + emit_shifti(as, XOg_SAR|REX_64, tmp, 47); + emit_mrm(as, XO_MOV, tmp|REX_64, RID_MRM); + } else if (irt_isnil(ir->t)) { + as->mrm.ofs -= 4; + emit_i8(as, -1); + emit_mrm(as, XO_ARITHi8, XOg_CMP|REX_64, RID_MRM); + } else { + emit_u32(as, (irt_toitype(ir->t) << 15) | 0x7fff); + emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); +#else } else { emit_i8(as, irt_toitype(ir->t)); emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM); +#endif } } @@ -1376,11 +1565,27 @@ static void asm_ahustore(ASMState *as, IRIns *ir) Reg src = ra_alloc1(as, ir->op2, RSET_FPR); asm_fuseahuref(as, ir->op1, RSET_GPR); emit_mrm(as, XO_MOVSDto, src, RID_MRM); -#if LJ_64 +#if LJ_64 && !LJ_GC64 } else if (irt_islightud(ir->t)) { Reg src = ra_alloc1(as, ir->op2, RSET_GPR); asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src)); emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); +#endif +#if LJ_GC64 + } else if (irref_isk(ir->op2)) { + TValue k; + lj_ir_kvalue(as->J->L, &k, IR(ir->op2)); + asm_fuseahuref(as, ir->op1, RSET_GPR); + if (tvisnil(&k)) { + emit_i32(as, -1); + emit_mrm(as, XO_MOVmi, REX_64, RID_MRM); + } else { + emit_u32(as, k.u32.lo); + emit_mrm(as, XO_MOVmi, 0, RID_MRM); + as->mrm.ofs += 4; + emit_u32(as, k.u32.hi); + emit_mrm(as, XO_MOVmi, 0, RID_MRM); + } #endif } else { IRIns *irr = IR(ir->op2); @@ -1392,6 +1597,17 @@ static void asm_ahustore(ASMState *as, IRIns *ir) } asm_fuseahuref(as, ir->op1, allow); if (ra_hasreg(src)) { +#if LJ_GC64 + if (!(LJ_DUALNUM && irt_isinteger(ir->t))) { + /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. 
*/ + as->mrm.ofs += 4; + emit_u32(as, irt_toitype(ir->t) << 15); + emit_mrm(as, XO_ARITHi, XOg_OR, RID_MRM); + as->mrm.ofs -= 4; + emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); + return; + } +#endif emit_mrm(as, XO_MOVto, src, RID_MRM); } else if (!irt_ispri(irr->t)) { lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t))); @@ -1399,7 +1615,12 @@ static void asm_ahustore(ASMState *as, IRIns *ir) emit_mrm(as, XO_MOVmi, 0, RID_MRM); } as->mrm.ofs += 4; +#if LJ_GC64 + lua_assert(LJ_DUALNUM && irt_isinteger(ir->t)); + emit_i32(as, LJ_TNUMX << 15); +#else emit_i32(as, (int32_t)irt_toitype(ir->t)); +#endif emit_mrm(as, XO_MOVmi, 0, RID_MRM); } } @@ -1420,7 +1641,7 @@ static void asm_sload(ASMState *as, IRIns *ir) base = ra_alloc1(as, REF_BASE, RSET_GPR); emit_rmro(as, XO_MOVSD, left, base, ofs); t.irt = IRT_NUM; /* Continue with a regular number type check. */ -#if LJ_64 +#if LJ_64 && !LJ_GC64 } else if (irt_islightud(t)) { Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK)); if (ra_hasreg(dest)) { @@ -1438,6 +1659,36 @@ static void asm_sload(ASMState *as, IRIns *ir) t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs); } else { +#if LJ_GC64 + if (irt_isaddr(t)) { + /* LJ_GC64 type check + tag removal without BMI2 and with BMI2: + ** + ** mov r64, [addr] rorx r64, [addr], 47 + ** ror r64, 47 + ** cmp r16, itype cmp r16, itype + ** jne ->exit jne ->exit + ** shr r64, 16 shr r64, 16 + */ + emit_shifti(as, XOg_SHR|REX_64, dest, 17); + if ((ir->op2 & IRSLOAD_TYPECHECK)) { + asm_guardcc(as, CC_NE); + emit_i8(as, irt_toitype(t)); + emit_rr(as, XO_ARITHi8, XOg_CMP, dest); + emit_i8(as, XI_O16); + } + if ((as->flags & JIT_F_BMI2)) { + emit_i8(as, 47); + emit_rmro(as, XV_RORX|VEX_64, dest, base, ofs); + } else { + if ((ir->op2 & IRSLOAD_TYPECHECK)) + emit_shifti(as, XOg_ROR|REX_64, dest, 47); + else + emit_shifti(as, XOg_SHL|REX_64, dest, 17); + emit_rmro(as, XO_MOV, dest|REX_64, base, ofs); + } + return; + } else +#endif emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs); } } else { @@ -1450,11 +1701,42 @@ static void asm_sload(ASMState *as, IRIns *ir) asm_guardcc(as, irt_isnum(t) ? 
CC_AE : CC_NE); if (LJ_64 && irt_type(t) >= IRT_NUM) { lua_assert(irt_isinteger(t) || irt_isnum(t)); +#if LJ_GC64 + emit_u32(as, LJ_TISNUM << 15); +#else emit_u32(as, LJ_TISNUM); +#endif emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); +#if LJ_GC64 + } else if (irt_isnil(t)) { + /* LJ_GC64 type check for nil: + ** + ** cmp qword [addr], -1 + ** jne ->exit + */ + emit_i8(as, -1); + emit_rmro(as, XO_ARITHi8, XOg_CMP|REX_64, base, ofs); + } else if (irt_ispri(t)) { + emit_u32(as, (irt_toitype(t) << 15) | 0x7fff); + emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); + } else { + /* LJ_GC64 type check only: + ** + ** mov r64, [addr] + ** sar r64, 47 + ** cmp r32, itype + ** jne ->exit + */ + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, base)); + emit_i8(as, irt_toitype(t)); + emit_rr(as, XO_ARITHi8, XOg_CMP, tmp); + emit_shifti(as, XOg_SAR|REX_64, tmp, 47); + emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs+4); +#else } else { emit_i8(as, irt_toitype(t)); emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4); +#endif } } } @@ -1548,7 +1830,7 @@ static void asm_tbar(ASMState *as, IRIns *ir) Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab)); MCLabel l_end = emit_label(as); - emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist)); + emit_movtomro(as, tmp|REX_GC64, tab, offsetof(GCtab, gclist)); emit_setgl(as, tab, gc.grayagain); emit_getgl(as, tmp, gc.grayagain); emit_i8(as, ~LJ_GC_BLACK); @@ -2084,7 +2366,6 @@ static void asm_comp(ASMState *as, IRIns *ir) cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */ } left = ra_alloc1(as, lref, RSET_FPR); - right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left)); l_around = emit_label(as); asm_guardcc(as, cc >> 4); if (cc & VCC_P) { /* Extra CC_P branch required? */ @@ -2101,6 +2382,7 @@ static void asm_comp(ASMState *as, IRIns *ir) emit_jcc(as, CC_P, as->mcp); } } + right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left)); emit_mrm(as, XO_UCOMISD, left, right); } else { IRRef lref = ir->op1, rref = ir->op2; @@ -2377,13 +2659,18 @@ static void asm_stack_check(ASMState *as, BCReg topslot, emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0); else ra_modified(as, r); - emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot)); + emit_gri(as, XG_ARITHi(XOg_CMP), r|REX_GC64, (int32_t)(8*topslot)); if (ra_hasreg(pbase) && pbase != r) - emit_rr(as, XO_ARITH(XOg_SUB), r, pbase); + emit_rr(as, XO_ARITH(XOg_SUB), r|REX_GC64, pbase); else +#if LJ_GC64 + emit_rmro(as, XO_ARITH(XOg_SUB), r|REX_64, RID_DISPATCH, + (int32_t)dispofs(as, &J2G(as->J)->jit_base)); +#else emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE, ptr2addr(&J2G(as->J)->jit_base)); - emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack)); +#endif + emit_rmro(as, XO_MOV, r|REX_GC64, r, offsetof(lua_State, maxstack)); emit_getgl(as, r, cur_L); if (allow == RSET_EMPTY) /* Spill temp. register. */ emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0); @@ -2414,18 +2701,44 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) (LJ_DUALNUM && irt_isinteger(ir->t))); if (!irref_isk(ref)) { Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); +#if LJ_GC64 + if (irt_is64(ir->t)) { + /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. 
*/ + emit_u32(as, irt_toitype(ir->t) << 15); + emit_rmro(as, XO_ARITHi, XOg_OR, RID_BASE, ofs+4); + } else if (LJ_DUALNUM && irt_isinteger(ir->t)) { + emit_movmroi(as, RID_BASE, ofs+4, LJ_TISNUM << 15); + } else { + emit_movmroi(as, RID_BASE, ofs+4, (irt_toitype(ir->t)<<15)|0x7fff); + } +#endif emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs); +#if LJ_GC64 + } else { + TValue k; + lj_ir_kvalue(as->J->L, &k, ir); + if (tvisnil(&k)) { + emit_i32(as, -1); + emit_rmro(as, XO_MOVmi, REX_64, RID_BASE, ofs); + } else { + emit_movmroi(as, RID_BASE, ofs+4, k.u32.hi); + emit_movmroi(as, RID_BASE, ofs, k.u32.lo); + } +#else } else if (!irt_ispri(ir->t)) { emit_movmroi(as, RID_BASE, ofs, ir->i); +#endif } if ((sn & (SNAP_CONT|SNAP_FRAME))) { #if !LJ_FR2 if (s != 0) /* Do not overwrite link to previous frame. */ emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--)); #endif +#if !LJ_GC64 } else { if (!(LJ_64 && irt_islightud(ir->t))) emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); +#endif } } checkmclim(as); @@ -2451,11 +2764,15 @@ static void asm_gc_check(ASMState *as) args[1] = ASMREF_TMP2; /* MSize steps */ asm_gencall(as, ci, args); tmp = ra_releasetmp(as, ASMREF_TMP1); +#if LJ_GC64 + emit_rmro(as, XO_LEA, tmp|REX_64, RID_DISPATCH, GG_DISP2G); +#else emit_loada(as, tmp, J2G(as->J)); +#endif emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps); /* Jump around GC step if GC total < GC threshold. */ emit_sjcc(as, CC_B, l_end); - emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold); + emit_opgl(as, XO_ARITH(XOg_CMP), tmp|REX_GC64, gc.threshold); emit_getgl(as, tmp, gc.total); as->gcsteps = 0; checkmclim(as); @@ -2520,7 +2837,7 @@ static void asm_head_root_base(ASMState *as) if (rset_test(as->modset, r) || irt_ismarked(ir->t)) ir->r = RID_INIT; /* No inheritance for modified BASE register. */ if (r != RID_BASE) - emit_rr(as, XO_MOV, r, RID_BASE); + emit_rr(as, XO_MOV, r|REX_GC64, RID_BASE); } } @@ -2536,8 +2853,9 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) if (irp->r == r) { rset_clear(allow, r); /* Mark same BASE register as coalesced. */ } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { + /* Move from coalesced parent reg. */ rset_clear(allow, irp->r); - emit_rr(as, XO_MOV, r, irp->r); /* Move from coalesced parent reg. */ + emit_rr(as, XO_MOV, r|REX_GC64, irp->r); } else { emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ } @@ -2750,13 +3068,19 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) MSize len = T->szmcode; MCode *px = exitstub_addr(J, exitno) - 6; MCode *pe = p+len-6; - uint32_t stateaddr = u32ptr(&J2G(J)->vmstate); +#if LJ_GC64 + uint32_t statei = (uint32_t)(GG_OFS(g.vmstate) - GG_OFS(dispatch)); +#else + uint32_t statei = u32ptr(&J2G(J)->vmstate); +#endif if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) *(int32_t *)(p+len-4) = jmprel(p+len, target); /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */ - for (; p < pe; p += asm_x86_inslen(p)) - if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) + for (; p < pe; p += asm_x86_inslen(p)) { + intptr_t ofs = LJ_GC64 ? 
(p[0] & 0xf0) == 0x40 : LJ_64; + if (*(uint32_t *)(p+2+ofs) == statei && p[ofs+LJ_GC64-LJ_64] == XI_MOVmi) break; + } lua_assert(p < pe); for (; p < pe; p += asm_x86_inslen(p)) if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index 3d6f13f4..f0bca938 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h @@ -20,6 +20,11 @@ #define REX_64 0 #define VEX_64 0 #endif +#if LJ_GC64 +#define REX_GC64 REX_64 +#else +#define REX_GC64 0 +#endif #define emit_i8(as, i) (*--as->mcp = (MCode)(i)) #define emit_i32(as, i) (*(int32_t *)(as->mcp-4) = (i), as->mcp -= 4) @@ -94,26 +99,17 @@ static int32_t ptr2addr(const void *p) #define ptr2addr(p) (i32ptr((p))) #endif -/* op r, [addr] */ -static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) -{ - MCode *p = as->mcp; - *(int32_t *)(p-4) = ptr2addr(addr); -#if LJ_64 - p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP); - as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5); -#else - as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4); -#endif -} - /* op r, [base+ofs] */ static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs) { MCode *p = as->mcp; x86Mode mode; if (ra_hasreg(rb)) { - if (ofs == 0 && (rb&7) != RID_EBP) { + if (LJ_GC64 && rb == RID_RIP) { + mode = XM_OFS0; + p -= 4; + *(int32_t *)p = ofs; + } else if (ofs == 0 && (rb&7) != RID_EBP) { mode = XM_OFS0; } else if (checki8(ofs)) { *--p = (MCode)ofs; @@ -211,6 +207,11 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb) *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP); rb = RID_ESP; #endif + } else if (LJ_GC64 && rb == RID_RIP) { + lua_assert(as->mrm.idx == RID_NONE); + mode = XM_OFS0; + p -= 4; + *(int32_t *)p = as->mrm.ofs; } else { if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) { mode = XM_OFS0; @@ -264,8 +265,8 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) /* Get/set global_State fields. */ #define emit_opgl(as, xo, r, field) \ emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field) -#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r), field) -#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r), field) +#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r)|REX_GC64, field) +#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r)|REX_GC64, field) #define emit_setvmstate(as, i) \ (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate)) @@ -288,9 +289,21 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) } } +#if LJ_GC64 +#define dispofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch)) +#define mcpofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp)) +#define mctopofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop)) +/* mov r, addr */ +#define emit_loada(as, r, addr) \ + emit_loadu64(as, (r), (uintptr_t)(addr)) +#else /* mov r, addr */ #define emit_loada(as, r, addr) \ emit_loadi(as, (r), ptr2addr((addr))) +#endif #if LJ_64 /* mov r, imm64 or shorter 32 bit extended load. 
*/ @@ -302,6 +315,15 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) MCode *p = as->mcp; *(int32_t *)(p-4) = (int32_t)u64; as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4); +#if LJ_GC64 + } else if (checki32(dispofs(as, u64))) { + emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64)); + } else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) { + /* Since as->realign assumes the code size doesn't change, check + ** RIP-relative addressing reachability for both as->mcp and as->mctop. + */ + emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64)); +#endif } else { /* Full-size 64 bit load. */ MCode *p = as->mcp; *(uint64_t *)(p-8) = u64; @@ -313,22 +335,69 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) } #endif +/* op r, [addr] */ +static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) +{ +#if LJ_GC64 + if (checki32(dispofs(as, addr))) { + emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr)); + } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) { + emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr)); + } else if (!checki32((intptr_t)addr) && (xo == XO_MOV || xo == XO_MOVSD)) { + emit_rmro(as, xo, rr, rr, 0); + emit_loadu64(as, rr, (uintptr_t)addr); + } else +#endif + { + MCode *p = as->mcp; + *(int32_t *)(p-4) = ptr2addr(addr); +#if LJ_64 + p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP); + as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5); +#else + as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4); +#endif + } +} + /* Load 64 bit IR constant into register. */ static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) { + Reg r64; + x86Op xo; const uint64_t *k = &ir_k64(ir)->u64; if (rset_test(RSET_FPR, r)) { - if (*k == 0) { - emit_rr(as, XO_XORPS, r, r); - } else { - emit_rma(as, XO_MOVSD, r, k); - } + r64 = r; + xo = XO_MOVSD; } else { - if (*k == 0) { - emit_rr(as, XO_ARITH(XOg_XOR), r, r); + r64 = r | REX_64; + xo = XO_MOV; + } + if (*k == 0) { + emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r); +#if LJ_GC64 + } else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) || + (checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) { + emit_rma(as, xo, r64, k); + } else { + if (ir->i) { + lua_assert(*k == *(uint64_t*)(as->mctop - ir->i)); + } else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) { + emit_loadu64(as, r, *k); + return; } else { - emit_rma(as, XO_MOV, r | REX_64, k); + /* If all else fails, add the FP constant at the MCode area bottom. */ + while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3; + *(uint64_t *)as->mcbot = *k; + ir->i = (int32_t)(as->mctop - as->mcbot); + as->mcbot += 8; + as->mclim = as->mcbot + MCLIM_REDZONE; } + emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i)); +#else + } else { + emit_rma(as, xo, r64, k); +#endif } } @@ -470,9 +539,9 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs) { if (ofs) { if ((as->flags & JIT_F_LEA_AGU)) - emit_rmro(as, XO_LEA, r, r, ofs); + emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs); else - emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs); + emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs); } } diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 64a9a65d..6d141a20 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -1114,8 +1114,13 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id) { TRef tr, ud, fp; if (id) { /* io.func() */ +#if LJ_GC64 + /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. 
*/ + ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id])); +#else tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0); +#endif } else { /* fp:method() */ ud = J->base[0]; if (!tref_isudata(ud)) diff --git a/src/lj_ir.h b/src/lj_ir.h index 4e9c85c7..e77f7b99 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -412,7 +412,7 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv) static LJ_AINLINE uint32_t irt_toitype_(IRType t) { - lua_assert(!LJ_64 || t != IRT_LIGHTUD); + lua_assert(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD); if (LJ_DUALNUM && t > IRT_NUM) { return LJ_TISNUM; } else { @@ -568,7 +568,11 @@ typedef union IRIns { #define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv) #define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv) #define ir_k64(ir) \ - check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64, &(ir)[1].tv) + check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \ + (LJ_GC64 && \ + ((ir)->o == IR_KGC || \ + (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR)), \ + &(ir)[1].tv) #define ir_kptr(ir) \ check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \ mref((ir)[LJ_GC64].ptr, void)) diff --git a/src/lj_record.c b/src/lj_record.c index f0481050..3c67e1a0 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -976,7 +976,13 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) } /* The cdata metatable is treated as immutable. */ if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; +#if LJ_GC64 + /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */ + ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB, + GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)])); +#else ix->mt = mix.tab = lj_ir_ktab(J, mt); +#endif goto nocheck; } ix->mt = mt ? mix.tab : TREF_NIL; diff --git a/src/lj_snap.c b/src/lj_snap.c index 33c058be..0a08d4d4 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -623,7 +623,6 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, } if (LJ_UNLIKELY(bloomtest(rfilt, ref))) rs = snap_renameref(T, snapno, ref, rs); - lua_assert(!LJ_GC64); /* TODO_GC64: handle 64 bit references. */ if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ int32_t *sps = &ex->spill[regsp_spill(rs)]; if (irt_isinteger(t)) { @@ -632,9 +631,11 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, } else if (irt_isnum(t)) { o->u64 = *(uint64_t *)sps; #endif - } else if (LJ_64 && irt_islightud(t)) { +#if LJ_64 && !LJ_GC64 + } else if (irt_islightud(t)) { /* 64 bit lightuserdata which may escape already has the tag bits. */ o->u64 = *(uint64_t *)sps; +#endif } else { lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t)); @@ -652,9 +653,11 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, } else if (irt_isnum(t)) { setnumV(o, ex->fpr[r-RID_MIN_FPR]); #endif - } else if (LJ_64 && irt_is64(t)) { +#if LJ_64 && !LJ_GC64 + } else if (irt_is64(t)) { /* 64 bit values that already have the tag bits. */ o->u64 = ex->gpr[r-RID_MIN_GPR]; +#endif } else if (irt_ispri(t)) { setpriV(o, irt_toitype(t)); } else { diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index e29f4748..d5429597 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h @@ -22,7 +22,7 @@ _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) #endif #define VRIDDEF(_) \ - _(MRM) + _(MRM) _(RIP) #define RIDENUM(name) RID_##name, @@ -31,6 +31,7 @@ enum { FPRDEF(RIDENUM) /* Floating-point registers (FPRs). 
*/ RID_MAX, RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */ + RID_RIP = RID_MAX+1, /* Pseudo-id for RIP (x64 only). */ /* Calling conventions. */ RID_SP = RID_ESP, @@ -63,8 +64,10 @@ enum { /* -- Register sets ------------------------------------------------------- */ -/* Make use of all registers, except the stack pointer. */ -#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP)) +/* Make use of all registers, except the stack pointer (and maybe DISPATCH). */ +#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) \ + - RID2RSET(RID_ESP) \ + - LJ_GC64*RID2RSET(RID_DISPATCH)) #define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) #define RSET_ALL (RSET_GPR|RSET_FPR) #define RSET_INIT RSET_ALL @@ -200,6 +203,7 @@ typedef struct { */ typedef enum { /* Fixed length opcodes. XI_* prefix. */ + XI_O16 = 0x66, XI_NOP = 0x90, XI_XCHGa = 0x90, XI_CALL = 0xe8, @@ -217,6 +221,7 @@ typedef enum { XI_PUSHi8 = 0x6a, XI_TESTb = 0x84, XI_TEST = 0x85, + XI_INT3 = 0xcc, XI_MOVmi = 0xc7, XI_GROUP5 = 0xff, @@ -243,6 +248,7 @@ typedef enum { XV_SHRX = XV_f20f38(f7), /* Variable-length opcodes. XO_* prefix. */ + XO_OR = XO_(0b), XO_MOV = XO_(8b), XO_MOVto = XO_(89), XO_MOVtow = XO_66(89), diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index 759e30ec..d38ac907 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -2401,8 +2401,7 @@ static void build_subroutines(BuildCtx *ctx) | movzx RCd, byte [rbp-8] // Reconstruct exit number. | mov RCH, byte [rbp-16] | mov [rbp-8], r15; mov [rbp-16], r14 - | // Caveat: DISPATCH is rbx. - | mov DISPATCH, [ebp] + | // DISPATCH is preserved on-trace in LJ_GC64 mode. | mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number. | set_vmstate EXIT | mov [DISPATCH+DISPATCH_J(exitno)], RCd
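
Editor's note (not part of the patch): the new GC64 code paths above all lean on the same tagged 64-bit value layout, with the low 17 bits of the inverted type tag placed in bits 47..63 above a 47-bit payload. The patch probes it three ways: a full-width `sar r64, 47` plus an 8-bit immediate compare, a 32-bit compare of the high dword where the tag sits shifted left by 15, and a `ror 47` / 16-bit compare / `shr 17` untagging sequence. The standalone C sketch below only illustrates that bit arithmetic; the concrete tag and pointer values, gc64_tag() and main() are made up for illustration and are not LuaJIT API.

/* Editor's illustrative sketch -- NOT part of the patch. */
#include <stdint.h>
#include <assert.h>

static uint64_t gc64_tag(uint32_t itype, uint64_t gcptr)
{
  /* Same packing as the key constant built in asm_hrefk():
  **   ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey)
  ** Only the low 17 bits of itype survive the shift (they land in bits 47..63).
  */
  return ((uint64_t)itype << 47) | gcptr;
}

int main(void)
{
  uint32_t itype = 0xfffffff4u;            /* Example inverted type tag. */
  uint64_t gcptr = 0x00000fedcba98760ull;  /* Example 47-bit GC pointer. */
  uint64_t tv = gc64_tag(itype, gcptr);

  /* Full-width type check (asm_sload/asm_ahuvload, non-fused path):
  ** like the emitted SAR, an arithmetic shift by 47 recovers the
  ** sign-extended tag, so an 8-bit immediate compare suffices.
  */
  assert(((int64_t)tv >> 47) == (int32_t)itype);

  /* High-dword view (the (itype << 15) constants in asm_href/asm_hrefk):
  ** the tag occupies bits 15..31 of the upper 32-bit word.
  */
  assert((((uint32_t)(tv >> 32)) >> 15) == (itype & 0x1ffffu));

  /* Tag removal (rorx/ror by 47 followed by shr 17 in asm_sload and
  ** asm_ahuvload): rotate the tag into the low 17 bits, compare it there,
  ** then shift it out to leave the bare 47-bit pointer.
  */
  uint64_t rotated = (tv >> 47) | (tv << 17);
  assert((rotated >> 17) == gcptr);
  return 0;
}

The all-ones encoding of nil under this layout is also why the patch can check for nil with a single cmp qword [addr], -1.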