x64/LJ_GC64: Enable JIT compilation.

Under LJ_GC64, RID_DISPATCH is removed from the pool of available general purpose
registers, and instead retains its role as a pointer to the dispatch table
throughout JIT code. This guarantees that members of the global_State and
the jit_State can always be encoded in modrm. If the memory allocator is
kind, it also allows for various KGC and KPTR values to be encoded as
32-bit offsets from RID_DISPATCH. Likewise, when SSE instructions want to
use a KNUM as a memory operand, it often transpires that the address of
the KNUM's 64-bit payload can be expressed as a 32-bit offset from
RID_DISPATCH.
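
The fusing helpers below implement this by computing a constant's offset
from the dispatch table and only using RID_DISPATCH as the base register
when that offset fits in a signed 32-bit displacement. A condensed sketch
(adapted from the asm_fusexref hunk in this commit; not verbatim):

    intptr_t ofs = dispofs(as, ir_kptr(ir));  /* k minus J2GG(as->J)->dispatch */
    if (checki32(ofs)) {                      /* Fits in a disp32? */
      as->mrm.ofs = (int32_t)ofs;
      as->mrm.base = RID_DISPATCH;            /* Address as [DISPATCH+ofs]. */
      as->mrm.idx = RID_NONE;
    }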

In some cases the recording logic has been tweaked to encode constants
relative to RID_DISPATCH instead of as absolute addresses. This is done
via calls to lj_ir_ggfload.
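
For example, the recff_io_fp hunk in this commit replaces an absolute KPTR
plus XLOAD with a single GG-relative load:

    #if LJ_GC64
      ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id]));
    #else
      tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]);
      ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0);
    #endif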

LJ_GC64 also introduces a new pseudo-register: RID_RIP. If the memory
allocator isn't kind enough to put things within a 32-bit range of the
dispatch table, it is sometimes kind enough to instead put things within a
32-bit range of the mcode pointer. Furthermore, for constants which we
want (or need) to be loaded via memory operands, the constant's payload can be
copied to the low part of an mcode region, at which point it is guaranteed
to be representable as a RIP-relative operand. Fused loads can result in
an mrm referencing RID_RIP. In such cases, the fusing is only valid for
the next emitted instruction - though as a special case, one asm_guardcc call is
permitted between the fusing and the instruction into which the fusion
result is inserted.
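
The "copy the payload to the low part of an mcode region" fallback is
implemented in asm_fuseloadk64 (see the hunk below); with comments added,
it reads roughly as follows:

    while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;  /* 8-byte align. */
    *(uint64_t *)as->mcbot = *k;               /* Copy the constant's payload. */
    ir->i = (int32_t)(as->mctop - as->mcbot);  /* Remember the slot's position. */
    as->mcbot += 8;
    as->mclim = as->mcbot + MCLIM_REDZONE;
    as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i);  /* Always in range. */
    as->mrm.base = RID_RIP;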

TValue detagging is notable under LJ_GC64. The basic code pattern is:
    mov r64, [addr]
    ror r64, 47
    cmp r16, itype
    jnz ->exit
    shr r64, 17
If BMI2 is available, the mov/ror pair is fused into a single rorx. If BMI2
isn't available and a type test isn't required, the ror 47 becomes a shl 17
(and the
cmp/jnz are dropped). The type test is interesting as it only considers 16
bits of tag, despite the TValues in question nominally consisting of 47
bits of pointer and 17 bits of tag. The 16 considered bits are sufficient
to verify that the TValue is a NaN (11 bits), is a QNaN (1 bit), and has
the correct itype (4 bits). The one unconsidered bit is the sign bit of
the NaN. LuaJIT operates under the assumption that all NaNs in the system
are either canonical NaNs (as generated by the FPU) or are NaN-packed
TValues. In both cases, the sign bit of the NaN is set, and therefore does
not need to be verified during detagging. The cmp instruction encodes the
itype as an imm8, thus avoiding the LCP stall that an imm16 immediate would
incur. False LCP stalls are still an issue, and could be trivially worked
around by sometimes inserting an extra nop instruction, but this
could break loop realignment (as the realigned code might be one byte
larger or one byte smaller, and loop realignment operates under the
assumption that a sequence of emitted instructions always occupies the
same number of bytes, regardless of where it is emitted [1]).
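
For reference, a minimal C model of the detag sequence above (illustration
only; it assumes the 47-bit-payload/17-bit-tag layout described here, with
itype being the negative LJ_T* type code whose upper bits are all set):

    #include <stdint.h>

    static int detag(uint64_t tv, int32_t itype, uint64_t *payload)
    {
      uint64_t r = (tv >> 47) | (tv << 17);  /* ror r64, 47 */
      if ((uint16_t)r != (uint16_t)itype)    /* cmp r16, itype; sign bit ignored */
        return 0;                            /* jnz ->exit */
      *payload = r >> 17;                    /* shr r64, 17 */
      return 1;
    }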

[1] This assumption also makes RIP-relative operands even more slippery.
A priori, the realigned code might be able to reach things it
previously couldn't, or conversely not reach things it previously could.
To prevent this from happening, checki32/mcpofs is paired with
checki32/mctopofs: if a given address is reachable with a 32-bit
displacement from both of these points, then it'll also be reachable with
a 32-bit displacement from a realigned mcp.
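
The offset macros added in this commit, and the paired reachability test
(simplified here to a single address; the asm_fuseloadk64 hunk also checks
k+1, since the operand spans 8 bytes), look like this:

    #define mcpofs(as, k) \
      ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp))
    #define mctopofs(as, k) \
      ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop))

    /* If k is within 32-bit range of both mcp and mctop, it will also be
    ** within range of a realigned mcp.
    */
    if (checki32(mcpofs(as, k)) && checki32(mctopofs(as, k))) {
      as->mrm.ofs = (int32_t)mcpofs(as, k);
      as->mrm.base = RID_RIP;
    }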

Author: Peter Cawley, 2016-03-26 15:42:53 +00:00
Parent: 79021951e5
Commit: 7213658b33
10 changed files with 463 additions and 75 deletions

@ -453,7 +453,7 @@
#endif
/* Disable or enable the JIT compiler. */
#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_GC64
#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT)
#define LJ_HASJIT 0
#else
#define LJ_HASJIT 1

@ -346,6 +346,12 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
#if LJ_64
} else if (ir->o == IR_KINT64) {
emit_loadu64(as, r, ir_kint64(ir)->u64);
#if LJ_GC64
} else if (ir->o == IR_KGC) {
emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
} else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
#endif
#endif
} else {
lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
@ -1927,8 +1933,12 @@ static void asm_tail_link(ASMState *as)
if (bc_isret(bc_op(*retpc)))
pc = retpc;
}
#if LJ_GC64
emit_loadu64(as, RID_LPC, u64ptr(pc));
#else
ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
ra_allockreg(as, i32ptr(pc), RID_LPC);
#endif
mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
switch (bc_op(*pc)) {
case BC_CALLM: case BC_CALLMT:
@ -2289,6 +2299,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
as->curins = as->T->snap[0].ref;
asm_snap_prep(as); /* The GC check is a guard. */
asm_gc_check(as);
as->curins = as->stopins;
}
ra_evictk(as);
if (as->parent)

@ -21,12 +21,14 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
}
/* Push the high byte of the exitno for each exit stub group. */
*mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8);
#if !LJ_GC64
/* Store DISPATCH at original stack slot 0. Account for the two push ops. */
*mxp++ = XI_MOVmi;
*mxp++ = MODRM(XM_OFS8, 0, RID_ESP);
*mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
*mxp++ = 2*sizeof(void *);
*(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4;
#endif
/* Jump to exit handler which fills in the ExitState. */
*mxp++ = XI_JMP; mxp += 4;
*((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler);
@ -62,10 +64,18 @@ static void asm_guardcc(ASMState *as, int cc)
target = p;
cc ^= 1;
if (as->realign) {
#if LJ_GC64
if (LJ_UNLIKELY(as->mrm.base == RID_RIP))
as->mrm.ofs += 2;
#endif
emit_sjcc(as, cc, target);
return;
}
}
#if LJ_GC64
if (LJ_UNLIKELY(as->mrm.base == RID_RIP))
as->mrm.ofs += 6;
#endif
emit_jcc(as, cc, target);
}
@ -79,6 +89,15 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
{
if (irref_isk(ref)) {
IRIns *ir = IR(ref);
#if LJ_GC64
if (ir->o == IR_KNULL || !irt_is64(ir->t)) {
*k = ir->i;
return 1;
} else if (checki32((int64_t)ir[1].tv.u64)) {
*k = (int32_t)ir[1].tv.u64;
return 1;
}
#else
if (ir->o != IR_KINT64) {
*k = ir->i;
return 1;
@ -86,6 +105,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
*k = (int32_t)ir_kint64(ir)->u64;
return 1;
}
#endif
}
return 0;
}
@ -185,9 +205,19 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
#if LJ_GC64
int64_t ofs = dispofs(as, &uv->tv);
if (checki32(ofs) && checki32(ofs+4)) {
as->mrm.ofs = (int32_t)ofs;
as->mrm.base = RID_DISPATCH;
as->mrm.idx = RID_NONE;
return;
}
#else
as->mrm.ofs = ptr2addr(&uv->tv);
as->mrm.base = as->mrm.idx = RID_NONE;
return;
#endif
}
break;
default:
@ -207,17 +237,38 @@ static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow)
lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF);
as->mrm.idx = RID_NONE;
if (ir->op1 == REF_NIL) {
#if LJ_GC64
as->mrm.ofs = (int32_t)ir->op2 - GG_OFS(dispatch);
as->mrm.base = RID_DISPATCH;
#else
as->mrm.ofs = (int32_t)ir->op2 + ptr2addr(J2GG(as->J));
as->mrm.base = RID_NONE;
#endif
return;
}
as->mrm.ofs = field_ofs[ir->op2];
if (irref_isk(ir->op1)) {
as->mrm.ofs += IR(ir->op1)->i;
IRIns *op1 = IR(ir->op1);
#if LJ_GC64
if (ir->op1 == REF_NIL) {
as->mrm.ofs -= GG_OFS(dispatch);
as->mrm.base = RID_DISPATCH;
return;
} else if (op1->o == IR_KPTR || op1->o == IR_KKPTR) {
intptr_t ofs = dispofs(as, ir_kptr(op1));
if (checki32(as->mrm.ofs + ofs)) {
as->mrm.ofs += (int32_t)ofs;
as->mrm.base = RID_DISPATCH;
return;
}
}
#else
as->mrm.ofs += op1->i;
as->mrm.base = RID_NONE;
} else {
as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
return;
#endif
}
as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
}
/* Fuse string reference into memory operand. */
@ -228,7 +279,7 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
as->mrm.base = as->mrm.idx = RID_NONE;
as->mrm.scale = XM_SCALE1;
as->mrm.ofs = sizeof(GCstr);
if (irref_isk(ir->op1)) {
if (!LJ_GC64 && irref_isk(ir->op1)) {
as->mrm.ofs += IR(ir->op1)->i;
} else {
Reg r = ra_alloc1(as, ir->op1, allow);
@ -260,10 +311,20 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
IRIns *ir = IR(ref);
as->mrm.idx = RID_NONE;
if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
#if LJ_GC64
intptr_t ofs = dispofs(as, ir_kptr(ir));
if (checki32(ofs)) {
as->mrm.ofs = (int32_t)ofs;
as->mrm.base = RID_DISPATCH;
return;
}
} if (0) {
#else
as->mrm.ofs = ir->i;
as->mrm.base = RID_NONE;
} else if (ir->o == IR_STRREF) {
asm_fusestrref(as, ir, allow);
#endif
} else {
as->mrm.ofs = 0;
if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) {
@ -310,8 +371,31 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
static Reg asm_fuseloadk64(ASMState *as, IRIns *ir)
{
const uint64_t *k = &ir[1].tv.u64;
as->mrm.ofs = ptr2addr(k);
as->mrm.base = RID_NONE;
if (!LJ_GC64 || checki32((intptr_t)k)) {
as->mrm.ofs = ptr2addr(k);
as->mrm.base = RID_NONE;
#if LJ_GC64
} else if (checki32(dispofs(as, k))) {
as->mrm.ofs = (int32_t)dispofs(as, k);
as->mrm.base = RID_DISPATCH;
} else if (checki32(mcpofs(as, k)) && checki32(mcpofs(as, k+1))
&& checki32(mctopofs(as, k)) && checki32(mctopofs(as, k+1))) {
as->mrm.ofs = (int32_t)mcpofs(as, k);
as->mrm.base = RID_RIP;
} else {
if (ir->i) {
lua_assert(*k == *(uint64_t*)(as->mctop - ir->i));
} else {
while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
*(uint64_t*)as->mcbot = *k;
ir->i = (int32_t)(as->mctop - as->mcbot);
as->mcbot += 8;
as->mclim = as->mcbot + MCLIM_REDZONE;
}
as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i);
as->mrm.base = RID_RIP;
#endif
}
as->mrm.idx = RID_NONE;
return RID_MRM;
}
@ -346,9 +430,11 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
if (ir->o == IR_SLOAD) {
if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
noconflict(as, ref, IR_RETF, 0)) {
noconflict(as, ref, IR_RETF, 0) &&
!(LJ_GC64 && irt_isaddr(ir->t))) {
int32_t op1ofs = 8*((int32_t)ir->op1-1-LJ_FR2);
as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
as->mrm.ofs = 8*((int32_t)ir->op1-1) + (!LJ_FR2&&(ir->op2&IRSLOAD_FRAME)?4:0);
as->mrm.ofs = op1ofs + (!LJ_FR2&&(ir->op2&IRSLOAD_FRAME)?4:0);
as->mrm.idx = RID_NONE;
return RID_MRM;
}
@ -360,7 +446,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
return RID_MRM;
}
} else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0)) {
if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) &&
!(LJ_GC64 && irt_isaddr(ir->t))) {
asm_fuseahuref(as, ir->op1, xallow);
return RID_MRM;
}
@ -373,7 +460,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
asm_fusexref(as, ir->op1, xallow);
return RID_MRM;
}
} else if (ir->o == IR_VLOAD) {
} else if (ir->o == IR_VLOAD && !(LJ_GC64 && irt_isaddr(ir->t))) {
asm_fuseahuref(as, ir->op1, xallow);
return RID_MRM;
}
@ -501,6 +588,13 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
if (ir->o == IR_KINT64)
emit_loadu64(as, r, ir_kint64(ir)->u64);
else
#if LJ_GC64
if (ir->o == IR_KGC)
emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
else if (ir->o == IR_KPTR || ir->o == IR_KKPTR)
emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
else
#endif
#endif
emit_loadi(as, r, ir->i);
} else {
@ -667,7 +761,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
emit_addptr(as, base, -8*delta);
asm_guardcc(as, CC_NE);
#if LJ_FR2
emit_rmro(as, XO_CMP, rpc, base, -8);
emit_rmro(as, XO_CMP, rpc|REX_GC64, base, -8);
emit_loadu64(as, rpc, u64ptr(pc));
#else
emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc));
@ -695,8 +789,9 @@ static void asm_tobit(ASMState *as, IRIns *ir)
Reg tmp = ra_noreg(IR(ir->op1)->r) ?
ra_alloc1(as, ir->op1, RSET_FPR) :
ra_scratch(as, RSET_FPR);
Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp));
Reg right;
emit_rr(as, XO_MOVDto, tmp, dest);
right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp));
emit_mrm(as, XO_ADDSD, tmp, right);
ra_left(as, tmp, ir->op1);
}
@ -769,13 +864,12 @@ static void asm_conv(ASMState *as, IRIns *ir)
emit_rr(as, op, dest|REX_64, tmp);
ra_left(as, tmp, lref);
} else {
Reg left = asm_fuseload(as, lref, RSET_FPR);
if (LJ_64 && irt_isu32(ir->t))
emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */
emit_mrm(as, op,
dest|((LJ_64 &&
(irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0),
left);
asm_fuseload(as, lref, RSET_FPR));
}
}
} else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
@ -953,6 +1047,23 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
} else {
/* Otherwise use g->tmptv to hold the TValue. */
#if LJ_GC64
if (irref_isk(ref)) {
TValue k;
lj_ir_kvalue(as->J->L, &k, ir);
emit_movmroi(as, dest, 4, k.u32.hi);
emit_movmroi(as, dest, 0, k.u32.lo);
} else {
Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
if (irt_is64(ir->t)) {
emit_u32(as, irt_toitype(ir->t) << 15);
emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4);
} else {
emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15) | 0x7fff);
}
emit_movtomro(as, REX_64IR(ir, src), dest, 0);
}
#else
if (!irref_isk(ref)) {
Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
emit_movtomro(as, REX_64IR(ir, src), dest, 0);
@ -961,6 +1072,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
}
if (!(LJ_64 && irt_islightud(ir->t)))
emit_movmroi(as, dest, 4, irt_toitype(ir->t));
#endif
emit_loada(as, dest, &J2G(as->J)->tmptv);
}
}
@ -970,9 +1082,9 @@ static void asm_aref(ASMState *as, IRIns *ir)
Reg dest = ra_dest(as, ir, RSET_GPR);
asm_fusearef(as, ir, RSET_GPR);
if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0))
emit_mrm(as, XO_LEA, dest, RID_MRM);
emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM);
else if (as->mrm.base != dest)
emit_rr(as, XO_MOV, dest, as->mrm.base);
emit_rr(as, XO_MOV, dest|REX_GC64, as->mrm.base);
}
/* Inlined hash lookup. Specialized for key type and for const keys.
@ -999,7 +1111,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
if (!isk) {
rset_clear(allow, tab);
key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
if (!irt_isstr(kt))
if (LJ_GC64 || !irt_isstr(kt))
tmp = ra_scratch(as, rset_exclude(allow, key));
}
@ -1012,8 +1124,8 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
/* Follow hash chain until the end. */
l_loop = emit_sjcc_label(as, CC_NZ);
emit_rr(as, XO_TEST, dest, dest);
emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next));
emit_rr(as, XO_TEST, dest|REX_GC64, dest);
emit_rmro(as, XO_MOV, dest|REX_GC64, dest, offsetof(Node, next));
l_next = emit_label(as);
/* Type and value comparison. */
@ -1034,7 +1146,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n));
emit_sjcc(as, CC_AE, l_next);
/* The type check avoids NaN penalties and complaints from Valgrind. */
#if LJ_64
#if LJ_64 && !LJ_GC64
emit_u32(as, LJ_TISNUM);
emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
#else
@ -1042,10 +1154,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
#endif
}
#if LJ_64
#if LJ_64 && !LJ_GC64
} else if (irt_islightud(kt)) {
emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64));
#endif
#elif LJ_GC64
} else if (irt_isaddr(kt)) {
if (isk) {
TValue k;
k.u64 = ((uint64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo),
k.u32.lo);
emit_sjcc(as, CC_NE, l_next);
emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi),
k.u32.hi);
} else {
emit_rmro(as, XO_CMP, tmp|REX_64, dest, offsetof(Node, key.u64));
}
} else {
lua_assert(irt_ispri(kt) && !irt_isnil(kt));
emit_u32(as, (irt_toitype(kt)<<15)|0x7fff);
emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
#else
} else {
if (!irt_ispri(kt)) {
lua_assert(irt_isaddr(kt));
@ -1059,16 +1188,23 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
lua_assert(!irt_isnil(kt));
emit_i8(as, irt_toitype(kt));
emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
#endif
}
emit_sfixup(as, l_loop);
checkmclim(as);
#if LJ_GC64
if (!isk && irt_isaddr(kt)) {
emit_rr(as, XO_OR, tmp|REX_64, key);
emit_loadu64(as, tmp, (uint64_t)irt_toitype(kt) << 47);
}
#endif
/* Load main position relative to tab->node into dest. */
khash = isk ? ir_khash(irkey) : 1;
if (khash == 0) {
emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node));
emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node));
} else {
emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node));
emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node));
if ((as->flags & JIT_F_PREFER_IMUL)) {
emit_i8(as, sizeof(Node));
emit_rr(as, XO_IMULi8, dest, dest);
@ -1123,11 +1259,11 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
if (ra_hasreg(dest)) {
if (ofs != 0) {
if (dest == node && !(as->flags & JIT_F_LEA_AGU))
emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs);
emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs);
else
emit_rmro(as, XO_LEA, dest, node, ofs);
emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs);
} else if (dest != node) {
emit_rr(as, XO_MOV, dest, node);
emit_rr(as, XO_MOV, dest|REX_GC64, node);
}
}
asm_guardcc(as, CC_NE);
@ -1139,13 +1275,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t));
/* Assumes -0.0 is already canonicalized to +0.0. */
emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 :
#if LJ_GC64
((uint64_t)irt_toitype(irkey->t) << 47) |
(uint64_t)ir_kgc(irkey));
#else
((uint64_t)irt_toitype(irkey->t) << 32) |
(uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey)));
#endif
} else {
lua_assert(!irt_isnil(irkey->t));
#if LJ_GC64
emit_i32(as, (irt_toitype(irkey->t)<<15)|0x7fff);
emit_rmro(as, XO_ARITHi, XOg_CMP, node,
ofs + (int32_t)offsetof(Node, key.it));
#else
emit_i8(as, irt_toitype(irkey->t));
emit_rmro(as, XO_ARITHi8, XOg_CMP, node,
ofs + (int32_t)offsetof(Node, key.it));
#endif
}
#else
l_exit = emit_label(as);
@ -1181,20 +1328,20 @@ static void asm_uref(ASMState *as, IRIns *ir)
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_rma(as, XO_MOV, dest, v);
emit_rma(as, XO_MOV, dest|REX_GC64, v);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv));
emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv));
asm_guardcc(as, CC_NE);
emit_i8(as, 1);
emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
} else {
emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v));
emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
}
emit_rmro(as, XO_MOV, uv, func,
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
emit_rmro(as, XO_MOV, uv|REX_GC64, func, (int32_t)offsetof(GCfuncL, uvptr)
+ (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
}
@ -1212,9 +1359,9 @@ static void asm_strref(ASMState *as, IRIns *ir)
if (as->mrm.base == RID_NONE)
emit_loadi(as, dest, as->mrm.ofs);
else if (as->mrm.base == dest && as->mrm.idx == RID_NONE)
emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs);
emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, as->mrm.ofs);
else
emit_mrm(as, XO_LEA, dest, RID_MRM);
emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM);
}
/* -- Loads and stores ---------------------------------------------------- */
@ -1283,7 +1430,7 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
case IRT_I16: case IRT_U16: xo = XO_MOVtow; break;
case IRT_NUM: xo = XO_MOVSDto; break;
case IRT_FLOAT: xo = XO_MOVSSto; break;
#if LJ_64
#if LJ_64 && !LJ_GC64
case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */
#endif
default:
@ -1315,7 +1462,7 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
#define asm_fstore(as, ir) asm_fxstore(as, ir)
#define asm_xstore(as, ir) asm_fxstore(as, ir)
#if LJ_64
#if LJ_64 && !LJ_GC64
static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
{
if (ra_used(ir) || typecheck) {
@ -1337,9 +1484,12 @@ static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
static void asm_ahuvload(ASMState *as, IRIns *ir)
{
#if LJ_GC64
Reg tmp = RID_NONE;
#endif
lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
(LJ_DUALNUM && irt_isint(ir->t)));
#if LJ_64
#if LJ_64 && !LJ_GC64
if (irt_islightud(ir->t)) {
Reg dest = asm_load_lightud64(as, ir, 1);
if (ra_hasreg(dest)) {
@ -1353,20 +1503,59 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
Reg dest = ra_dest(as, ir, allow);
asm_fuseahuref(as, ir->op1, RSET_GPR);
#if LJ_GC64
if (irt_isaddr(ir->t)) {
emit_shifti(as, XOg_SHR|REX_64, dest, 17);
asm_guardcc(as, CC_NE);
emit_i8(as, irt_toitype(ir->t));
emit_rr(as, XO_ARITHi8, XOg_CMP, dest);
emit_i8(as, 0x66);
if ((as->flags & JIT_F_BMI2)) {
emit_i8(as, 47);
emit_mrm(as, XV_RORX|VEX_64, dest, RID_MRM);
} else {
emit_shifti(as, XOg_ROR|REX_64, dest, 47);
emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
}
return;
} else
#endif
emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
} else {
asm_fuseahuref(as, ir->op1, RSET_GPR);
RegSet gpr = RSET_GPR;
#if LJ_GC64
if (irt_isaddr(ir->t)) {
tmp = ra_scratch(as, RSET_GPR);
gpr = rset_exclude(gpr, tmp);
}
#endif
asm_fuseahuref(as, ir->op1, gpr);
}
/* Always do the type check, even if the load result is unused. */
as->mrm.ofs += 4;
asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE);
if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
#if LJ_GC64
emit_u32(as, LJ_TISNUM << 15);
#else
emit_u32(as, LJ_TISNUM);
#endif
emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
#if LJ_GC64
} else if (irt_isaddr(ir->t)) {
emit_i8(as, irt_toitype(ir->t));
emit_mrm(as, XO_ARITHi8, XOg_CMP, tmp);
emit_shifti(as, XOg_SAR, tmp, 15);
emit_mrm(as, XO_MOV, tmp, RID_MRM);
} else {
emit_u32(as, (irt_toitype(ir->t) << 15) | 0x7fff);
emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
#else
} else {
emit_i8(as, irt_toitype(ir->t));
emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM);
#endif
}
}
@ -1378,11 +1567,22 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
Reg src = ra_alloc1(as, ir->op2, RSET_FPR);
asm_fuseahuref(as, ir->op1, RSET_GPR);
emit_mrm(as, XO_MOVSDto, src, RID_MRM);
#if LJ_64
#if LJ_64 && !LJ_GC64
} else if (irt_islightud(ir->t)) {
Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src));
emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
#endif
#if LJ_GC64
} else if (irref_isk(ir->op2)) {
TValue k;
lj_ir_kvalue(as->J->L, &k, IR(ir->op2));
asm_fuseahuref(as, ir->op1, RSET_GPR);
emit_u32(as, k.u32.lo);
emit_mrm(as, XO_MOVmi, 0, RID_MRM);
as->mrm.ofs += 4;
emit_u32(as, k.u32.hi);
emit_mrm(as, XO_MOVmi, 0, RID_MRM);
#endif
} else {
IRIns *irr = IR(ir->op2);
@ -1394,6 +1594,16 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
}
asm_fuseahuref(as, ir->op1, allow);
if (ra_hasreg(src)) {
#if LJ_GC64
if (!(LJ_DUALNUM && irt_isinteger(ir->t))) {
as->mrm.ofs += 4;
emit_u32(as, irt_toitype(ir->t) << 15);
emit_mrm(as, XO_ARITHi, XOg_OR, RID_MRM);
as->mrm.ofs -= 4;
emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
return;
}
#endif
emit_mrm(as, XO_MOVto, src, RID_MRM);
} else if (!irt_ispri(irr->t)) {
lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t)));
@ -1401,7 +1611,12 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
emit_mrm(as, XO_MOVmi, 0, RID_MRM);
}
as->mrm.ofs += 4;
#if LJ_GC64
lua_assert(LJ_DUALNUM && irt_isinteger(ir->t));
emit_i32(as, LJ_TNUMX << 15);
#else
emit_i32(as, (int32_t)irt_toitype(ir->t));
#endif
emit_mrm(as, XO_MOVmi, 0, RID_MRM);
}
}
@ -1425,7 +1640,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
base = ra_alloc1(as, REF_BASE, RSET_GPR);
emit_rmro(as, XO_MOVSD, left, base, ofs);
t.irt = IRT_NUM; /* Continue with a regular number type check. */
#if LJ_64
#if LJ_64 && !LJ_GC64
} else if (irt_islightud(t)) {
Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK));
if (ra_hasreg(dest)) {
@ -1443,6 +1658,28 @@ static void asm_sload(ASMState *as, IRIns *ir)
t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */
emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
} else {
#if LJ_GC64
if (irt_isaddr(t)) {
emit_shifti(as, XOg_SHR|REX_64, dest, 17);
if ((ir->op2 & IRSLOAD_TYPECHECK)) {
asm_guardcc(as, CC_NE);
emit_i8(as, irt_toitype(t));
emit_rr(as, XO_ARITHi8, XOg_CMP, dest);
emit_i8(as, 0x66);
}
if ((as->flags & JIT_F_BMI2)) {
emit_i8(as, 47);
emit_rmro(as, XV_RORX|VEX_64, dest, base, ofs);
} else {
if ((ir->op2 & IRSLOAD_TYPECHECK))
emit_shifti(as, XOg_ROR|REX_64, dest, 47);
else
emit_shifti(as, XOg_SHL|REX_64, dest, 17);
emit_rmro(as, XO_MOV, dest|REX_64, base, ofs);
}
return;
} else
#endif
emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
}
} else {
@ -1455,11 +1692,26 @@ static void asm_sload(ASMState *as, IRIns *ir)
asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE);
if (LJ_64 && irt_type(t) >= IRT_NUM) {
lua_assert(irt_isinteger(t) || irt_isnum(t));
#if LJ_GC64
emit_u32(as, LJ_TISNUM << 15);
#else
emit_u32(as, LJ_TISNUM);
#endif
emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
} else if (LJ_GC64 && irt_ispri(t)) {
emit_u32(as, (irt_toitype(t) << 15) | 0x7fff);
emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
} else {
#if LJ_GC64
Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, base));
emit_i8(as, irt_toitype(t));
emit_rr(as, XO_ARITHi8, XOg_CMP, tmp);
emit_shifti(as, XOg_SAR, tmp, 15);
emit_rmro(as, XO_MOV, tmp, base, ofs+4);
#else
emit_i8(as, irt_toitype(t));
emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4);
#endif
}
}
}
@ -1553,7 +1805,7 @@ static void asm_tbar(ASMState *as, IRIns *ir)
Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab));
MCLabel l_end = emit_label(as);
emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist));
emit_movtomro(as, tmp|REX_GC64, tab, offsetof(GCtab, gclist));
emit_setgl(as, tab, gc.grayagain);
emit_getgl(as, tmp, gc.grayagain);
emit_i8(as, ~LJ_GC_BLACK);
@ -2089,7 +2341,6 @@ static void asm_comp(ASMState *as, IRIns *ir)
cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */
}
left = ra_alloc1(as, lref, RSET_FPR);
right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
l_around = emit_label(as);
asm_guardcc(as, cc >> 4);
if (cc & VCC_P) { /* Extra CC_P branch required? */
@ -2106,6 +2357,7 @@ static void asm_comp(ASMState *as, IRIns *ir)
emit_jcc(as, CC_P, as->mcp);
}
}
right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
emit_mrm(as, XO_UCOMISD, left, right);
} else {
IRRef lref = ir->op1, rref = ir->op2;
@ -2382,13 +2634,18 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0);
else
ra_modified(as, r);
emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot));
emit_gri(as, XG_ARITHi(XOg_CMP), r|REX_GC64, (int32_t)(8*topslot));
if (ra_hasreg(pbase) && pbase != r)
emit_rr(as, XO_ARITH(XOg_SUB), r, pbase);
emit_rr(as, XO_ARITH(XOg_SUB), r|REX_GC64, pbase);
else
#if LJ_GC64
emit_rmro(as, XO_ARITH(XOg_SUB), r|REX_64, RID_DISPATCH,
(int32_t)dispofs(as, &J2G(as->J)->jit_base));
#else
emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
ptr2addr(&J2G(as->J)->jit_base));
emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
#endif
emit_rmro(as, XO_MOV, r|REX_GC64, r, offsetof(lua_State, maxstack));
emit_getgl(as, r, cur_L);
if (allow == RSET_EMPTY) /* Spill temp. register. */
emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
@ -2417,18 +2674,38 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
(LJ_DUALNUM && irt_isinteger(ir->t)));
if (!irref_isk(ref)) {
Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
#if LJ_GC64
if (irt_is64(ir->t)) {
emit_u32(as, irt_toitype(ir->t) << 15);
emit_rmro(as, XO_ARITHi, XOg_OR, RID_BASE, ofs+4);
} else if (LJ_DUALNUM && irt_isinteger(ir->t)) {
emit_movmroi(as, RID_BASE, ofs+4, LJ_TISNUM << 15);
} else {
emit_movmroi(as, RID_BASE, ofs+4, (irt_toitype(ir->t)<<15)|0x7fff);
}
#endif
emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs);
#if LJ_GC64
} else {
TValue k;
lj_ir_kvalue(as->J->L, &k, ir);
emit_movmroi(as, RID_BASE, ofs+4, k.u32.hi);
emit_movmroi(as, RID_BASE, ofs, k.u32.lo);
#else
} else if (!irt_ispri(ir->t)) {
emit_movmroi(as, RID_BASE, ofs, ir->i);
#endif
}
if ((sn & (SNAP_CONT|SNAP_FRAME))) {
#if !LJ_FR2
if (s != 0) /* Do not overwrite link to previous frame. */
emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
#endif
#if !LJ_GC64
} else {
if (!(LJ_64 && irt_islightud(ir->t)))
emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
#endif
}
}
checkmclim(as);
@ -2454,11 +2731,15 @@ static void asm_gc_check(ASMState *as)
args[1] = ASMREF_TMP2; /* MSize steps */
asm_gencall(as, ci, args);
tmp = ra_releasetmp(as, ASMREF_TMP1);
#if LJ_GC64
emit_rmro(as, XO_LEA, tmp|REX_64, RID_DISPATCH, GG_DISP2G);
#else
emit_loada(as, tmp, J2G(as->J));
#endif
emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps);
/* Jump around GC step if GC total < GC threshold. */
emit_sjcc(as, CC_B, l_end);
emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold);
emit_opgl(as, XO_ARITH(XOg_CMP), tmp|REX_GC64, gc.threshold);
emit_getgl(as, tmp, gc.total);
as->gcsteps = 0;
checkmclim(as);
@ -2523,7 +2804,7 @@ static void asm_head_root_base(ASMState *as)
if (rset_test(as->modset, r) || irt_ismarked(ir->t))
ir->r = RID_INIT; /* No inheritance for modified BASE register. */
if (r != RID_BASE)
emit_rr(as, XO_MOV, r, RID_BASE);
emit_rr(as, XO_MOV, r|REX_GC64, RID_BASE);
}
}
@ -2540,7 +2821,7 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
rset_clear(allow, r); /* Mark same BASE register as coalesced. */
} else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
rset_clear(allow, irp->r);
emit_rr(as, XO_MOV, r, irp->r); /* Move from coalesced parent reg. */
emit_rr(as, XO_MOV, r|REX_GC64, irp->r); /* Move from coalesced parent reg. */
} else {
emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */
}
@ -2753,12 +3034,16 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
MSize len = T->szmcode;
MCode *px = exitstub_addr(J, exitno) - 6;
MCode *pe = p+len-6;
uint32_t stateaddr = u32ptr(&J2G(J)->vmstate);
#if LJ_GC64
uint32_t statei = (uint32_t)(GG_OFS(g.vmstate) - GG_OFS(dispatch));
#else
uint32_t statei = u32ptr(&J2G(J)->vmstate);
#endif
if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px)
*(int32_t *)(p+len-4) = jmprel(p+len, target);
/* Do not patch parent exit for a stack check. Skip beyond vmstate update. */
for (; p < pe; p += asm_x86_inslen(p))
if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi)
if (*(uint32_t*)(p+2+(LJ_64!=LJ_GC64)) == statei && p[0] == XI_MOVmi)
break;
lua_assert(p < pe);
for (; p < pe; p += asm_x86_inslen(p))

@ -20,6 +20,11 @@
#define REX_64 0
#define VEX_64 0
#endif
#if LJ_GC64
#define REX_GC64 REX_64
#else
#define REX_GC64 0
#endif
#define emit_i8(as, i) (*--as->mcp = (MCode)(i))
#define emit_i32(as, i) (*(int32_t *)(as->mcp-4) = (i), as->mcp -= 4)
@ -94,25 +99,19 @@ static int32_t ptr2addr(const void *p)
#define ptr2addr(p) (i32ptr((p)))
#endif
/* op r, [addr] */
static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
{
MCode *p = as->mcp;
*(int32_t *)(p-4) = ptr2addr(addr);
#if LJ_64
p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
#else
as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
#endif
}
/* op r, [base+ofs] */
static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs)
{
MCode *p = as->mcp;
x86Mode mode;
if (ra_hasreg(rb)) {
#if LJ_GC64
if (rb == RID_RIP) {
mode = XM_OFS0;
p -= 4;
*(int32_t *)p = ofs;
} else
#endif
if (ofs == 0 && (rb&7) != RID_EBP) {
mode = XM_OFS0;
} else if (checki8(ofs)) {
@ -210,6 +209,13 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb)
#if LJ_64
*--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
rb = RID_ESP;
#endif
#if LJ_GC64
} else if (rb == RID_RIP) {
lua_assert(as->mrm.idx == RID_NONE);
mode = XM_OFS0;
p -= 4;
*(int32_t *)p = as->mrm.ofs;
#endif
} else {
if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) {
@ -264,8 +270,8 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
/* Get/set global_State fields. */
#define emit_opgl(as, xo, r, field) \
emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field)
#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r), field)
#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r), field)
#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r)|REX_GC64, field)
#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r)|REX_GC64, field)
#define emit_setvmstate(as, i) \
(emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate))
@ -288,9 +294,21 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
}
}
#if LJ_GC64
#define dispofs(as, k) \
((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch))
#define mcpofs(as, k) \
((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp))
#define mctopofs(as, k) \
((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop))
/* mov r, addr */
#define emit_loada(as, r, addr) \
emit_loadu64(as, (r), (uintptr_t)(addr))
#else
/* mov r, addr */
#define emit_loada(as, r, addr) \
emit_loadi(as, (r), ptr2addr((addr)))
#endif
#if LJ_64
/* mov r, imm64 or shorter 32 bit extended load. */
@ -302,6 +320,12 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
MCode *p = as->mcp;
*(int32_t *)(p-4) = (int32_t)u64;
as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4);
#if LJ_GC64
} else if (checki32(dispofs(as, u64))) {
emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64));
} else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) {
emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64));
#endif
} else { /* Full-size 64 bit load. */
MCode *p = as->mcp;
*(uint64_t *)(p-8) = u64;
@ -313,6 +337,31 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
}
#endif
/* op r, [addr] */
static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
{
#if LJ_GC64
if (checki32(dispofs(as, addr))) {
emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr));
} else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) {
emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr));
} else if (!checki32((intptr_t)addr) && (xo == XO_MOV || xo == XO_MOVSD)) {
emit_rmro(as, xo, rr, rr, 0);
emit_loadu64(as, rr, (uintptr_t)addr);
} else
#endif
{
MCode *p = as->mcp;
*(int32_t *)(p-4) = ptr2addr(addr);
#if LJ_64
p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
#else
as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
#endif
}
}
/* Load 64-bit IR constant into register. */
static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
{
@ -328,8 +377,28 @@ static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
}
if (!*k) {
emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r);
#if LJ_GC64
} else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) ||
(checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) {
emit_rma(as, xo, r64, k);
} else {
if (ir->i) {
lua_assert(*k == *(uint64_t*)(as->mctop - ir->i));
} else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) {
emit_loadu64(as, r, *k);
return;
} else {
while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
*(uint64_t*)as->mcbot = *k;
ir->i = (int32_t)(as->mctop - as->mcbot);
as->mcbot += 8;
as->mclim = as->mcbot + MCLIM_REDZONE;
}
emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i));
#else
} else {
emit_rma(as, xo, r64, k);
#endif
}
}
@ -471,9 +540,9 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
{
if (ofs) {
if ((as->flags & JIT_F_LEA_AGU))
emit_rmro(as, XO_LEA, r, r, ofs);
emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs);
else
emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs);
emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
}
}

@ -1114,8 +1114,12 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id)
{
TRef tr, ud, fp;
if (id) { /* io.func() */
#if LJ_GC64
ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id]));
#else
tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]);
ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0);
#endif
} else { /* fp:method() */
ud = J->base[0];
if (!tref_isudata(ud))

@ -412,7 +412,7 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv)
static LJ_AINLINE uint32_t irt_toitype_(IRType t)
{
lua_assert(!LJ_64 || t != IRT_LIGHTUD);
lua_assert(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD);
if (LJ_DUALNUM && t > IRT_NUM) {
return LJ_TISNUM;
} else {

@ -976,7 +976,12 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
}
/* The cdata metatable is treated as immutable. */
if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt;
#if LJ_GC64
ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB,
GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)]));
#else
ix->mt = mix.tab = lj_ir_ktab(J, mt);
#endif
goto nocheck;
}
ix->mt = mt ? mix.tab : TREF_NIL;

@ -630,7 +630,6 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
}
if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
rs = snap_renameref(T, snapno, ref, rs);
lua_assert(!LJ_GC64); /* TODO_GC64: handle 64 bit references. */
if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
int32_t *sps = &ex->spill[regsp_spill(rs)];
if (irt_isinteger(t)) {
@ -639,9 +638,11 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
} else if (irt_isnum(t)) {
o->u64 = *(uint64_t *)sps;
#endif
} else if (LJ_64 && irt_islightud(t)) {
#if LJ_64 && !LJ_GC64
} else if (irt_islightud(t)) {
/* 64 bit lightuserdata which may escape already has the tag bits. */
o->u64 = *(uint64_t *)sps;
#endif
} else {
lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
@ -659,9 +660,11 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
} else if (irt_isnum(t)) {
setnumV(o, ex->fpr[r-RID_MIN_FPR]);
#endif
} else if (LJ_64 && irt_is64(t)) {
#if LJ_64 && !LJ_GC64
} else if (irt_is64(t)) {
/* 64 bit values that already have the tag bits. */
o->u64 = ex->gpr[r-RID_MIN_GPR];
#endif
} else if (irt_ispri(t)) {
setpriV(o, irt_toitype(t));
} else {

@ -21,8 +21,13 @@
#define FPRDEF(_) \
_(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7)
#endif
#if LJ_GC64
#define VRIDDEF(_) \
_(MRM) _(BAD) _(BAD) _(BAD) _(BAD) _(RIP)
#else
#define VRIDDEF(_) \
_(MRM)
#endif
#define RIDENUM(name) RID_##name,
@ -31,6 +36,9 @@ enum {
FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
RID_MAX,
RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */
#if LJ_GC64
RID_RIP = 0x25, /* Pseudo-id for RIP. */
#endif
/* Calling conventions. */
RID_SP = RID_ESP,
@ -63,8 +71,10 @@ enum {
/* -- Register sets ------------------------------------------------------- */
/* Make use of all registers, except the stack pointer. */
#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP))
/* Make use of all registers, except the stack pointer (and maybe DISPATCH). */
#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)\
- RID2RSET(RID_ESP)\
- LJ_GC64*RID2RSET(RID_DISPATCH))
#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
#define RSET_ALL (RSET_GPR|RSET_FPR)
#define RSET_INIT RSET_ALL
@ -217,6 +227,7 @@ typedef enum {
XI_PUSHi8 = 0x6a,
XI_TESTb = 0x84,
XI_TEST = 0x85,
XI_INT3 = 0xcc,
XI_MOVmi = 0xc7,
XI_GROUP5 = 0xff,
@ -243,6 +254,7 @@ typedef enum {
XV_SHRX = XV_f20f38(f7),
/* Variable-length opcodes. XO_* prefix. */
XO_OR = XO_(0b),
XO_MOV = XO_(8b),
XO_MOVto = XO_(89),
XO_MOVtow = XO_66(89),

@ -2402,7 +2402,6 @@ static void build_subroutines(BuildCtx *ctx)
| mov RCH, byte [rbp-16]
| mov [rbp-8], r15; mov [rbp-16], r14
| // Caveat: DISPATCH is rbx.
| mov DISPATCH, [ebp]
| mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
| set_vmstate EXIT
| mov [DISPATCH+DISPATCH_J(exitno)], RCd