Mirror of https://github.com/LuaJIT/LuaJIT.git (synced 2025-02-08 23:44:08 +00:00)
x64/LJ_GC64: Enable JIT compilation.
Under LJ_GC64, RID_DISPATCH is removed from the pool of available general-purpose registers, and instead retains its role as a pointer to the dispatch table throughout JIT code. This guarantees that members of the global_State and the jit_State can always be encoded in modrm. If the memory allocator is kind, it also allows various KGC and KPTR values to be encoded as 32-bit offsets from RID_DISPATCH. Likewise, when SSE instructions want to use a KNUM as a memory operand, it often transpires that the address of the KNUM's 64-bit payload can be expressed as a 32-bit offset from RID_DISPATCH. In some cases the recording logic has been tweaked to encode constants as relative to RID_DISPATCH instead of as absolute addresses. This is done via calls to lj_ir_ggfload.

LJ_GC64 also introduces a new pseudo-register: RID_RIP. If the memory allocator isn't kind enough to put things within a 32-bit range of the dispatch table, it is sometimes kind enough to instead put things within a 32-bit range of the mcode pointer. Furthermore, for constants which we want (or need) to be loaded via memory operands, the constant's payload can be copied to the low part of an mcode region, at which point it is guaranteed to be representable as a RIP-relative operand.

Fused loads can result in an mrm referencing RID_RIP. In such cases, the fusing is only valid for the next emitted instruction - though as a special case, one asm_guardcc call is permitted between the fusing and the instruction into which the fusion result is inserted.

TValue detagging is notable under LJ_GC64. The basic code pattern is:

  mov r64, [addr]
  ror r64, 47
  cmp r16, itype
  jnz ->exit
  shr r64, 17

If BMI2 is available, mov/ror are fused into a single rorx. If BMI2 isn't available, and a type test isn't required, ror47 becomes shl17 (and the cmp/jnz are dropped).

The type test is interesting, as it only considers 16 bits of tag, despite the TValues in question nominally consisting of 47 bits of pointer and 17 bits of tag. The 16 considered bits are sufficient to verify that the TValue is a NaN (11 bits), is a QNaN (1 bit), and has the correct itype (4 bits). The one unconsidered bit is the sign bit of the NaN. LuaJIT operates under the assumption that all NaNs in the system are either canonical NaNs (as generated by the FPU) or are NaN-packed TValues. In both cases, the sign bit of the NaN is set, and therefore does not need to be verified during detagging.

The cmp instruction encodes the itype as an imm8, thus avoiding the LCP stall which using an imm16 would result in. False LCP stalls are still an issue, and could be trivially worked around by sometimes inserting an extra nop instruction, but this could break loop realignment (as the realigned code might be one byte larger or one byte smaller, and loop realignment operates under the assumption that a sequence of emitted instructions always occupies the same number of bytes, regardless of where it is emitted [1]).

[1] This assumption also results in rip-relative operands being even more slippery. A priori, the realigned code might be able to reach things it previously couldn't, or conversely not reach things it previously could. To prevent this from happening, checki32/mcpofs is paired with checki32/mctopofs: if a given address is reachable with a 32-bit displacement from both of these points, then it'll also be reachable with a 32-bit displacement from a realigned mcp.
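To make the addressing rules above concrete, here is a minimal C sketch (an illustration only, not part of the patch; fits_i32 and const_addr_mode are invented names that mirror the checki32/dispofs/mcpofs/mctopofs macros this commit adds):

  #include <stdint.h>

  static int fits_i32(int64_t v) { return v == (int64_t)(int32_t)v; }

  /* How a 64-bit constant at address k can be addressed:
   * 1 = [RID_DISPATCH + disp32], 2 = [rip + disp32],
   * 0 = needs a full 64-bit immediate load. */
  static int const_addr_mode(uintptr_t k, uintptr_t dispatch,
                             uintptr_t mcp, uintptr_t mctop)
  {
    if (fits_i32((int64_t)(k - dispatch)))
      return 1;
    if (fits_i32((int64_t)(k - mcp)) && fits_i32((int64_t)(k - mctop)))
      return 2;  /* a realigned mcp lies between mcp and mctop,
                    so the target stays reachable after realignment */
    return 0;
  }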
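The detagging pattern, transcribed into C for clarity (a sketch under the 47-bit-pointer/17-bit-tag packing described above; detag is an invented helper, not LuaJIT source):

  #include <stdint.h>

  /* Returns 0 (i.e. would take the side exit) on a tag mismatch. */
  static int detag(uint64_t tv, uint16_t itype16, uint64_t *payload)
  {
    uint64_t r = (tv >> 47) | (tv << 17);  /* ror r64, 47 */
    if ((uint16_t)r != itype16)            /* cmp r16, itype; jnz ->exit */
      return 0;
    *payload = r >> 17;                    /* shr r64, 17 */
    return 1;
  }

For reference, the imm8 form of the compare assembles to 66 83 /7 ib, while an imm16 would be 66 81 /7 iw; it is the 66 prefix changing the immediate width of the 81 opcode that makes the latter a length-changing prefix for the decoder.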
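Similarly, the "copy the payload to the low part of an mcode region" fallback amounts to the following (schematic sketch; park_k64 is an invented name - the real logic lives in asm_fuseloadk64 and emit_loadk64 in the diff below):

  #include <stdint.h>
  #include <string.h>

  /* Copy an 8-byte payload to the (8-aligned) bottom of the mcode
   * region and return its new, RIP-reachable address. */
  static const uint64_t *park_k64(uint8_t **mcbot, uint64_t k)
  {
    while ((uintptr_t)*mcbot & 7)
      *(*mcbot)++ = 0xcc;              /* XI_INT3 padding, as in the diff */
    memcpy(*mcbot, &k, 8);
    *mcbot += 8;
    return (const uint64_t *)(*mcbot - 8);
  }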
This commit is contained in: parent 79021951e5, commit 7213658b33
src/lj_arch.h

@@ -453,7 +453,7 @@
#endif

/* Disable or enable the JIT compiler. */
#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_GC64
#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT)
#define LJ_HASJIT 0
#else
#define LJ_HASJIT 1
11 src/lj_asm.c
@@ -346,6 +346,12 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
#if LJ_64
} else if (ir->o == IR_KINT64) {
emit_loadu64(as, r, ir_kint64(ir)->u64);
#if LJ_GC64
} else if (ir->o == IR_KGC) {
emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
} else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
#endif
#endif
} else {
lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||

@@ -1927,8 +1933,12 @@ static void asm_tail_link(ASMState *as)
if (bc_isret(bc_op(*retpc)))
pc = retpc;
}
#if LJ_GC64
emit_loadu64(as, RID_LPC, u64ptr(pc));
#else
ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
ra_allockreg(as, i32ptr(pc), RID_LPC);
#endif
mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
switch (bc_op(*pc)) {
case BC_CALLM: case BC_CALLMT:

@@ -2289,6 +2299,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
as->curins = as->T->snap[0].ref;
asm_snap_prep(as); /* The GC check is a guard. */
asm_gc_check(as);
as->curins = as->stopins;
}
ra_evictk(as);
if (as->parent)
385 src/lj_asm_x86.h
@@ -21,12 +21,14 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
}
/* Push the high byte of the exitno for each exit stub group. */
*mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8);
#if !LJ_GC64
/* Store DISPATCH at original stack slot 0. Account for the two push ops. */
*mxp++ = XI_MOVmi;
*mxp++ = MODRM(XM_OFS8, 0, RID_ESP);
*mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
*mxp++ = 2*sizeof(void *);
*(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4;
#endif
/* Jump to exit handler which fills in the ExitState. */
*mxp++ = XI_JMP; mxp += 4;
*((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler);

@@ -62,10 +64,18 @@ static void asm_guardcc(ASMState *as, int cc)
target = p;
cc ^= 1;
if (as->realign) {
#if LJ_GC64
if (LJ_UNLIKELY(as->mrm.base == RID_RIP))
as->mrm.ofs += 2;
#endif
emit_sjcc(as, cc, target);
return;
}
}
#if LJ_GC64
if (LJ_UNLIKELY(as->mrm.base == RID_RIP))
as->mrm.ofs += 6;
#endif
emit_jcc(as, cc, target);
}
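The two RID_RIP fix-ups above are the concrete form of the rule from the commit message: an x86 RIP-relative disp32 is measured from the end of the instruction that consumes it. Because the assembler emits backwards, a guard branch emitted between the fusing and the use ends up immediately after the consuming instruction, so the consumer ends 2 bytes earlier (short jcc) or 6 bytes earlier (near jcc, 0F 8x rel32) than the fused offset assumed, and the offset is budged by the same amount. A minimal sketch of the displacement rule (illustrative helper, not LuaJIT source):

  #include <stdint.h>

  /* disp32 is measured from the end of the consuming instruction. */
  static int32_t riprel(const uint8_t *target, const uint8_t *insn_end)
  {
    return (int32_t)(target - insn_end);
  }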
@@ -79,6 +89,15 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
{
if (irref_isk(ref)) {
IRIns *ir = IR(ref);
#if LJ_GC64
if (ir->o == IR_KNULL || !irt_is64(ir->t)) {
*k = ir->i;
return 1;
} else if (checki32((int64_t)ir[1].tv.u64)) {
*k = (int32_t)ir[1].tv.u64;
return 1;
}
#else
if (ir->o != IR_KINT64) {
*k = ir->i;
return 1;

@@ -86,6 +105,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
*k = (int32_t)ir_kint64(ir)->u64;
return 1;
}
#endif
}
return 0;
}

@@ -185,9 +205,19 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
#if LJ_GC64
int64_t ofs = dispofs(as, &uv->tv);
if (checki32(ofs) && checki32(ofs+4)) {
as->mrm.ofs = (int32_t)ofs;
as->mrm.base = RID_DISPATCH;
as->mrm.idx = RID_NONE;
return;
}
#else
as->mrm.ofs = ptr2addr(&uv->tv);
as->mrm.base = as->mrm.idx = RID_NONE;
return;
#endif
}
break;
default:

@@ -207,17 +237,38 @@ static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow)
lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF);
as->mrm.idx = RID_NONE;
if (ir->op1 == REF_NIL) {
#if LJ_GC64
as->mrm.ofs = (int32_t)ir->op2 - GG_OFS(dispatch);
as->mrm.base = RID_DISPATCH;
#else
as->mrm.ofs = (int32_t)ir->op2 + ptr2addr(J2GG(as->J));
as->mrm.base = RID_NONE;
#endif
return;
}
as->mrm.ofs = field_ofs[ir->op2];
if (irref_isk(ir->op1)) {
as->mrm.ofs += IR(ir->op1)->i;
IRIns *op1 = IR(ir->op1);
#if LJ_GC64
if (ir->op1 == REF_NIL) {
as->mrm.ofs -= GG_OFS(dispatch);
as->mrm.base = RID_DISPATCH;
return;
} else if (op1->o == IR_KPTR || op1->o == IR_KKPTR) {
intptr_t ofs = dispofs(as, ir_kptr(op1));
if (checki32(as->mrm.ofs + ofs)) {
as->mrm.ofs += (int32_t)ofs;
as->mrm.base = RID_DISPATCH;
return;
}
}
#else
as->mrm.ofs += op1->i;
as->mrm.base = RID_NONE;
} else {
as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
return;
#endif
}
as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
}

/* Fuse string reference into memory operand. */

@@ -228,7 +279,7 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
as->mrm.base = as->mrm.idx = RID_NONE;
as->mrm.scale = XM_SCALE1;
as->mrm.ofs = sizeof(GCstr);
if (irref_isk(ir->op1)) {
if (!LJ_GC64 && irref_isk(ir->op1)) {
as->mrm.ofs += IR(ir->op1)->i;
} else {
Reg r = ra_alloc1(as, ir->op1, allow);

@@ -260,10 +311,20 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
IRIns *ir = IR(ref);
as->mrm.idx = RID_NONE;
if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
#if LJ_GC64
intptr_t ofs = dispofs(as, ir_kptr(ir));
if (checki32(ofs)) {
as->mrm.ofs = (int32_t)ofs;
as->mrm.base = RID_DISPATCH;
return;
}
} if (0) {
#else
as->mrm.ofs = ir->i;
as->mrm.base = RID_NONE;
} else if (ir->o == IR_STRREF) {
asm_fusestrref(as, ir, allow);
#endif
} else {
as->mrm.ofs = 0;
if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) {

@@ -310,8 +371,31 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
static Reg asm_fuseloadk64(ASMState *as, IRIns *ir)
{
const uint64_t *k = &ir[1].tv.u64;
as->mrm.ofs = ptr2addr(k);
as->mrm.base = RID_NONE;
if (!LJ_GC64 || checki32((intptr_t)k)) {
as->mrm.ofs = ptr2addr(k);
as->mrm.base = RID_NONE;
#if LJ_GC64
} else if (checki32(dispofs(as, k))) {
as->mrm.ofs = (int32_t)dispofs(as, k);
as->mrm.base = RID_DISPATCH;
} else if (checki32(mcpofs(as, k)) && checki32(mcpofs(as, k+1))
&& checki32(mctopofs(as, k)) && checki32(mctopofs(as, k+1))) {
as->mrm.ofs = (int32_t)mcpofs(as, k);
as->mrm.base = RID_RIP;
} else {
if (ir->i) {
lua_assert(*k == *(uint64_t*)(as->mctop - ir->i));
} else {
while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
*(uint64_t*)as->mcbot = *k;
ir->i = (int32_t)(as->mctop - as->mcbot);
as->mcbot += 8;
as->mclim = as->mcbot + MCLIM_REDZONE;
}
as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i);
as->mrm.base = RID_RIP;
#endif
}
as->mrm.idx = RID_NONE;
return RID_MRM;
}
@@ -346,9 +430,11 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
if (ir->o == IR_SLOAD) {
if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
noconflict(as, ref, IR_RETF, 0)) {
noconflict(as, ref, IR_RETF, 0) &&
!(LJ_GC64 && irt_isaddr(ir->t))) {
int32_t op1ofs = 8*((int32_t)ir->op1-1-LJ_FR2);
as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
as->mrm.ofs = 8*((int32_t)ir->op1-1) + (!LJ_FR2&&(ir->op2&IRSLOAD_FRAME)?4:0);
as->mrm.ofs = op1ofs + (!LJ_FR2&&(ir->op2&IRSLOAD_FRAME)?4:0);
as->mrm.idx = RID_NONE;
return RID_MRM;
}

@@ -360,7 +446,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
return RID_MRM;
}
} else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0)) {
if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) &&
!(LJ_GC64 && irt_isaddr(ir->t))) {
asm_fuseahuref(as, ir->op1, xallow);
return RID_MRM;
}

@@ -373,7 +460,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
asm_fusexref(as, ir->op1, xallow);
return RID_MRM;
}
} else if (ir->o == IR_VLOAD) {
} else if (ir->o == IR_VLOAD && !(LJ_GC64 && irt_isaddr(ir->t))) {
asm_fuseahuref(as, ir->op1, xallow);
return RID_MRM;
}

@@ -501,6 +588,13 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
if (ir->o == IR_KINT64)
emit_loadu64(as, r, ir_kint64(ir)->u64);
else
#if LJ_GC64
if (ir->o == IR_KGC)
emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
else if (ir->o == IR_KPTR || ir->o == IR_KKPTR)
emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
else
#endif
#endif
emit_loadi(as, r, ir->i);
} else {

@@ -667,7 +761,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
emit_addptr(as, base, -8*delta);
asm_guardcc(as, CC_NE);
#if LJ_FR2
emit_rmro(as, XO_CMP, rpc, base, -8);
emit_rmro(as, XO_CMP, rpc|REX_GC64, base, -8);
emit_loadu64(as, rpc, u64ptr(pc));
#else
emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc));

@@ -695,8 +789,9 @@ static void asm_tobit(ASMState *as, IRIns *ir)
Reg tmp = ra_noreg(IR(ir->op1)->r) ?
ra_alloc1(as, ir->op1, RSET_FPR) :
ra_scratch(as, RSET_FPR);
Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp));
Reg right;
emit_rr(as, XO_MOVDto, tmp, dest);
right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp));
emit_mrm(as, XO_ADDSD, tmp, right);
ra_left(as, tmp, ir->op1);
}

@@ -769,13 +864,12 @@ static void asm_conv(ASMState *as, IRIns *ir)
emit_rr(as, op, dest|REX_64, tmp);
ra_left(as, tmp, lref);
} else {
Reg left = asm_fuseload(as, lref, RSET_FPR);
if (LJ_64 && irt_isu32(ir->t))
emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */
emit_mrm(as, op,
dest|((LJ_64 &&
(irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0),
left);
asm_fuseload(as, lref, RSET_FPR));
}
}
} else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */

@@ -953,6 +1047,23 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
} else {
/* Otherwise use g->tmptv to hold the TValue. */
#if LJ_GC64
if (irref_isk(ref)) {
TValue k;
lj_ir_kvalue(as->J->L, &k, ir);
emit_movmroi(as, dest, 4, k.u32.hi);
emit_movmroi(as, dest, 0, k.u32.lo);
} else {
Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
if (irt_is64(ir->t)) {
emit_u32(as, irt_toitype(ir->t) << 15);
emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4);
} else {
emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15) | 0x7fff);
}
emit_movtomro(as, REX_64IR(ir, src), dest, 0);
}
#else
if (!irref_isk(ref)) {
Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
emit_movtomro(as, REX_64IR(ir, src), dest, 0);

@@ -961,6 +1072,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
}
if (!(LJ_64 && irt_islightud(ir->t)))
emit_movmroi(as, dest, 4, irt_toitype(ir->t));
#endif
emit_loada(as, dest, &J2G(as->J)->tmptv);
}
}
@@ -970,9 +1082,9 @@ static void asm_aref(ASMState *as, IRIns *ir)
Reg dest = ra_dest(as, ir, RSET_GPR);
asm_fusearef(as, ir, RSET_GPR);
if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0))
emit_mrm(as, XO_LEA, dest, RID_MRM);
emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM);
else if (as->mrm.base != dest)
emit_rr(as, XO_MOV, dest, as->mrm.base);
emit_rr(as, XO_MOV, dest|REX_GC64, as->mrm.base);
}

/* Inlined hash lookup. Specialized for key type and for const keys.

@@ -999,7 +1111,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
if (!isk) {
rset_clear(allow, tab);
key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
if (!irt_isstr(kt))
if (LJ_GC64 || !irt_isstr(kt))
tmp = ra_scratch(as, rset_exclude(allow, key));
}

@@ -1012,8 +1124,8 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)

/* Follow hash chain until the end. */
l_loop = emit_sjcc_label(as, CC_NZ);
emit_rr(as, XO_TEST, dest, dest);
emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next));
emit_rr(as, XO_TEST, dest|REX_GC64, dest);
emit_rmro(as, XO_MOV, dest|REX_GC64, dest, offsetof(Node, next));
l_next = emit_label(as);

/* Type and value comparison. */

@@ -1034,7 +1146,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n));
emit_sjcc(as, CC_AE, l_next);
/* The type check avoids NaN penalties and complaints from Valgrind. */
#if LJ_64
#if LJ_64 && !LJ_GC64
emit_u32(as, LJ_TISNUM);
emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
#else

@@ -1042,10 +1154,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
#endif
}
#if LJ_64
#if LJ_64 && !LJ_GC64
} else if (irt_islightud(kt)) {
emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64));
#endif
#elif LJ_GC64
} else if (irt_isaddr(kt)) {
if (isk) {
TValue k;
k.u64 = ((uint64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo),
k.u32.lo);
emit_sjcc(as, CC_NE, l_next);
emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi),
k.u32.hi);
} else {
emit_rmro(as, XO_CMP, tmp|REX_64, dest, offsetof(Node, key.u64));
}
} else {
lua_assert(irt_ispri(kt) && !irt_isnil(kt));
emit_u32(as, (irt_toitype(kt)<<15)|0x7fff);
emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
#else
} else {
if (!irt_ispri(kt)) {
lua_assert(irt_isaddr(kt));

@@ -1059,16 +1188,23 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
lua_assert(!irt_isnil(kt));
emit_i8(as, irt_toitype(kt));
emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
#endif
}
emit_sfixup(as, l_loop);
checkmclim(as);
#if LJ_GC64
if (!isk && irt_isaddr(kt)) {
emit_rr(as, XO_OR, tmp|REX_64, key);
emit_loadu64(as, tmp, (uint64_t)irt_toitype(kt) << 47);
}
#endif

/* Load main position relative to tab->node into dest. */
khash = isk ? ir_khash(irkey) : 1;
if (khash == 0) {
emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node));
emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node));
} else {
emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node));
emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node));
if ((as->flags & JIT_F_PREFER_IMUL)) {
emit_i8(as, sizeof(Node));
emit_rr(as, XO_IMULi8, dest, dest);
@@ -1123,11 +1259,11 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
if (ra_hasreg(dest)) {
if (ofs != 0) {
if (dest == node && !(as->flags & JIT_F_LEA_AGU))
emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs);
emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs);
else
emit_rmro(as, XO_LEA, dest, node, ofs);
emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs);
} else if (dest != node) {
emit_rr(as, XO_MOV, dest, node);
emit_rr(as, XO_MOV, dest|REX_GC64, node);
}
}
asm_guardcc(as, CC_NE);

@@ -1139,13 +1275,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t));
/* Assumes -0.0 is already canonicalized to +0.0. */
emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 :
#if LJ_GC64
((uint64_t)irt_toitype(irkey->t) << 47) |
(uint64_t)ir_kgc(irkey));
#else
((uint64_t)irt_toitype(irkey->t) << 32) |
(uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey)));
#endif
} else {
lua_assert(!irt_isnil(irkey->t));
#if LJ_GC64
emit_i32(as, (irt_toitype(irkey->t)<<15)|0x7fff);
emit_rmro(as, XO_ARITHi, XOg_CMP, node,
ofs + (int32_t)offsetof(Node, key.it));
#else
emit_i8(as, irt_toitype(irkey->t));
emit_rmro(as, XO_ARITHi8, XOg_CMP, node,
ofs + (int32_t)offsetof(Node, key.it));
#endif
}
#else
l_exit = emit_label(as);

@@ -1181,20 +1328,20 @@ static void asm_uref(ASMState *as, IRIns *ir)
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_rma(as, XO_MOV, dest, v);
emit_rma(as, XO_MOV, dest|REX_GC64, v);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv));
emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv));
asm_guardcc(as, CC_NE);
emit_i8(as, 1);
emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
} else {
emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v));
emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
}
emit_rmro(as, XO_MOV, uv, func,
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
emit_rmro(as, XO_MOV, uv|REX_GC64, func, (int32_t)offsetof(GCfuncL, uvptr)
+ (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
}

@@ -1212,9 +1359,9 @@ static void asm_strref(ASMState *as, IRIns *ir)
if (as->mrm.base == RID_NONE)
emit_loadi(as, dest, as->mrm.ofs);
else if (as->mrm.base == dest && as->mrm.idx == RID_NONE)
emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs);
emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, as->mrm.ofs);
else
emit_mrm(as, XO_LEA, dest, RID_MRM);
emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM);
}

/* -- Loads and stores ---------------------------------------------------- */

@@ -1283,7 +1430,7 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
case IRT_I16: case IRT_U16: xo = XO_MOVtow; break;
case IRT_NUM: xo = XO_MOVSDto; break;
case IRT_FLOAT: xo = XO_MOVSSto; break;
#if LJ_64
#if LJ_64 && !LJ_GC64
case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */
#endif
default:

@@ -1315,7 +1462,7 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
#define asm_fstore(as, ir) asm_fxstore(as, ir)
#define asm_xstore(as, ir) asm_fxstore(as, ir)

#if LJ_64
#if LJ_64 && !LJ_GC64
static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
{
if (ra_used(ir) || typecheck) {

@@ -1337,9 +1484,12 @@ static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)

static void asm_ahuvload(ASMState *as, IRIns *ir)
{
#if LJ_GC64
Reg tmp = RID_NONE;
#endif
lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
(LJ_DUALNUM && irt_isint(ir->t)));
#if LJ_64
#if LJ_64 && !LJ_GC64
if (irt_islightud(ir->t)) {
Reg dest = asm_load_lightud64(as, ir, 1);
if (ra_hasreg(dest)) {

@@ -1353,20 +1503,59 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
Reg dest = ra_dest(as, ir, allow);
asm_fuseahuref(as, ir->op1, RSET_GPR);
#if LJ_GC64
if (irt_isaddr(ir->t)) {
emit_shifti(as, XOg_SHR|REX_64, dest, 17);
asm_guardcc(as, CC_NE);
emit_i8(as, irt_toitype(ir->t));
emit_rr(as, XO_ARITHi8, XOg_CMP, dest);
emit_i8(as, 0x66);
if ((as->flags & JIT_F_BMI2)) {
emit_i8(as, 47);
emit_mrm(as, XV_RORX|VEX_64, dest, RID_MRM);
} else {
emit_shifti(as, XOg_ROR|REX_64, dest, 47);
emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
}
return;
} else
#endif
emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
} else {
asm_fuseahuref(as, ir->op1, RSET_GPR);
RegSet gpr = RSET_GPR;
#if LJ_GC64
if (irt_isaddr(ir->t)) {
tmp = ra_scratch(as, RSET_GPR);
gpr = rset_exclude(gpr, tmp);
}
#endif
asm_fuseahuref(as, ir->op1, gpr);
}
/* Always do the type check, even if the load result is unused. */
as->mrm.ofs += 4;
asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE);
if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
#if LJ_GC64
emit_u32(as, LJ_TISNUM << 15);
#else
emit_u32(as, LJ_TISNUM);
#endif
emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
#if LJ_GC64
} else if (irt_isaddr(ir->t)) {
emit_i8(as, irt_toitype(ir->t));
emit_mrm(as, XO_ARITHi8, XOg_CMP, tmp);
emit_shifti(as, XOg_SAR, tmp, 15);
emit_mrm(as, XO_MOV, tmp, RID_MRM);
} else {
emit_u32(as, (irt_toitype(ir->t) << 15) | 0x7fff);
emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
#else
} else {
emit_i8(as, irt_toitype(ir->t));
emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM);
#endif
}
}

@@ -1378,11 +1567,22 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
Reg src = ra_alloc1(as, ir->op2, RSET_FPR);
asm_fuseahuref(as, ir->op1, RSET_GPR);
emit_mrm(as, XO_MOVSDto, src, RID_MRM);
#if LJ_64
#if LJ_64 && !LJ_GC64
} else if (irt_islightud(ir->t)) {
Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src));
emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
#endif
#if LJ_GC64
} else if (irref_isk(ir->op2)) {
TValue k;
lj_ir_kvalue(as->J->L, &k, IR(ir->op2));
asm_fuseahuref(as, ir->op1, RSET_GPR);
emit_u32(as, k.u32.lo);
emit_mrm(as, XO_MOVmi, 0, RID_MRM);
as->mrm.ofs += 4;
emit_u32(as, k.u32.hi);
emit_mrm(as, XO_MOVmi, 0, RID_MRM);
#endif
} else {
IRIns *irr = IR(ir->op2);

@@ -1394,6 +1594,16 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
}
asm_fuseahuref(as, ir->op1, allow);
if (ra_hasreg(src)) {
#if LJ_GC64
if (!(LJ_DUALNUM && irt_isinteger(ir->t))) {
as->mrm.ofs += 4;
emit_u32(as, irt_toitype(ir->t) << 15);
emit_mrm(as, XO_ARITHi, XOg_OR, RID_MRM);
as->mrm.ofs -= 4;
emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
return;
}
#endif
emit_mrm(as, XO_MOVto, src, RID_MRM);
} else if (!irt_ispri(irr->t)) {
lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t)));

@@ -1401,7 +1611,12 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
emit_mrm(as, XO_MOVmi, 0, RID_MRM);
}
as->mrm.ofs += 4;
#if LJ_GC64
lua_assert(LJ_DUALNUM && irt_isinteger(ir->t));
emit_i32(as, LJ_TNUMX << 15);
#else
emit_i32(as, (int32_t)irt_toitype(ir->t));
#endif
emit_mrm(as, XO_MOVmi, 0, RID_MRM);
}
}
@@ -1425,7 +1640,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
base = ra_alloc1(as, REF_BASE, RSET_GPR);
emit_rmro(as, XO_MOVSD, left, base, ofs);
t.irt = IRT_NUM; /* Continue with a regular number type check. */
#if LJ_64
#if LJ_64 && !LJ_GC64
} else if (irt_islightud(t)) {
Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK));
if (ra_hasreg(dest)) {

@@ -1443,6 +1658,28 @@ static void asm_sload(ASMState *as, IRIns *ir)
t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */
emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
} else {
#if LJ_GC64
if (irt_isaddr(t)) {
emit_shifti(as, XOg_SHR|REX_64, dest, 17);
if ((ir->op2 & IRSLOAD_TYPECHECK)) {
asm_guardcc(as, CC_NE);
emit_i8(as, irt_toitype(t));
emit_rr(as, XO_ARITHi8, XOg_CMP, dest);
emit_i8(as, 0x66);
}
if ((as->flags & JIT_F_BMI2)) {
emit_i8(as, 47);
emit_rmro(as, XV_RORX|VEX_64, dest, base, ofs);
} else {
if ((ir->op2 & IRSLOAD_TYPECHECK))
emit_shifti(as, XOg_ROR|REX_64, dest, 47);
else
emit_shifti(as, XOg_SHL|REX_64, dest, 17);
emit_rmro(as, XO_MOV, dest|REX_64, base, ofs);
}
return;
} else
#endif
emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
}
} else {

@@ -1455,11 +1692,26 @@ static void asm_sload(ASMState *as, IRIns *ir)
asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE);
if (LJ_64 && irt_type(t) >= IRT_NUM) {
lua_assert(irt_isinteger(t) || irt_isnum(t));
#if LJ_GC64
emit_u32(as, LJ_TISNUM << 15);
#else
emit_u32(as, LJ_TISNUM);
#endif
emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
} else if (LJ_GC64 && irt_ispri(t)) {
emit_u32(as, (irt_toitype(t) << 15) | 0x7fff);
emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
} else {
#if LJ_GC64
Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, base));
emit_i8(as, irt_toitype(t));
emit_rr(as, XO_ARITHi8, XOg_CMP, tmp);
emit_shifti(as, XOg_SAR, tmp, 15);
emit_rmro(as, XO_MOV, tmp, base, ofs+4);
#else
emit_i8(as, irt_toitype(t));
emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4);
#endif
}
}
}

@@ -1553,7 +1805,7 @@ static void asm_tbar(ASMState *as, IRIns *ir)
Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab));
MCLabel l_end = emit_label(as);
emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist));
emit_movtomro(as, tmp|REX_GC64, tab, offsetof(GCtab, gclist));
emit_setgl(as, tab, gc.grayagain);
emit_getgl(as, tmp, gc.grayagain);
emit_i8(as, ~LJ_GC_BLACK);

@@ -2089,7 +2341,6 @@ static void asm_comp(ASMState *as, IRIns *ir)
cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */
}
left = ra_alloc1(as, lref, RSET_FPR);
right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
l_around = emit_label(as);
asm_guardcc(as, cc >> 4);
if (cc & VCC_P) { /* Extra CC_P branch required? */

@@ -2106,6 +2357,7 @@ static void asm_comp(ASMState *as, IRIns *ir)
emit_jcc(as, CC_P, as->mcp);
}
}
right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
emit_mrm(as, XO_UCOMISD, left, right);
} else {
IRRef lref = ir->op1, rref = ir->op2;

@@ -2382,13 +2634,18 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0);
else
ra_modified(as, r);
emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot));
emit_gri(as, XG_ARITHi(XOg_CMP), r|REX_GC64, (int32_t)(8*topslot));
if (ra_hasreg(pbase) && pbase != r)
emit_rr(as, XO_ARITH(XOg_SUB), r, pbase);
emit_rr(as, XO_ARITH(XOg_SUB), r|REX_GC64, pbase);
else
#if LJ_GC64
emit_rmro(as, XO_ARITH(XOg_SUB), r|REX_64, RID_DISPATCH,
(int32_t)dispofs(as, &J2G(as->J)->jit_base));
#else
emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
ptr2addr(&J2G(as->J)->jit_base));
emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
#endif
emit_rmro(as, XO_MOV, r|REX_GC64, r, offsetof(lua_State, maxstack));
emit_getgl(as, r, cur_L);
if (allow == RSET_EMPTY) /* Spill temp. register. */
emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);

@@ -2417,18 +2674,38 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
(LJ_DUALNUM && irt_isinteger(ir->t)));
if (!irref_isk(ref)) {
Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
#if LJ_GC64
if (irt_is64(ir->t)) {
emit_u32(as, irt_toitype(ir->t) << 15);
emit_rmro(as, XO_ARITHi, XOg_OR, RID_BASE, ofs+4);
} else if (LJ_DUALNUM && irt_isinteger(ir->t)) {
emit_movmroi(as, RID_BASE, ofs+4, LJ_TISNUM << 15);
} else {
emit_movmroi(as, RID_BASE, ofs+4, (irt_toitype(ir->t)<<15)|0x7fff);
}
#endif
emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs);
#if LJ_GC64
} else {
TValue k;
lj_ir_kvalue(as->J->L, &k, ir);
emit_movmroi(as, RID_BASE, ofs+4, k.u32.hi);
emit_movmroi(as, RID_BASE, ofs, k.u32.lo);
#else
} else if (!irt_ispri(ir->t)) {
emit_movmroi(as, RID_BASE, ofs, ir->i);
#endif
}
if ((sn & (SNAP_CONT|SNAP_FRAME))) {
#if !LJ_FR2
if (s != 0) /* Do not overwrite link to previous frame. */
emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
#endif
#if !LJ_GC64
} else {
if (!(LJ_64 && irt_islightud(ir->t)))
emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
#endif
}
}
checkmclim(as);

@@ -2454,11 +2731,15 @@ static void asm_gc_check(ASMState *as)
args[1] = ASMREF_TMP2; /* MSize steps */
asm_gencall(as, ci, args);
tmp = ra_releasetmp(as, ASMREF_TMP1);
#if LJ_GC64
emit_rmro(as, XO_LEA, tmp|REX_64, RID_DISPATCH, GG_DISP2G);
#else
emit_loada(as, tmp, J2G(as->J));
#endif
emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps);
/* Jump around GC step if GC total < GC threshold. */
emit_sjcc(as, CC_B, l_end);
emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold);
emit_opgl(as, XO_ARITH(XOg_CMP), tmp|REX_GC64, gc.threshold);
emit_getgl(as, tmp, gc.total);
as->gcsteps = 0;
checkmclim(as);

@@ -2523,7 +2804,7 @@ static void asm_head_root_base(ASMState *as)
if (rset_test(as->modset, r) || irt_ismarked(ir->t))
ir->r = RID_INIT; /* No inheritance for modified BASE register. */
if (r != RID_BASE)
emit_rr(as, XO_MOV, r, RID_BASE);
emit_rr(as, XO_MOV, r|REX_GC64, RID_BASE);
}
}

@@ -2540,7 +2821,7 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
rset_clear(allow, r); /* Mark same BASE register as coalesced. */
} else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
rset_clear(allow, irp->r);
emit_rr(as, XO_MOV, r, irp->r); /* Move from coalesced parent reg. */
emit_rr(as, XO_MOV, r|REX_GC64, irp->r); /* Move from coalesced parent reg. */
} else {
emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */
}

@@ -2753,12 +3034,16 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
MSize len = T->szmcode;
MCode *px = exitstub_addr(J, exitno) - 6;
MCode *pe = p+len-6;
uint32_t stateaddr = u32ptr(&J2G(J)->vmstate);
#if LJ_GC64
uint32_t statei = (uint32_t)(GG_OFS(g.vmstate) - GG_OFS(dispatch));
#else
uint32_t statei = u32ptr(&J2G(J)->vmstate);
#endif
if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px)
*(int32_t *)(p+len-4) = jmprel(p+len, target);
/* Do not patch parent exit for a stack check. Skip beyond vmstate update. */
for (; p < pe; p += asm_x86_inslen(p))
if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi)
if (*(uint32_t*)(p+2+(LJ_64!=LJ_GC64)) == statei && p[0] == XI_MOVmi)
break;
lua_assert(p < pe);
for (; p < pe; p += asm_x86_inslen(p))
src/lj_emit_x86.h

@@ -20,6 +20,11 @@
#define REX_64 0
#define VEX_64 0
#endif
#if LJ_GC64
#define REX_GC64 REX_64
#else
#define REX_GC64 0
#endif

#define emit_i8(as, i) (*--as->mcp = (MCode)(i))
#define emit_i32(as, i) (*(int32_t *)(as->mcp-4) = (i), as->mcp -= 4)

@@ -94,25 +99,19 @@ static int32_t ptr2addr(const void *p)
#define ptr2addr(p) (i32ptr((p)))
#endif

/* op r, [addr] */
static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
{
MCode *p = as->mcp;
*(int32_t *)(p-4) = ptr2addr(addr);
#if LJ_64
p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
#else
as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
#endif
}

/* op r, [base+ofs] */
static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs)
{
MCode *p = as->mcp;
x86Mode mode;
if (ra_hasreg(rb)) {
#if LJ_GC64
if (rb == RID_RIP) {
mode = XM_OFS0;
p -= 4;
*(int32_t *)p = ofs;
} else
#endif
if (ofs == 0 && (rb&7) != RID_EBP) {
mode = XM_OFS0;
} else if (checki8(ofs)) {

@@ -210,6 +209,13 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb)
#if LJ_64
*--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
rb = RID_ESP;
#endif
#if LJ_GC64
} else if (rb == RID_RIP) {
lua_assert(as->mrm.idx == RID_NONE);
mode = XM_OFS0;
p -= 4;
*(int32_t *)p = as->mrm.ofs;
#endif
} else {
if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) {

@@ -264,8 +270,8 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
/* Get/set global_State fields. */
#define emit_opgl(as, xo, r, field) \
emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field)
#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r), field)
#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r), field)
#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r)|REX_GC64, field)
#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r)|REX_GC64, field)

#define emit_setvmstate(as, i) \
(emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate))

@@ -288,9 +294,21 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
}
}

#if LJ_GC64
#define dispofs(as, k) \
((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch))
#define mcpofs(as, k) \
((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp))
#define mctopofs(as, k) \
((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop))
/* mov r, addr */
#define emit_loada(as, r, addr) \
emit_loadu64(as, (r), (uintptr_t)(addr))
#else
/* mov r, addr */
#define emit_loada(as, r, addr) \
emit_loadi(as, (r), ptr2addr((addr)))
#endif

#if LJ_64
/* mov r, imm64 or shorter 32 bit extended load. */

@@ -302,6 +320,12 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
MCode *p = as->mcp;
*(int32_t *)(p-4) = (int32_t)u64;
as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4);
#if LJ_GC64
} else if (checki32(dispofs(as, u64))) {
emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64));
} else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) {
emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64));
#endif
} else { /* Full-size 64 bit load. */
MCode *p = as->mcp;
*(uint64_t *)(p-8) = u64;

@@ -313,6 +337,31 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
}
#endif

/* op r, [addr] */
static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
{
#if LJ_GC64
if (checki32(dispofs(as, addr))) {
emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr));
} else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) {
emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr));
} else if (!checki32((intptr_t)addr) && (xo == XO_MOV || xo == XO_MOVSD)) {
emit_rmro(as, xo, rr, rr, 0);
emit_loadu64(as, rr, (uintptr_t)addr);
} else
#endif
{
MCode *p = as->mcp;
*(int32_t *)(p-4) = ptr2addr(addr);
#if LJ_64
p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
#else
as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
#endif
}
}

/* Load 64-bit IR constant into register. */
static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
{

@@ -328,8 +377,28 @@ static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
}
if (!*k) {
emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r);
#if LJ_GC64
} else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) ||
(checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) {
emit_rma(as, xo, r64, k);
} else {
if (ir->i) {
lua_assert(*k == *(uint64_t*)(as->mctop - ir->i));
} else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) {
emit_loadu64(as, r, *k);
return;
} else {
while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
*(uint64_t*)as->mcbot = *k;
ir->i = (int32_t)(as->mctop - as->mcbot);
as->mcbot += 8;
as->mclim = as->mcbot + MCLIM_REDZONE;
}
emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i));
#else
} else {
emit_rma(as, xo, r64, k);
#endif
}
}

@@ -471,9 +540,9 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
{
if (ofs) {
if ((as->flags & JIT_F_LEA_AGU))
emit_rmro(as, XO_LEA, r, r, ofs);
emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs);
else
emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs);
emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
}
}
src/lj_ffrecord.c

@@ -1114,8 +1114,12 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id)
{
TRef tr, ud, fp;
if (id) { /* io.func() */
#if LJ_GC64
ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id]));
#else
tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]);
ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0);
#endif
} else { /* fp:method() */
ud = J->base[0];
if (!tref_isudata(ud))
src/lj_ir.h

@@ -412,7 +412,7 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv)

static LJ_AINLINE uint32_t irt_toitype_(IRType t)
{
lua_assert(!LJ_64 || t != IRT_LIGHTUD);
lua_assert(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD);
if (LJ_DUALNUM && t > IRT_NUM) {
return LJ_TISNUM;
} else {
src/lj_record.c

@@ -976,7 +976,12 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
}
/* The cdata metatable is treated as immutable. */
if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt;
#if LJ_GC64
ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB,
GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)]));
#else
ix->mt = mix.tab = lj_ir_ktab(J, mt);
#endif
goto nocheck;
}
ix->mt = mt ? mix.tab : TREF_NIL;
src/lj_snap.c

@@ -630,7 +630,6 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
}
if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
rs = snap_renameref(T, snapno, ref, rs);
lua_assert(!LJ_GC64); /* TODO_GC64: handle 64 bit references. */
if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
int32_t *sps = &ex->spill[regsp_spill(rs)];
if (irt_isinteger(t)) {

@@ -639,9 +638,11 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
} else if (irt_isnum(t)) {
o->u64 = *(uint64_t *)sps;
#endif
} else if (LJ_64 && irt_islightud(t)) {
#if LJ_64 && !LJ_GC64
} else if (irt_islightud(t)) {
/* 64 bit lightuserdata which may escape already has the tag bits. */
o->u64 = *(uint64_t *)sps;
#endif
} else {
lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));

@@ -659,9 +660,11 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
} else if (irt_isnum(t)) {
setnumV(o, ex->fpr[r-RID_MIN_FPR]);
#endif
} else if (LJ_64 && irt_is64(t)) {
#if LJ_64 && !LJ_GC64
} else if (irt_is64(t)) {
/* 64 bit values that already have the tag bits. */
o->u64 = ex->gpr[r-RID_MIN_GPR];
#endif
} else if (irt_ispri(t)) {
setpriV(o, irt_toitype(t));
} else {
src/lj_target_x86.h

@@ -21,8 +21,13 @@
#define FPRDEF(_) \
_(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7)
#endif
#if LJ_GC64
#define VRIDDEF(_) \
_(MRM) _(BAD) _(BAD) _(BAD) _(BAD) _(RIP)
#else
#define VRIDDEF(_) \
_(MRM)
#endif

#define RIDENUM(name) RID_##name,

@@ -31,6 +36,9 @@ enum {
FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
RID_MAX,
RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */
#if LJ_GC64
RID_RIP = 0x25, /* Pseudo-id for RIP. */
#endif

/* Calling conventions. */
RID_SP = RID_ESP,

@@ -63,8 +71,10 @@ enum {

/* -- Register sets ------------------------------------------------------- */

/* Make use of all registers, except the stack pointer. */
#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP))
/* Make use of all registers, except the stack pointer (and maybe DISPATCH). */
#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)\
- RID2RSET(RID_ESP)\
- LJ_GC64*RID2RSET(RID_DISPATCH))
#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
#define RSET_ALL (RSET_GPR|RSET_FPR)
#define RSET_INIT RSET_ALL

@@ -217,6 +227,7 @@ typedef enum {
XI_PUSHi8 = 0x6a,
XI_TESTb = 0x84,
XI_TEST = 0x85,
XI_INT3 = 0xcc,
XI_MOVmi = 0xc7,
XI_GROUP5 = 0xff,

@@ -243,6 +254,7 @@ typedef enum {
XV_SHRX = XV_f20f38(f7),

/* Variable-length opcodes. XO_* prefix. */
XO_OR = XO_(0b),
XO_MOV = XO_(8b),
XO_MOVto = XO_(89),
XO_MOVtow = XO_66(89),
@@ -2402,7 +2402,6 @@ static void build_subroutines(BuildCtx *ctx)
| mov RCH, byte [rbp-16]
| mov [rbp-8], r15; mov [rbp-16], r14
| // Caveat: DISPATCH is rbx.
| mov DISPATCH, [ebp]
| mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
| set_vmstate EXIT
| mov [DISPATCH+DISPATCH_J(exitno)], RCd