mirror of
https://github.com/LuaJIT/LuaJIT.git
Add x64 call argument setup. More 32/64 bit cleanups in assembler.
This commit is contained in:
parent b95294572c
commit 3c6cec0846

src/lj_asm.c
@@ -1292,21 +1292,52 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
   RegSet allow = RSET_ALL;
   uint32_t n, nargs = CCI_NARGS(ci);
-  int32_t ofs = 0;
+  int32_t ofs = STACKARG_OFS;
+  uint32_t gprs = REGARG_GPRS;
+#if LJ_64
+  Reg fpr = REGARG_FIRSTFPR;
+#endif
   lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL)));  /* Avoid stack adj. */
   emit_call(as, ci->func);
   for (n = 0; n < nargs; n++) {  /* Setup args. */
-#if LJ_64
-#error "NYI: 64 bit mode call argument setup"
-#endif
     IRIns *ir = IR(args[n]);
+    Reg r;
+#if LJ_64 && defined(_WIN64)
+    /* Windows/x64 argument registers are strictly positional. */
+    r = irt_isnum(ir->t) ? (fpr <= REGARG_LASTFPR ? fpr : 0) : (gprs & 31);
+    fpr++; gprs >>= 5;
+#elif LJ_64
+    /* POSIX/x64 argument registers are used in order of appearance. */
     if (irt_isnum(ir->t)) {
-      if ((ofs & 4) && irref_isk(args[n])) {
+      r = fpr <= REGARG_LASTFPR ? fpr : 0; fpr++;
+    } else {
+      r = gprs & 31; gprs >>= 5;
+    }
+#else
+    if (irt_isnum(ir->t) || !(ci->flags & CCI_FASTCALL)) {
+      r = 0;
+    } else {
+      r = gprs & 31; gprs >>= 5;
+    }
+#endif
+    if (r) {  /* Argument is in a register. */
+      if (args[n] < ASMREF_TMP1) {
+        emit_loadi(as, r, ir->i);
+      } else {
+        lua_assert(rset_test(as->freeset, r));  /* Must have been evicted. */
+        if (ra_hasreg(ir->r)) {
+          ra_noweak(as, ir->r);
+          ra_movrr(as, ir, r, ir->r);
+        } else {
+          ra_allocref(as, args[n], RID2RSET(r));
+        }
+      }
+    } else if (irt_isnum(ir->t)) {  /* FP argument is on stack. */
+      if (!LJ_64 && (ofs & 4) && irref_isk(args[n])) {
         /* Split stores for unaligned FP consts. */
         emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo);
         emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi);
       } else {
-        Reg r;
         if ((allow & RSET_FPR) == RSET_EMPTY)
           lj_trace_err(as->J, LJ_TRERR_NYICOAL);
         r = ra_alloc1(as, args[n], allow & RSET_FPR);
@@ -1314,34 +1345,18 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
         emit_rmro(as, XO_MOVSDto, r, RID_ESP, ofs);
       }
       ofs += 8;
-    } else {
-      if ((ci->flags & CCI_FASTCALL) && n < 2) {
-        Reg r = n == 0 ? RID_ECX : RID_EDX;
-        if (args[n] < ASMREF_TMP1) {
-          emit_loadi(as, r, ir->i);
-        } else {
-          lua_assert(rset_test(as->freeset, r));  /* Must have been evicted. */
-          allow &= ~RID2RSET(r);
-          if (ra_hasreg(ir->r)) {
-            ra_noweak(as, ir->r);
-            ra_movrr(as, ir, r, ir->r);
-          } else {
-            ra_allocref(as, args[n], RID2RSET(r));
-          }
-        }
+    } else {  /* Non-FP argument is on stack. */
+      /* NYI: no widening for 64 bit parameters on x64. */
+      if (args[n] < ASMREF_TMP1) {
+        emit_movmroi(as, RID_ESP, ofs, ir->i);
       } else {
-        if (args[n] < ASMREF_TMP1) {
-          emit_movmroi(as, RID_ESP, ofs, ir->i);
-        } else {
-          Reg r;
-          if ((allow & RSET_GPR) == RSET_EMPTY)
-            lj_trace_err(as->J, LJ_TRERR_NYICOAL);
-          r = ra_alloc1(as, args[n], allow & RSET_GPR);
-          allow &= ~RID2RSET(r);
-          emit_movtomro(as, r, RID_ESP, ofs);
-        }
-        ofs += 4;
+        if ((allow & RSET_GPR) == RSET_EMPTY)
+          lj_trace_err(as->J, LJ_TRERR_NYICOAL);
+        r = ra_alloc1(as, args[n], allow & RSET_GPR);
+        allow &= ~RID2RSET(r);
+        emit_movtomro(as, REX_64LU(ir, r), RID_ESP, ofs);
       }
+      ofs += sizeof(intptr_t);
     }
   }
 }
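
The two asm_gencall hunks above are the core of the change: on x64 the first few arguments now go into registers and only the overflow is stored to the stack. The following standalone sketch is not part of the commit; the register ids, names and the PACK6 helper are illustrative stand-ins for LuaJIT's RID_* constants and the REGARG_GPRS encoding (five bits per register id, lowest bits first), used here only to show the two assignment policies side by side.

/* Sketch of the argument-register assignment implemented above (assumed,
** simplified model; not LuaJIT code). */
#include <stdio.h>

static const char *regname[32] = {
  [1] = "rcx", [2] = "rdx", [3] = "rdi", [4] = "rsi", [8] = "r8", [9] = "r9",
};

#define PACK6(a,b,c,d,e,f) \
  ((a)|(((b)|(((c)|(((d)|(((e)|((f)<<5))<<5))<<5))<<5))<<5))

enum { ARG_INT, ARG_FP };  /* Crude stand-in for irt_isnum(). */

/* Windows/x64: registers are strictly positional; argument n always consumes
** slot n of both the GPR and the XMM sequence. */
static void win64(const int *t, int nargs)
{
  unsigned int gprs = PACK6(1, 2, 8, 9, 0, 0);  /* rcx, rdx, r8, r9 */
  int fpr = 0, n;
  for (n = 0; n < nargs; n++) {
    if (t[n] == ARG_FP) printf("  arg%d -> xmm%d\n", n, fpr);
    else printf("  arg%d -> %s\n", n, regname[gprs & 31]);
    fpr++; gprs >>= 5;  /* Both sequences advance for every argument. */
  }
}

/* POSIX/x64: GPRs and XMM registers are handed out independently, in order
** of appearance of integer resp. FP arguments. */
static void posix64(const int *t, int nargs)
{
  unsigned int gprs = PACK6(3, 4, 2, 1, 8, 9);  /* rdi, rsi, rdx, rcx, r8, r9 */
  int fpr = 0, n;
  for (n = 0; n < nargs; n++) {
    if (t[n] == ARG_FP) { printf("  arg%d -> xmm%d\n", n, fpr); fpr++; }
    else { printf("  arg%d -> %s\n", n, regname[gprs & 31]); gprs >>= 5; }
  }
}

int main(void)
{
  int t[3] = { ARG_FP, ARG_INT, ARG_FP };  /* e.g. f(double, int, double) */
  puts("Windows/x64:"); win64(t, 3);
  puts("POSIX/x64:");   posix64(t, 3);
  return 0;
}

For f(double, int, double) this prints xmm0/rdx/xmm2 on Windows but xmm0/rdi/xmm1 on POSIX, which is exactly the distinction the two comments in the hunk ("strictly positional" vs. "in order of appearance") describe.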
@@ -2561,7 +2576,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
     asm_guardcc(as, cc);
     if (usetest && left != RID_MRM) {
       /* Use test r,r instead of cmp r,0. */
-      emit_rr(as, XO_TEST, left, left);
+      emit_rr(as, XO_TEST, REX_64LU(ir, left), left);
       if (irl+1 == ir)  /* Referencing previous ins? */
         as->testmcp = as->mcp;  /* Set flag to drop test r,r if possible. */
     } else {
@@ -2580,11 +2595,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
       Reg left = ra_alloc1(as, lref, RSET_GPR);
       Reg right = asm_fuseload(as, rref, rset_exclude(RSET_GPR, left));
       asm_guardcc(as, cc);
-#if LJ_64
-      if (irt_islightud(ir->t))
-        left |= REX_64;
-#endif
-      emit_mrm(as, XO_CMP, left, right);
+      emit_mrm(as, XO_CMP, REX_64LU(ir, left), right);
     }
   }
 }
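
Both asm_comp_ hunks replace open-coded REX prefix handling with the REX_64LU() macro. Judging from the #if LJ_64 block that the second hunk removes, the macro presumably ORs REX_64 into the register operand when the operand is a 64-bit lightuserdata value, so the emitted test/cmp works on the full register width. A hypothetical equivalent, shown only to make the cleanup readable (the real definition lives in lj_asm.c and may differ in detail):

/* Assumed stand-in for REX_64LU(), inferred from the removed
** "if (irt_islightud(ir->t)) left |= REX_64;" block. */
#define REX_64LU_SKETCH(ir, r) \
  ((LJ_64 && irt_islightud((ir)->t)) ? ((r) | REX_64) : (r))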
@@ -2732,14 +2743,14 @@ static void asm_gc_check(ASMState *as, SnapShot *snap)
   /* We don't know spadj yet, so get the C frame from L->cframe. */
   emit_movmroi(as, tmp, CFRAME_OFS_PC,
                (int32_t)as->T->snapmap[snap->mapofs+snap->nent]);
-  emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK);
+  emit_gri(as, XG_ARITHi(XOg_AND), tmp|REX_64, CFRAME_RAWMASK);
   lstate = IR(ASMREF_L)->r;
-  emit_rmro(as, XO_MOV, tmp, lstate, offsetof(lua_State, cframe));
+  emit_rmro(as, XO_MOV, tmp|REX_64, lstate, offsetof(lua_State, cframe));
   /* It's ok if lstate is already in a non-scratch reg. But all allocations
   ** in the non-fast path must use a scratch reg. See comment above.
   */
   base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_SCRATCH & RSET_GPR, lstate));
-  emit_movtomro(as, base, lstate, offsetof(lua_State, base));
+  emit_movtomro(as, base|REX_64, lstate, offsetof(lua_State, base));
   asm_gc_sync(as, snap, base);
   /* BASE/L get restored anyway, better do it inside the slow path. */
   if (as->parent || as->curins == as->loopref) ra_restore(as, REF_BASE);
@@ -3447,7 +3458,12 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
     case IR_CALLN: case IR_CALLL: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
 #if LJ_64
-      /* NYI: add stack slots for calls with more than 4/6 args. */
+      /* NYI: add stack slots for x64 calls with many args. */
+#ifdef _WIN64
+      lua_assert(CCI_NARGS(ci) <= 4);
+#else
+      lua_assert(CCI_NARGS(ci) <= 6);  /* Safe lower bound. */
+#endif
       ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET);
 #else
       /* NYI: not fastcall-aware, but doesn't matter (yet). */
@@ -78,14 +78,27 @@ enum {
 /* Windows x64 ABI. */
 #define RSET_SCRATCH \
   (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1))
+#define REGARG_GPRS \
+  (RID_ECX|((RID_EDX|((RID_R8D|(RID_R9D<<5))<<5))<<5))
+#define REGARG_FIRSTFPR  RID_XMM0
+#define REGARG_LASTFPR   RID_XMM3
+#define STACKARG_OFS     (4*8)
 #else
 /* The rest of the civilized x64 world has a common ABI. */
 #define RSET_SCRATCH \
   (RSET_ACD|RSET_RANGE(RID_ESI, RID_R11D+1)|RSET_FPR)
+#define REGARG_GPRS \
+  (RID_EDI|((RID_ESI|((RID_EDX|((RID_ECX|((RID_R8D|(RID_R9D \
+   <<5))<<5))<<5))<<5))<<5))
+#define REGARG_FIRSTFPR  RID_XMM0
+#define REGARG_LASTFPR   RID_XMM7
+#define STACKARG_OFS     0
 #endif
 #else
 /* Common x86 ABI. */
 #define RSET_SCRATCH  (RSET_ACD|RSET_FPR)
+#define REGARG_GPRS   (RID_ECX|(RID_EDX<<5))  /* Fastcall only. */
+#define STACKARG_OFS  0
 #endif

 #if LJ_64
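
The hunk above (from the target header, presumably src/lj_target_x86.h) packs each ABI's GPR argument order into REGARG_GPRS, five bits per register id, and sets STACKARG_OFS to 4*8 on Windows/x64 so stack arguments start past the four reserved register-argument slots. A small helper, not part of the commit, that unpacks such a list the same way asm_gencall consumes it (r = gprs & 31; gprs >>= 5):

#include <stdio.h>

/* Walk a REGARG_GPRS-style packed list: five bits per register id, lowest
** bits first; once the list is exhausted the remaining slots decode to 0,
** which asm_gencall treats as "no register". */
static void dump_regarg_gprs(unsigned int gprs)
{
  int slot = 0;
  while (gprs != 0) {
    printf("GPR argument slot %d -> register id %u\n", slot, gprs & 31u);
    gprs >>= 5; slot++;
  }
}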
@@ -96,23 +109,26 @@ enum {

 /* -- Spill slots --------------------------------------------------------- */

-/* Available fixed spill slots in interpreter frame.
+/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
+**
+** SPS_FIXED: Available fixed spill slots in interpreter frame.
 ** This definition must match with the *.dasc file(s).
+**
+** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
 */
 #if LJ_64
 #ifdef _WIN64
 #define SPS_FIXED  (5*2)
+#define SPS_FIRST  (4*2)  /* Don't use callee register save area. */
 #else
 #define SPS_FIXED  2
+#define SPS_FIRST  2
 #endif
 #else
 #define SPS_FIXED  6
+#define SPS_FIRST  2
 #endif

-/* First spill slot for general use. Reserve one 64 bit slot. */
-#define SPS_FIRST  2
-
-/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */
 #define sps_scale(slot)  (4 * (int32_t)(slot))

 /* -- Exit state ---------------------------------------------------------- */
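
The spill-slot hunk folds the SPS_FIXED and SPS_FIRST documentation into one comment and makes SPS_FIRST differ per ABI. A short sketch, not part of the commit, of how the macros compose; SPS_FIRST_WIN64 below is copied from the Windows/x64 branch of the hunk:

#include <assert.h>
#include <stdint.h>

#define SPS_FIRST_WIN64  (4*2)                    /* From the hunk above. */
#define sps_scale(slot)  (4 * (int32_t)(slot))    /* 32 bit slot -> byte offset. */

int main(void)
{
  /* General spilling starts past the 4*2 slots (32 bytes) that the comment
  ** in the hunk reserves as the callee register save area on Windows/x64. */
  assert(sps_scale(SPS_FIRST_WIN64) == 32);
  /* An FPR value occupies an even/odd slot pair, i.e. 8 contiguous bytes. */
  assert(sps_scale(SPS_FIRST_WIN64 + 2) - sps_scale(SPS_FIRST_WIN64) == 8);
  return 0;
}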