Add x64 call argument setup. More 32/64 bit cleanups in assembler.

This commit is contained in:
Author: Mike Pall — committed 2010-02-25 03:35:07 +01:00
parent b95294572c
commit 3c6cec0846
2 changed files with 79 additions and 47 deletions

View File (assembler backend — presumably src/lj_asm.c; diff hunks below interleave removed and added lines without +/- markers)

@ -1292,21 +1292,52 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
{
RegSet allow = RSET_ALL;
uint32_t n, nargs = CCI_NARGS(ci);
int32_t ofs = 0;
int32_t ofs = STACKARG_OFS;
uint32_t gprs = REGARG_GPRS;
#if LJ_64
Reg fpr = REGARG_FIRSTFPR;
#endif
lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL))); /* Avoid stack adj. */
emit_call(as, ci->func);
for (n = 0; n < nargs; n++) { /* Setup args. */
#if LJ_64
#error "NYI: 64 bit mode call argument setup"
#endif
IRIns *ir = IR(args[n]);
Reg r;
#if LJ_64 && defined(_WIN64)
/* Windows/x64 argument registers are strictly positional. */
r = irt_isnum(ir->t) ? (fpr <= REGARG_LASTFPR ? fpr : 0) : (gprs & 31);
fpr++; gprs >>= 5;
#elif LJ_64
/* POSIX/x64 argument registers are used in order of appearance. */
if (irt_isnum(ir->t)) {
if ((ofs & 4) && irref_isk(args[n])) {
r = fpr <= REGARG_LASTFPR ? fpr : 0; fpr++;
} else {
r = gprs & 31; gprs >>= 5;
}
#else
if (irt_isnum(ir->t) || !(ci->flags & CCI_FASTCALL)) {
r = 0;
} else {
r = gprs & 31; gprs >>= 5;
}
#endif
if (r) { /* Argument is in a register. */
if (args[n] < ASMREF_TMP1) {
emit_loadi(as, r, ir->i);
} else {
lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */
if (ra_hasreg(ir->r)) {
ra_noweak(as, ir->r);
ra_movrr(as, ir, r, ir->r);
} else {
ra_allocref(as, args[n], RID2RSET(r));
}
}
} else if (irt_isnum(ir->t)) { /* FP argument is on stack. */
if (!LJ_64 && (ofs & 4) && irref_isk(args[n])) {
/* Split stores for unaligned FP consts. */
emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo);
emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi);
} else {
Reg r;
if ((allow & RSET_FPR) == RSET_EMPTY)
lj_trace_err(as->J, LJ_TRERR_NYICOAL);
r = ra_alloc1(as, args[n], allow & RSET_FPR);
@ -1314,34 +1345,18 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
emit_rmro(as, XO_MOVSDto, r, RID_ESP, ofs);
}
ofs += 8;
} else {
if ((ci->flags & CCI_FASTCALL) && n < 2) {
Reg r = n == 0 ? RID_ECX : RID_EDX;
if (args[n] < ASMREF_TMP1) {
emit_loadi(as, r, ir->i);
} else {
lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */
allow &= ~RID2RSET(r);
if (ra_hasreg(ir->r)) {
ra_noweak(as, ir->r);
ra_movrr(as, ir, r, ir->r);
} else {
ra_allocref(as, args[n], RID2RSET(r));
}
}
} else { /* Non-FP argument is on stack. */
/* NYI: no widening for 64 bit parameters on x64. */
if (args[n] < ASMREF_TMP1) {
emit_movmroi(as, RID_ESP, ofs, ir->i);
} else {
if (args[n] < ASMREF_TMP1) {
emit_movmroi(as, RID_ESP, ofs, ir->i);
} else {
Reg r;
if ((allow & RSET_GPR) == RSET_EMPTY)
lj_trace_err(as->J, LJ_TRERR_NYICOAL);
r = ra_alloc1(as, args[n], allow & RSET_GPR);
allow &= ~RID2RSET(r);
emit_movtomro(as, r, RID_ESP, ofs);
}
ofs += 4;
if ((allow & RSET_GPR) == RSET_EMPTY)
lj_trace_err(as->J, LJ_TRERR_NYICOAL);
r = ra_alloc1(as, args[n], allow & RSET_GPR);
allow &= ~RID2RSET(r);
emit_movtomro(as, REX_64LU(ir, r), RID_ESP, ofs);
}
ofs += sizeof(intptr_t);
}
}
}
@ -2561,7 +2576,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
asm_guardcc(as, cc);
if (usetest && left != RID_MRM) {
/* Use test r,r instead of cmp r,0. */
emit_rr(as, XO_TEST, left, left);
emit_rr(as, XO_TEST, REX_64LU(ir, left), left);
if (irl+1 == ir) /* Referencing previous ins? */
as->testmcp = as->mcp; /* Set flag to drop test r,r if possible. */
} else {
@ -2580,11 +2595,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
Reg left = ra_alloc1(as, lref, RSET_GPR);
Reg right = asm_fuseload(as, rref, rset_exclude(RSET_GPR, left));
asm_guardcc(as, cc);
#if LJ_64
if (irt_islightud(ir->t))
left |= REX_64;
#endif
emit_mrm(as, XO_CMP, left, right);
emit_mrm(as, XO_CMP, REX_64LU(ir, left), right);
}
}
}
@ -2732,14 +2743,14 @@ static void asm_gc_check(ASMState *as, SnapShot *snap)
/* We don't know spadj yet, so get the C frame from L->cframe. */
emit_movmroi(as, tmp, CFRAME_OFS_PC,
(int32_t)as->T->snapmap[snap->mapofs+snap->nent]);
emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK);
emit_gri(as, XG_ARITHi(XOg_AND), tmp|REX_64, CFRAME_RAWMASK);
lstate = IR(ASMREF_L)->r;
emit_rmro(as, XO_MOV, tmp, lstate, offsetof(lua_State, cframe));
emit_rmro(as, XO_MOV, tmp|REX_64, lstate, offsetof(lua_State, cframe));
/* It's ok if lstate is already in a non-scratch reg. But all allocations
** in the non-fast path must use a scratch reg. See comment above.
*/
base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_SCRATCH & RSET_GPR, lstate));
emit_movtomro(as, base, lstate, offsetof(lua_State, base));
emit_movtomro(as, base|REX_64, lstate, offsetof(lua_State, base));
asm_gc_sync(as, snap, base);
/* BASE/L get restored anyway, better do it inside the slow path. */
if (as->parent || as->curins == as->loopref) ra_restore(as, REF_BASE);
@ -3447,7 +3458,12 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
case IR_CALLN: case IR_CALLL: case IR_CALLS: {
const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
#if LJ_64
/* NYI: add stack slots for calls with more than 4/6 args. */
/* NYI: add stack slots for x64 calls with many args. */
#ifdef _WIN64
lua_assert(CCI_NARGS(ci) <= 4);
#else
lua_assert(CCI_NARGS(ci) <= 6); /* Safe lower bound. */
#endif
ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET);
#else
/* NYI: not fastcall-aware, but doesn't matter (yet). */

View File (x86/x64 target definitions — presumably src/lj_target_x86.h; diff hunks below interleave removed and added lines without +/- markers)

@ -78,14 +78,27 @@ enum {
/* Windows x64 ABI. */
#define RSET_SCRATCH \
(RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1))
#define REGARG_GPRS \
(RID_ECX|((RID_EDX|((RID_R8D|(RID_R9D<<5))<<5))<<5))
#define REGARG_FIRSTFPR RID_XMM0
#define REGARG_LASTFPR RID_XMM3
#define STACKARG_OFS (4*8)
#else
/* The rest of the civilized x64 world has a common ABI. */
#define RSET_SCRATCH \
(RSET_ACD|RSET_RANGE(RID_ESI, RID_R11D+1)|RSET_FPR)
#define REGARG_GPRS \
(RID_EDI|((RID_ESI|((RID_EDX|((RID_ECX|((RID_R8D|(RID_R9D \
<<5))<<5))<<5))<<5))<<5))
#define REGARG_FIRSTFPR RID_XMM0
#define REGARG_LASTFPR RID_XMM7
#define STACKARG_OFS 0
#endif
#else
/* Common x86 ABI. */
#define RSET_SCRATCH (RSET_ACD|RSET_FPR)
#define REGARG_GPRS (RID_ECX|(RID_EDX<<5)) /* Fastcall only. */
#define STACKARG_OFS 0
#endif
#if LJ_64
@ -96,23 +109,26 @@ enum {
/* -- Spill slots --------------------------------------------------------- */
/* Available fixed spill slots in interpreter frame.
/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
**
** SPS_FIXED: Available fixed spill slots in interpreter frame.
** This definition must match with the *.dasc file(s).
**
** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
*/
#if LJ_64
#ifdef _WIN64
#define SPS_FIXED (5*2)
#define SPS_FIRST (4*2) /* Don't use callee register save area. */
#else
#define SPS_FIXED 2
#define SPS_FIRST 2
#endif
#else
#define SPS_FIXED 6
#define SPS_FIRST 2
#endif
/* First spill slot for general use. Reserve one 64 bit slot. */
#define SPS_FIRST 2
/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */
#define sps_scale(slot) (4 * (int32_t)(slot))
/* -- Exit state ---------------------------------------------------------- */