Windows/ARM64: Support Windows calling conventions.

Dear Microsoft: your butchering of the (perfectly fine) ARM64 ABI is a disgrace.
Thanks to Peter Cawley. #593
This commit is contained in:
Mike Pall 2023-09-11 16:35:28 +02:00
parent f63bc569fa
commit 1c33f46314
4 changed files with 36 additions and 9 deletions

View File

@@ -432,6 +432,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
as->cost[gpr] = REGCOST(~0u, ASMREF_L); as->cost[gpr] = REGCOST(~0u, ASMREF_L);
gpr = REGARG_FIRSTGPR; gpr = REGARG_FIRSTGPR;
#if LJ_HASFFI && LJ_ABI_WIN
if ((ci->flags & CCI_VARARG)) {
fpr = REGARG_LASTFPR+1;
}
#endif
for (n = 0; n < nargs; n++) { /* Setup args. */ for (n = 0; n < nargs; n++) { /* Setup args. */
IRRef ref = args[n]; IRRef ref = args[n];
IRIns *ir = IR(ref); IRIns *ir = IR(ref);
@@ -442,6 +447,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
"reg %d not free", fpr); /* Must have been evicted. */ "reg %d not free", fpr); /* Must have been evicted. */
ra_leftov(as, fpr, ref); ra_leftov(as, fpr, ref);
fpr++; fpr++;
#if LJ_HASFFI && LJ_ABI_WIN
} else if ((ci->flags & CCI_VARARG) && (gpr <= REGARG_LASTGPR)) {
Reg rf = ra_alloc1(as, ref, RSET_FPR);
emit_dn(as, A64I_FMOV_R_D, gpr++, rf & 31);
#endif
} else { } else {
Reg r = ra_alloc1(as, ref, RSET_FPR); Reg r = ra_alloc1(as, ref, RSET_FPR);
int32_t al = spalign; int32_t al = spalign;
@@ -1943,6 +1953,9 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
int spofs = 0, spalign = LJ_TARGET_OSX ? 0 : 7, nslots; int spofs = 0, spalign = LJ_TARGET_OSX ? 0 : 7, nslots;
asm_collectargs(as, ir, ci, args); asm_collectargs(as, ir, ci, args);
#if LJ_ABI_WIN
if ((ci->flags & CCI_VARARG)) nfpr = 0;
#endif
for (i = 0; i < nargs; i++) { for (i = 0; i < nargs; i++) {
int al = spalign; int al = spalign;
if (!args[i]) { if (!args[i]) {
@@ -1954,7 +1967,9 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
#endif #endif
} else if (irt_isfp(IR(args[i])->t)) { } else if (irt_isfp(IR(args[i])->t)) {
if (nfpr > 0) { nfpr--; continue; } if (nfpr > 0) { nfpr--; continue; }
#if LJ_TARGET_OSX #if LJ_ABI_WIN
if ((ci->flags & CCI_VARARG) && ngpr > 0) { ngpr--; continue; }
#elif LJ_TARGET_OSX
al |= irt_isnum(IR(args[i])->t) ? 7 : 3; al |= irt_isnum(IR(args[i])->t) ? 7 : 3;
#endif #endif
} else { } else {

View File

@@ -985,6 +985,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
fid = ctf->sib; fid = ctf->sib;
} }
#if LJ_TARGET_ARM64 && LJ_ABI_WIN
if ((ct->info & CTF_VARARG)) {
nsp -= maxgpr * CTSIZE_PTR; /* May end up with negative nsp. */
ngpr = maxgpr;
nfpr = CCALL_NARG_FPR;
}
#endif
/* Walk through all passed arguments. */ /* Walk through all passed arguments. */
for (o = L->base+1, narg = 1; o < top; o++, narg++) { for (o = L->base+1, narg = 1; o < top; o++, narg++) {
CTypeID did; CTypeID did;
@@ -1035,9 +1043,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
align = CTSIZE_PTR-1; align = CTSIZE_PTR-1;
nsp = (nsp + align) & ~align; nsp = (nsp + align) & ~align;
} }
#if LJ_TARGET_ARM64 && LJ_ABI_WIN
/* A negative nsp points into cc->gpr. Blame MS for their messy ABI. */
dp = ((uint8_t *)cc->stack) + (int32_t)nsp;
#else
dp = ((uint8_t *)cc->stack) + nsp; dp = ((uint8_t *)cc->stack) + nsp;
#endif
nsp += CCALL_PACK_STACKARG ? sz : n * CTSIZE_PTR; nsp += CCALL_PACK_STACKARG ? sz : n * CTSIZE_PTR;
if (nsp > CCALL_SIZE_STACK) { /* Too many arguments. */ if ((int32_t)nsp > CCALL_SIZE_STACK) { /* Too many arguments. */
err_nyi: err_nyi:
lj_err_caller(L, LJ_ERR_FFI_NYICALL); lj_err_caller(L, LJ_ERR_FFI_NYICALL);
} }
@@ -1099,6 +1112,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
#endif #endif
} }
if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */
#if LJ_TARGET_ARM64 && LJ_ABI_WIN
if ((int32_t)nsp < 0) nsp = 0;
#endif
#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) #if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
cc->nfpr = nfpr; /* Required for vararg functions. */ cc->nfpr = nfpr; /* Required for vararg functions. */

View File

@@ -1118,12 +1118,8 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
ngpr = 1; ngpr = 1;
else if (ctype_cconv(ct->info) == CTCC_FASTCALL) else if (ctype_cconv(ct->info) == CTCC_FASTCALL)
ngpr = 2; ngpr = 2;
#elif LJ_TARGET_ARM64 #elif LJ_TARGET_ARM64 && LJ_TARGET_OSX
#if LJ_ABI_WIN
#error "NYI: ARM64 Windows ABI calling conventions"
#elif LJ_TARGET_OSX
int ngpr = CCALL_NARG_GPR; int ngpr = CCALL_NARG_GPR;
#endif
#endif #endif
/* Skip initial attributes. */ /* Skip initial attributes. */

View File

@@ -124,9 +124,9 @@ static LJ_AINLINE uint32_t emit_lso_pair_candidate(A64Ins ai, int ofs, int sc)
} }
} }
static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs) static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs64)
{ {
int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3; int ot = emit_checkofs(ai, ofs64), sc = (ai >> 30) & 3, ofs = (int)ofs64;
lj_assertA(ot, "load/store offset %d out of range", ofs); lj_assertA(ot, "load/store offset %d out of range", ofs);
/* Combine LDR/STR pairs to LDP/STP. */ /* Combine LDR/STR pairs to LDP/STP. */
if ((sc == 2 || sc == 3) && if ((sc == 2 || sc == 3) &&