From 1c33f46314cc4e3cb52ac83c5b27419bc06b5154 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 11 Sep 2023 16:35:28 +0200 Subject: [PATCH] Windows/ARM64: Support Windows calling conventions. Dear Microsoft: your butchering of the (perfectly fine) ARM64 ABI is a disgrace. Thanks to Peter Cawley. #593 --- src/lj_asm_arm64.h | 17 ++++++++++++++++- src/lj_ccall.c | 18 +++++++++++++++++- src/lj_crecord.c | 6 +----- src/lj_emit_arm64.h | 4 ++-- 4 files changed, 36 insertions(+), 9 deletions(-) diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 4dd6b711..c2b17737 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -432,6 +432,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) as->cost[gpr] = REGCOST(~0u, ASMREF_L); gpr = REGARG_FIRSTGPR; +#if LJ_HASFFI && LJ_ABI_WIN + if ((ci->flags & CCI_VARARG)) { + fpr = REGARG_LASTFPR+1; + } +#endif for (n = 0; n < nargs; n++) { /* Setup args. */ IRRef ref = args[n]; IRIns *ir = IR(ref); @@ -442,6 +447,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) "reg %d not free", fpr); /* Must have been evicted. */ ra_leftov(as, fpr, ref); fpr++; +#if LJ_HASFFI && LJ_ABI_WIN + } else if ((ci->flags & CCI_VARARG) && (gpr <= REGARG_LASTGPR)) { + Reg rf = ra_alloc1(as, ref, RSET_FPR); + emit_dn(as, A64I_FMOV_R_D, gpr++, rf & 31); +#endif } else { Reg r = ra_alloc1(as, ref, RSET_FPR); int32_t al = spalign; @@ -1943,6 +1953,9 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; int spofs = 0, spalign = LJ_TARGET_OSX ? 0 : 7, nslots; asm_collectargs(as, ir, ci, args); +#if LJ_ABI_WIN + if ((ci->flags & CCI_VARARG)) nfpr = 0; +#endif for (i = 0; i < nargs; i++) { int al = spalign; if (!args[i]) { @@ -1954,7 +1967,9 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) #endif } else if (irt_isfp(IR(args[i])->t)) { if (nfpr > 0) { nfpr--; continue; } -#if LJ_TARGET_OSX +#if LJ_ABI_WIN + if ((ci->flags & CCI_VARARG) && ngpr > 0) { ngpr--; continue; } +#elif LJ_TARGET_OSX al |= irt_isnum(IR(args[i])->t) ? 7 : 3; #endif } else { diff --git a/src/lj_ccall.c b/src/lj_ccall.c index 00e753b9..5f95f5d8 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -985,6 +985,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, fid = ctf->sib; } +#if LJ_TARGET_ARM64 && LJ_ABI_WIN + if ((ct->info & CTF_VARARG)) { + nsp -= maxgpr * CTSIZE_PTR; /* May end up with negative nsp. */ + ngpr = maxgpr; + nfpr = CCALL_NARG_FPR; + } +#endif + /* Walk through all passed arguments. */ for (o = L->base+1, narg = 1; o < top; o++, narg++) { CTypeID did; @@ -1035,9 +1043,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, align = CTSIZE_PTR-1; nsp = (nsp + align) & ~align; } +#if LJ_TARGET_ARM64 && LJ_ABI_WIN + /* A negative nsp points into cc->gpr. Blame MS for their messy ABI. */ + dp = ((uint8_t *)cc->stack) + (int32_t)nsp; +#else dp = ((uint8_t *)cc->stack) + nsp; +#endif nsp += CCALL_PACK_STACKARG ? sz : n * CTSIZE_PTR; - if (nsp > CCALL_SIZE_STACK) { /* Too many arguments. */ + if ((int32_t)nsp > CCALL_SIZE_STACK) { /* Too many arguments. */ err_nyi: lj_err_caller(L, LJ_ERR_FFI_NYICALL); } @@ -1099,6 +1112,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, #endif } if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ +#if LJ_TARGET_ARM64 && LJ_ABI_WIN + if ((int32_t)nsp < 0) nsp = 0; +#endif #if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) cc->nfpr = nfpr; /* Required for vararg functions. */ diff --git a/src/lj_crecord.c b/src/lj_crecord.c index d7a522fb..55d0b3ef 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -1118,12 +1118,8 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, ngpr = 1; else if (ctype_cconv(ct->info) == CTCC_FASTCALL) ngpr = 2; -#elif LJ_TARGET_ARM64 -#if LJ_ABI_WIN -#error "NYI: ARM64 Windows ABI calling conventions" -#elif LJ_TARGET_OSX +#elif LJ_TARGET_ARM64 && LJ_TARGET_OSX int ngpr = CCALL_NARG_GPR; -#endif #endif /* Skip initial attributes. */ diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h index fef5d973..3c510492 100644 --- a/src/lj_emit_arm64.h +++ b/src/lj_emit_arm64.h @@ -124,9 +124,9 @@ static LJ_AINLINE uint32_t emit_lso_pair_candidate(A64Ins ai, int ofs, int sc) } } -static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs) +static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs64) { - int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3; + int ot = emit_checkofs(ai, ofs64), sc = (ai >> 30) & 3, ofs = (int)ofs64; lj_assertA(ot, "load/store offset %d out of range", ofs); /* Combine LDR/STR pairs to LDP/STP. */ if ((sc == 2 || sc == 3) &&