From 83954100dba9fc0cf5eeaf122f007df35ec9a604 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 29 Aug 2023 02:21:51 +0200 Subject: [PATCH] FFI/ARM64/OSX: Handle non-standard OSX C calling conventions. Contributed by Peter Cawley. #205 --- src/lj_asm_arm64.h | 75 ++++++++++++++++++++++++++++++++++++---------- src/lj_ccall.c | 11 ++++--- src/lj_ccall.h | 6 ++++ src/lj_crecord.c | 27 +++++++++++++++++ 4 files changed, 98 insertions(+), 21 deletions(-) diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 1d5cca4f..3889883d 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -419,7 +419,7 @@ static int asm_fuseorshift(ASMState *as, IRIns *ir) static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) { uint32_t n, nargs = CCI_XNARGS(ci); - int32_t ofs = 0; + int32_t spofs = 0, spalign = LJ_HASFFI && LJ_TARGET_OSX ? 0 : 7; Reg gpr, fpr = REGARG_FIRSTFPR; if (ci->func) emit_call(as, ci->func); @@ -438,8 +438,14 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) fpr++; } else { Reg r = ra_alloc1(as, ref, RSET_FPR); - emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0)); - ofs += 8; + int32_t al = spalign; +#if LJ_HASFFI && LJ_TARGET_OSX + al |= irt_isnum(ir->t) ? 7 : 3; +#endif + spofs = (spofs + al) & ~al; + if (LJ_BE && al >= 7 && !irt_isnum(ir->t)) spofs += 4, al -= 4; + emit_spstore(as, ir, r, spofs); + spofs += al + 1; } } else { if (gpr <= REGARG_LASTGPR) { @@ -449,10 +455,27 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) gpr++; } else { Reg r = ra_alloc1(as, ref, RSET_GPR); - emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 
4 : 0)); - ofs += 8; + int32_t al = spalign; +#if LJ_HASFFI && LJ_TARGET_OSX + al |= irt_size(ir->t) - 1; +#endif + spofs = (spofs + al) & ~al; + if (al >= 3) { + if (LJ_BE && al >= 7 && !irt_is64(ir->t)) spofs += 4, al -= 4; + emit_spstore(as, ir, r, spofs); + } else { + lj_assertA(al == 0 || al == 1, "size %d unexpected", al + 1); + emit_lso(as, al ? A64I_STRH : A64I_STRB, r, RID_SP, spofs); + } + spofs += al + 1; } } +#if LJ_HASFFI && LJ_TARGET_OSX + } else { /* Marker for start of varargs. */ + gpr = REGARG_LASTGPR+1; + fpr = REGARG_LASTFPR+1; + spalign = 7; +#endif } } } @@ -1976,19 +1999,41 @@ static void asm_tail_prep(ASMState *as) /* Ensure there are enough stack slots for call arguments. */ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) { - IRRef args[CCI_NARGS_MAX*2]; +#if LJ_HASFFI uint32_t i, nargs = CCI_XNARGS(ci); - int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; - asm_collectargs(as, ir, ci, args); - for (i = 0; i < nargs; i++) { - if (args[i] && irt_isfp(IR(args[i])->t)) { - if (nfpr > 0) nfpr--; else nslots += 2; - } else { - if (ngpr > 0) ngpr--; else nslots += 2; + if (nargs > (REGARG_NUMGPR < REGARG_NUMFPR ? REGARG_NUMGPR : REGARG_NUMFPR) || + (LJ_TARGET_OSX && (ci->flags & CCI_VARARG))) { + IRRef args[CCI_NARGS_MAX*2]; + int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; + int spofs = 0, spalign = LJ_TARGET_OSX ? 0 : 7, nslots; + asm_collectargs(as, ir, ci, args); + for (i = 0; i < nargs; i++) { + int al = spalign; + if (!args[i]) { +#if LJ_TARGET_OSX + /* Marker for start of varargs. */ + nfpr = 0; + ngpr = 0; + spalign = 7; +#endif + } else if (irt_isfp(IR(args[i])->t)) { + if (nfpr > 0) { nfpr--; continue; } +#if LJ_TARGET_OSX + al |= irt_isnum(IR(args[i])->t) ? 7 : 3; +#endif + } else { + if (ngpr > 0) { ngpr--; continue; } +#if LJ_TARGET_OSX + al |= irt_size(IR(args[i])->t) - 1; +#endif + } + spofs = (spofs + 2*al+1) & ~al; /* Align and bump stack pointer. 
*/ } + nslots = (spofs + 3) >> 2; + if (nslots > as->evenspill) /* Leave room for args in stack slots. */ + as->evenspill = nslots; } - if (nslots > as->evenspill) /* Leave room for args in stack slots. */ - as->evenspill = nslots; +#endif return REGSP_HINT(RID_RET); } diff --git a/src/lj_ccall.c b/src/lj_ccall.c index 9001cb5a..00e753b9 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -348,7 +348,6 @@ goto done; \ } else { \ nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \ - if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \ } \ } else { /* Try to pass argument in GPRs. */ \ if (!LJ_TARGET_OSX && (d->info & CTF_ALIGN) > CTALIGN_PTR) \ @@ -359,7 +358,6 @@ goto done; \ } else { \ ngpr = maxgpr; /* Prevent reordering. */ \ - if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \ } \ } @@ -1023,7 +1021,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, CCALL_HANDLE_STRUCTARG } else if (ctype_iscomplex(d->info)) { CCALL_HANDLE_COMPLEXARG - } else { + } else if (!(CCALL_PACK_STACKARG && ctype_isenum(d->info))) { sz = CTSIZE_PTR; } n = (sz + CTSIZE_PTR-1) / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */ @@ -1033,12 +1031,12 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, /* Otherwise pass argument on stack. */ if (CCALL_ALIGN_STACKARG) { /* Align argument on stack. */ MSize align = (1u << ctype_align(d->info)) - 1; - if (rp) + if (rp || (CCALL_PACK_STACKARG && isva && align < CTSIZE_PTR-1)) align = CTSIZE_PTR-1; nsp = (nsp + align) & ~align; } dp = ((uint8_t *)cc->stack) + nsp; - nsp += n * CTSIZE_PTR; + nsp += CCALL_PACK_STACKARG ? sz : n * CTSIZE_PTR; if (nsp > CCALL_SIZE_STACK) { /* Too many arguments. */ err_nyi: lj_err_caller(L, LJ_ERR_FFI_NYICALL); @@ -1053,7 +1051,8 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, } lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); /* Extend passed integers to 32 bits at least. 
*/ - if (ctype_isinteger_or_bool(d->info) && d->size < 4) { + if (ctype_isinteger_or_bool(d->info) && d->size < 4 && + (!CCALL_PACK_STACKARG || !((uintptr_t)dp & 3))) { /* Assumes LJ_LE. */ if (d->info & CTF_UNSIGNED) *(uint32_t *)dp = d->size == 1 ? (uint32_t)*(uint8_t *)dp : (uint32_t)*(uint16_t *)dp; diff --git a/src/lj_ccall.h b/src/lj_ccall.h index 57300817..24646d90 100644 --- a/src/lj_ccall.h +++ b/src/lj_ccall.h @@ -75,6 +75,9 @@ typedef union FPRArg { #define CCALL_NARG_FPR 8 #define CCALL_NRET_FPR 4 #define CCALL_SPS_FREE 0 +#if LJ_TARGET_OSX +#define CCALL_PACK_STACKARG 1 +#endif typedef intptr_t GPRArg; typedef union FPRArg { @@ -139,6 +142,9 @@ typedef union FPRArg { #ifndef CCALL_ALIGN_STACKARG #define CCALL_ALIGN_STACKARG 1 #endif +#ifndef CCALL_PACK_STACKARG +#define CCALL_PACK_STACKARG 0 +#endif #ifndef CCALL_ALIGN_CALLSTATE #define CCALL_ALIGN_CALLSTATE 8 #endif diff --git a/src/lj_crecord.c b/src/lj_crecord.c index 04bc895d..d7a522fb 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -1118,6 +1118,12 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, ngpr = 1; else if (ctype_cconv(ct->info) == CTCC_FASTCALL) ngpr = 2; +#elif LJ_TARGET_ARM64 +#if LJ_ABI_WIN +#error "NYI: ARM64 Windows ABI calling conventions" +#elif LJ_TARGET_OSX + int ngpr = CCALL_NARG_GPR; +#endif #endif /* Skip initial attributes. */ @@ -1143,6 +1149,14 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, } else { if (!(ct->info & CTF_VARARG)) lj_trace_err(J, LJ_TRERR_NYICALL); /* Too many arguments. */ +#if LJ_TARGET_ARM64 && LJ_TARGET_OSX + if (ngpr >= 0) { + ngpr = -1; + args[n++] = TREF_NIL; /* Marker for start of varargs. */ + if (n >= CCI_NARGS_MAX) + lj_trace_err(J, LJ_TRERR_NYICALL); + } +#endif did = lj_ccall_ctid_vararg(cts, o); /* Infer vararg type. 
*/ } d = ctype_raw(cts, did); @@ -1151,6 +1165,15 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, lj_trace_err(J, LJ_TRERR_NYICALL); tr = crec_ct_tv(J, d, 0, *base, o); if (ctype_isinteger_or_bool(d->info)) { +#if LJ_TARGET_ARM64 && LJ_TARGET_OSX + if (!ngpr) { + /* Fixed args passed on the stack use their unpromoted size. */ + if (d->size != lj_ir_type_size[tref_type(tr)]) { + lj_assertJ(d->size == 1 || d->size==2, "unexpected size %d", d->size); + tr = emitconv(tr, d->size==1 ? IRT_U8 : IRT_U16, tref_type(tr), 0); + } + } else +#endif if (d->size < 4) { if ((d->info & CTF_UNSIGNED)) tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_U8 : IRT_U16, 0); @@ -1188,6 +1211,10 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, } } #endif +#elif LJ_TARGET_ARM64 && LJ_TARGET_OSX + if (!ctype_isfp(d->info) && ngpr) { + ngpr--; + } #endif args[n] = tr; }