From 4b0af611d05fc15ccc57a050dead055546a3f485 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 30 Jul 2012 19:00:52 +0200 Subject: [PATCH] ARM: Add hard-float ABI support to the FFI (interpreter). --- src/lj_ccall.c | 116 +++++++++++++++++++++++++++++++++++++++++++-- src/lj_ccall.h | 15 ++++-- src/lj_ccallback.c | 42 ++++++++++++++-- src/lj_ctype.h | 2 +- src/vm_arm.dasc | 37 ++++++++++++--- 5 files changed, 193 insertions(+), 19 deletions(-) diff --git a/src/lj_ccall.c b/src/lj_ccall.c index c3eb25f6..71331f39 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -168,6 +168,8 @@ #elif LJ_TARGET_ARM /* -- ARM calling conventions --------------------------------------------- */ +#if LJ_ABI_SOFTFP + #define CCALL_HANDLE_STRUCTRET \ /* Return structs of size <= 4 in a GPR. */ \ cc->retref = !(sz <= 4); \ @@ -186,13 +188,70 @@ #define CCALL_HANDLE_COMPLEXARG \ /* Pass complex by value in 2 or 4 GPRs. */ -/* ARM has a softfp ABI. */ +#define CCALL_HANDLE_REGARG_FP1 +#define CCALL_HANDLE_REGARG_FP2 + +#else + +#define CCALL_HANDLE_STRUCTRET \ + cc->retref = !ccall_classify_struct(cts, ctr, ct); \ + if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; /* Classifier returned 0: return by reference, dp goes into the next GPR. */ + +#define CCALL_HANDLE_STRUCTRET2 \ + if (ccall_classify_struct(cts, ctr, ct) > 1) sp = (uint8_t *)&cc->fpr[0]; \ + memcpy(dp, sp, ctr->size); /* sp points at cc->fpr[0] when the classification is > 1. */ + +#define CCALL_HANDLE_COMPLEXRET \ + if (!(ct->info & CTF_VARARG)) cc->retref = 0; /* Return complex in FPRs. */ + +#define CCALL_HANDLE_COMPLEXRET2 \ + if (!(ct->info & CTF_VARARG)) memcpy(dp, &cc->fpr[0], ctr->size); /* Non-vararg only: copy the complex value out of cc->fpr[0]. */ + +#define CCALL_HANDLE_STRUCTARG \ + isfp = (ccall_classify_struct(cts, d, ct) > 1); + /* Pass all structs by value in registers and/or on the stack. */ + +#define CCALL_HANDLE_COMPLEXARG \ + isfp = 1; /* Pass complex by value in FPRs or on stack. 
*/ + +#define CCALL_HANDLE_REGARG_FP1 \ + if (isfp && !(ct->info & CTF_VARARG)) { \ + if ((d->info & CTF_ALIGN) > CTALIGN_PTR) { \ + if (nfpr + (n >> 1) <= CCALL_NARG_FPR) { \ + dp = &cc->fpr[nfpr]; /* Alignment above pointer size: use whole fpr[] slots. */ \ + nfpr += (n >> 1); \ + goto done; \ + } \ + } else { \ + if (sz > 1 && fprodd != nfpr) fprodd = 0; /* NOTE(review): confirm the unit of sz; for a byte size this test is true for every FP argument. */ \ + if (fprodd) { \ + if (2*nfpr+n <= 2*CCALL_NARG_FPR+1) { \ + dp = (void *)&cc->fpr[fprodd-1].f[1]; /* Start in the f[1] half of slot fprodd-1. */ \ + nfpr += (n >> 1); \ + if ((n & 1)) fprodd = 0; else fprodd = nfpr-1; \ + goto done; \ + } \ + } else { \ + if (2*nfpr+n <= 2*CCALL_NARG_FPR) { \ + dp = (void *)&cc->fpr[nfpr]; \ + nfpr += (n >> 1); \ + if ((n & 1)) fprodd = ++nfpr; else fprodd = 0; /* Odd n: only f[0] of the last slot is used; remember that slot as index+1. */ \ + goto done; \ + } \ + } \ + } \ + fprodd = 0; /* No reordering after the first FP value is on stack. */ \ + } else { + +#define CCALL_HANDLE_REGARG_FP2 } + +#endif + #define CCALL_HANDLE_REGARG \ + CCALL_HANDLE_REGARG_FP1 \ if ((d->info & CTF_ALIGN) > CTALIGN_PTR) { \ if (ngpr < maxgpr) \ ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ - else \ - nsp = (nsp + 1u) & ~1u; /* Align argument on stack. */ \ } \ if (ngpr < maxgpr) { \ dp = &cc->gpr[ngpr]; \ @@ -204,7 +263,10 @@ ngpr += n; \ } \ goto done; \ - } + } CCALL_HANDLE_REGARG_FP2 + +#define CCALL_HANDLE_RET \ + if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0]; /* Vararg functions: point sp at cc->gpr[0] (presumably because varargs do not return in FPRs -- confirm). */ #elif LJ_TARGET_PPC /* -- PPC calling conventions --------------------------------------------- */ @@ -453,6 +515,49 @@ static void ccall_struct_ret(CCallState *cc, int *rcl, uint8_t *dp, CTSize sz) } #endif +/* -- ARM hard-float ABI struct classification ---------------------------- */ + +#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP + +/* Classify a struct based on its fields. Yields element size + (element count << 8) for an aggregate made only of up to 4 floats or doubles, else (size <= 4). 
*/ +static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf) +{ + CTSize sz = ct->size; + unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); /* r: OR of FP element sizes, n: FP element count. */ + if ((ctf->info & CTF_VARARG)) goto noth; + while (ct->sib) { + ct = ctype_get(cts, ct->sib); + if (ctype_isfield(ct->info)) { + CType *sct = ctype_rawchild(cts, ct); + if (ctype_isfp(sct->info)) { + r |= sct->size; + if (!isu) n++; else if (n == 0) n = 1; /* Struct members add up, union members keep the maximum. */ + } else if (ctype_iscomplex(sct->info)) { + r |= (sct->size >> 1); /* A complex counts as two elements of half its size. */ + if (!isu) n += 2; else if (n < 2) n = 2; + } else { + goto noth; /* Any other field type. NOTE(review): this includes fields of struct type -- confirm nested aggregates are meant to be rejected here. */ + } + } else if (ctype_isbitfield(ct->info)) { + goto noth; + } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) { + CType *sct = ctype_child(cts, ct); + if (sct->size > 0) { + unsigned int s = ccall_classify_struct(cts, sct, ctf); + if (s <= 1) goto noth; + r |= (s & 255); /* Merge the sub-aggregate's element size and count. */ + if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8); + } + } + } + if ((r == 4 || r == 8) && n <= 4) + return r + (n << 8); /* Element size (4 or 8) in the low byte, element count above it. */ +noth: /* Not a homogeneous float/double aggregate. */ + return (sz <= 4); /* Return structs of size <= 4 in a GPR. */ +} + +#endif + /* -- Common C call handling ---------------------------------------------- */ /* Infer the destination CTypeID for a vararg argument. */ @@ -494,6 +599,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, MSize maxgpr, ngpr = 0, nsp = 0, narg; #if CCALL_NARG_FPR MSize nfpr = 0; +#if LJ_TARGET_ARM + MSize fprodd = 0; /* Used by CCALL_HANDLE_REGARG_FP1: 0 or index+1 of an fpr[] slot with a free f[1] half. */ +#endif #endif /* Clear unused regs to get some determinism in case of misdeclaration. */ diff --git a/src/lj_ccall.h b/src/lj_ccall.h index 5985c4a9..62f963e1 100644 --- a/src/lj_ccall.h +++ b/src/lj_ccall.h @@ -51,12 +51,21 @@ typedef intptr_t GPRArg; #elif LJ_TARGET_ARM #define CCALL_NARG_GPR 4 -#define CCALL_NARG_FPR 0 #define CCALL_NRET_GPR 2 /* For softfp double. 
*/ +#if LJ_ABI_SOFTFP +#define CCALL_NARG_FPR 0 #define CCALL_NRET_FPR 0 +#else +#define CCALL_NARG_FPR 8 +#define CCALL_NRET_FPR 4 +#endif #define CCALL_SPS_FREE 0 typedef intptr_t GPRArg; +typedef union FPRArg { + double d; + float f[2]; /* The same 8 bytes viewed as two floats. */ +} FPRArg; #elif LJ_TARGET_PPC @@ -122,7 +131,7 @@ LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR); /* -- C call state -------------------------------------------------------- */ -typedef struct CCallState { +typedef LJ_ALIGN(8) struct CCallState { void (*func)(void); /* Pointer to called function. */ uint32_t spadj; /* Stack pointer adjustment. */ uint8_t nsp; /* Number of stack slots. */ @@ -135,10 +144,10 @@ typedef struct CCallState { #elif LJ_TARGET_PPC uint8_t nfpr; /* Number of arguments in FPRs. */ #endif -#if CCALL_NUM_FPR #if LJ_32 int32_t align1; #endif +#if CCALL_NUM_FPR FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */ #endif GPRArg gpr[CCALL_NUM_GPR]; /* Arguments/results in GPRs. */ diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index a9567bc5..430643ee 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -310,22 +310,53 @@ void lj_ccallback_mcode_free(CTState *cts) #elif LJ_TARGET_ARM +#if LJ_ABI_SOFTFP + +#define CALLBACK_HANDLE_REGARG_FP1 UNUSED(isfp); +#define CALLBACK_HANDLE_REGARG_FP2 + +#else + +#define CALLBACK_HANDLE_REGARG_FP1 \ + if (isfp) { \ + if (n == 1) { \ + if (fprodd) { \ + sp = &cts->cb.fpr[fprodd-1]; /* NOTE(review): this is the same address that was handed out when fprodd was set (no .f[1] as on the call side) -- confirm. */ \ + fprodd = 0; \ + goto done; \ + } else if (nfpr + 1 <= CCALL_NARG_FPR) { \ + sp = &cts->cb.fpr[nfpr++]; \ + fprodd = nfpr; /* Remember this slot as index+1 for the next n == 1 FP argument. */ \ + goto done; \ + } \ + } else { \ + if (nfpr + 1 <= CCALL_NARG_FPR) { \ + sp = &cts->cb.fpr[nfpr++]; \ + goto done; \ + } \ + } \ + fprodd = 0; /* No reordering after the first FP value is on stack. */ \ + } else { + +#define CALLBACK_HANDLE_REGARG_FP2 } + +#endif + #define CALLBACK_HANDLE_REGARG \ - UNUSED(isfp); \ + CALLBACK_HANDLE_REGARG_FP1 \ if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. 
*/ \ if (ngpr + n <= maxgpr) { \ sp = &cts->cb.gpr[ngpr]; \ ngpr += n; \ goto done; \ - } + } CALLBACK_HANDLE_REGARG_FP2 /* Closes the else-block opened by CALLBACK_HANDLE_REGARG_FP1; both are empty of braces for softfp. */ #elif LJ_TARGET_PPC #define CALLBACK_HANDLE_REGARG \ if (isfp) { \ if (nfpr + 1 <= CCALL_NARG_FPR) { \ - sp = &cts->cb.fpr[nfpr]; \ - nfpr += 1; \ + sp = &cts->cb.fpr[nfpr++]; \ cta = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ goto done; \ } \ @@ -382,6 +413,9 @@ static void callback_conv_args(CTState *cts, lua_State *L) MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR; #if CCALL_NARG_FPR MSize nfpr = 0; +#if LJ_TARGET_ARM + MSize fprodd = 0; /* Used by CALLBACK_HANDLE_REGARG_FP1: 0 or index+1 of a cb.fpr[] slot taken by an n == 1 FP argument. */ +#endif #endif if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) { diff --git a/src/lj_ctype.h b/src/lj_ctype.h index 7953654f..7c3b667c 100644 --- a/src/lj_ctype.h +++ b/src/lj_ctype.h @@ -155,7 +155,7 @@ typedef struct CType { #define CCALL_MAX_GPR 8 #define CCALL_MAX_FPR 8 -typedef LJ_ALIGN(8) union FPRCBArg { double d; float f; } FPRCBArg; +typedef LJ_ALIGN(8) union FPRCBArg { double d; float f[2]; } FPRCBArg; /* One double or two floats per slot. */ /* C callback state. Defined here, to avoid dragging in lj_ccall.h. 
*/ diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index 26f97aa3..355a53e6 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc @@ -2414,8 +2414,19 @@ static void build_subroutines(BuildCtx *ctx) |.type CTSTATE, CTState, PC | ldr CTSTATE, GL:r12->ctype_state | add DISPATCH, r12, #GG_G2DISP - | strd CARG12, CTSTATE->cb.gpr[0] + |.if FPU + | str r4, SAVE_R4 + | add r4, sp, CFRAME_SPACE+4+8*8 + | vstmdb r4!, {d8-d15} + |.endif + |.if HFABI + | add r12, CTSTATE, #offsetof(CTState, cb.fpr[8]) + |.endif | strd CARG34, CTSTATE->cb.gpr[2] + | strd CARG12, CTSTATE->cb.gpr[0] + |.if HFABI + | vstmdb r12!, {d0-d7} + |.endif | ldr CARG4, [sp] | add CARG3, sp, #CFRAME_SIZE | mov CARG1, CTSTATE @@ -2448,6 +2459,9 @@ static void build_subroutines(BuildCtx *ctx) | mov CARG2, RA | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) | ldrd CARG12, CTSTATE->cb.gpr[0] + |.if HFABI + | vldr d0, CTSTATE->cb.fpr[0] // Load d0 from cb.fpr[0] (presumably the FP return value -- confirm). + |.endif | b ->vm_leave_unw |.endif | @@ -2460,9 +2474,15 @@ static void build_subroutines(BuildCtx *ctx) | ldr CARG1, CCSTATE:CARG1->spadj | ldrb CARG2, CCSTATE->nsp | add CARG3, CCSTATE, #offsetof(CCallState, stack) + |.if HFABI + | add RB, CCSTATE, #offsetof(CCallState, fpr[0]) // RB = &CCSTATE->fpr[0]; RB is reloaded with ->func below. + |.endif | mov r11, sp | sub sp, sp, CARG1 // Readjust stack. | subs CARG2, CARG2, #1 + |.if HFABI + | vldm RB, {d0-d7} // Load d0-d7 from fpr[0..7]. + |.endif | ldr RB, CCSTATE->func | bmi >2 |1: // Copy stack slots. 
@@ -2471,14 +2491,17 @@ static void build_subroutines(BuildCtx *ctx) | subs CARG2, CARG2, #1 | bpl <1 |2: - | ldr CARG1, CCSTATE->gpr[0] - | ldr CARG2, CCSTATE->gpr[1] - | ldr CARG3, CCSTATE->gpr[2] - | ldr CARG4, CCSTATE->gpr[3] + | ldrd CARG12, CCSTATE->gpr[0] + | ldrd CARG34, CCSTATE->gpr[2] | blx RB | mov sp, r11 - | str CRET1, CCSTATE->gpr[0] - | str CRET2, CCSTATE->gpr[1] + |.if HFABI + | add r12, CCSTATE, #offsetof(CCallState, fpr[4]) + |.endif + | strd CRET1, CCSTATE->gpr[0] + |.if HFABI + | vstmdb r12!, {d0-d3} + |.endif | pop {CCSTATE, r5, r11, pc} |.endif |// Note: vm_ffi_call must be the last function in this object file!