FFI: Unify stack setup for C calls in interpreter.

This commit is contained in:
Mike Pall 2023-08-29 02:12:13 +02:00
parent 7cc53f0b85
commit cf903edb30
9 changed files with 63 additions and 54 deletions

View File

@ -20,12 +20,15 @@
#if LJ_TARGET_X86 #if LJ_TARGET_X86
/* -- x86 calling conventions --------------------------------------------- */ /* -- x86 calling conventions --------------------------------------------- */
#define CCALL_PUSH(arg) \
*(GPRArg *)((uint8_t *)cc->stack + nsp) = (GPRArg)(arg), nsp += CTSIZE_PTR
#if LJ_ABI_WIN #if LJ_ABI_WIN
#define CCALL_HANDLE_STRUCTRET \ #define CCALL_HANDLE_STRUCTRET \
/* Return structs bigger than 8 by reference (on stack only). */ \ /* Return structs bigger than 8 by reference (on stack only). */ \
cc->retref = (sz > 8); \ cc->retref = (sz > 8); \
if (cc->retref) cc->stack[nsp++] = (GPRArg)dp; if (cc->retref) CCALL_PUSH(dp);
#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET #define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET
@ -40,7 +43,7 @@
if (ngpr < maxgpr) \ if (ngpr < maxgpr) \
cc->gpr[ngpr++] = (GPRArg)dp; \ cc->gpr[ngpr++] = (GPRArg)dp; \
else \ else \
cc->stack[nsp++] = (GPRArg)dp; \ CCALL_PUSH(dp); \
} else { /* Struct with single FP field ends up in FPR. */ \ } else { /* Struct with single FP field ends up in FPR. */ \
cc->resx87 = ccall_classify_struct(cts, ctr); \ cc->resx87 = ccall_classify_struct(cts, ctr); \
} }
@ -56,7 +59,7 @@
if (ngpr < maxgpr) \ if (ngpr < maxgpr) \
cc->gpr[ngpr++] = (GPRArg)dp; \ cc->gpr[ngpr++] = (GPRArg)dp; \
else \ else \
cc->stack[nsp++] = (GPRArg)dp; CCALL_PUSH(dp);
#endif #endif
@ -67,7 +70,7 @@
if (ngpr < maxgpr) \ if (ngpr < maxgpr) \
cc->gpr[ngpr++] = (GPRArg)dp; \ cc->gpr[ngpr++] = (GPRArg)dp; \
else \ else \
cc->stack[nsp++] = (GPRArg)dp; \ CCALL_PUSH(dp); \
} }
#endif #endif
@ -278,8 +281,8 @@
if (ngpr < maxgpr) { \ if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \ dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \ if (ngpr + n > maxgpr) { \
nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \ ngpr = maxgpr; \
} else { \ } else { \
ngpr += n; \ ngpr += n; \
@ -471,8 +474,8 @@
if (ngpr < maxgpr) { \ if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \ dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \ if (ngpr + n > maxgpr) { \
nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \ ngpr = maxgpr; \
} else { \ } else { \
ngpr += n; \ ngpr += n; \
@ -565,8 +568,8 @@
if (ngpr < maxgpr) { \ if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \ dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \ if (ngpr + n > maxgpr) { \
nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \ ngpr = maxgpr; \
} else { \ } else { \
ngpr += n; \ ngpr += n; \
@ -698,10 +701,11 @@ static int ccall_struct_arg(CCallState *cc, CTState *cts, CType *d, int *rcl,
lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
if (ccall_struct_reg(cc, cts, dp, rcl)) { if (ccall_struct_reg(cc, cts, dp, rcl)) {
/* Register overflow? Pass on stack. */ /* Register overflow? Pass on stack. */
MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1; MSize nsp = cc->nsp, sz = rcl[1] ? 2*CTSIZE_PTR : CTSIZE_PTR;
if (nsp + n > CCALL_MAXSTACK) return 1; /* Too many arguments. */ if (nsp + sz > CCALL_SIZE_STACK)
cc->nsp = nsp + n; return 1; /* Too many arguments. */
memcpy(&cc->stack[nsp], dp, n*CTSIZE_PTR); cc->nsp = nsp + sz;
memcpy((uint8_t *)cc->stack + nsp, dp, sz);
} }
return 0; /* Ok. */ return 0; /* Ok. */
} }
@ -1022,22 +1026,23 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
} else { } else {
sz = CTSIZE_PTR; sz = CTSIZE_PTR;
} }
sz = (sz + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); n = (sz + CTSIZE_PTR-1) / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */
n = sz / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */
CCALL_HANDLE_REGARG /* Handle register arguments. */ CCALL_HANDLE_REGARG /* Handle register arguments. */
/* Otherwise pass argument on stack. */ /* Otherwise pass argument on stack. */
if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) { if (CCALL_ALIGN_STACKARG) { /* Align argument on stack. */
MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1; MSize align = (1u << ctype_align(d->info)) - 1;
nsp = (nsp + align) & ~align; /* Align argument on stack. */ if (rp)
align = CTSIZE_PTR-1;
nsp = (nsp + align) & ~align;
} }
if (nsp + n > CCALL_MAXSTACK) { /* Too many arguments. */ dp = ((uint8_t *)cc->stack) + nsp;
nsp += n * CTSIZE_PTR;
if (nsp > CCALL_SIZE_STACK) { /* Too many arguments. */
err_nyi: err_nyi:
lj_err_caller(L, LJ_ERR_FFI_NYICALL); lj_err_caller(L, LJ_ERR_FFI_NYICALL);
} }
dp = &cc->stack[nsp];
nsp += n;
isva = 0; isva = 0;
done: done:
@ -1099,10 +1104,10 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) #if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
cc->nfpr = nfpr; /* Required for vararg functions. */ cc->nfpr = nfpr; /* Required for vararg functions. */
#endif #endif
cc->nsp = nsp; cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA)*CTSIZE_PTR; cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA) * CTSIZE_PTR;
if (nsp > CCALL_SPS_FREE) if (cc->nsp > CCALL_SPS_FREE * CTSIZE_PTR)
cc->spadj += (((nsp-CCALL_SPS_FREE)*CTSIZE_PTR + 15u) & ~15u); cc->spadj += (((cc->nsp - CCALL_SPS_FREE * CTSIZE_PTR) + 15u) & ~15u);
return gcsteps; return gcsteps;
} }

View File

@ -152,14 +152,15 @@ typedef union FPRArg {
LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR); LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR);
LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR); LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR);
#define CCALL_MAXSTACK 32 #define CCALL_NUM_STACK 31
#define CCALL_SIZE_STACK (CCALL_NUM_STACK * CTSIZE_PTR)
/* -- C call state -------------------------------------------------------- */ /* -- C call state -------------------------------------------------------- */
typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
void (*func)(void); /* Pointer to called function. */ void (*func)(void); /* Pointer to called function. */
uint32_t spadj; /* Stack pointer adjustment. */ uint32_t spadj; /* Stack pointer adjustment. */
uint8_t nsp; /* Number of stack slots. */ uint8_t nsp; /* Number of bytes on stack. */
uint8_t retref; /* Return value by reference. */ uint8_t retref; /* Return value by reference. */
#if LJ_TARGET_X64 #if LJ_TARGET_X64
uint8_t ngpr; /* Number of arguments in GPRs. */ uint8_t ngpr; /* Number of arguments in GPRs. */
@ -178,7 +179,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */ FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */
#endif #endif
GPRArg gpr[CCALL_NUM_GPR]; /* Arguments/results in GPRs. */ GPRArg gpr[CCALL_NUM_GPR]; /* Arguments/results in GPRs. */
GPRArg stack[CCALL_MAXSTACK]; /* Stack slots. */ GPRArg stack[CCALL_NUM_STACK]; /* Stack slots. */
} CCallState; } CCallState;
/* -- C call handling ----------------------------------------------------- */ /* -- C call handling ----------------------------------------------------- */

View File

@ -2571,16 +2571,16 @@ static void build_subroutines(BuildCtx *ctx)
|.endif |.endif
| mov r11, sp | mov r11, sp
| sub sp, sp, CARG1 // Readjust stack. | sub sp, sp, CARG1 // Readjust stack.
| subs CARG2, CARG2, #1 | subs CARG2, CARG2, #4
|.if HFABI |.if HFABI
| vldm RB, {d0-d7} | vldm RB, {d0-d7}
|.endif |.endif
| ldr RB, CCSTATE->func | ldr RB, CCSTATE->func
| bmi >2 | bmi >2
|1: // Copy stack slots. |1: // Copy stack slots.
| ldr CARG4, [CARG3, CARG2, lsl #2] | ldr CARG4, [CARG3, CARG2]
| str CARG4, [sp, CARG2, lsl #2] | str CARG4, [sp, CARG2]
| subs CARG2, CARG2, #1 | subs CARG2, CARG2, #4
| bpl <1 | bpl <1
|2: |2:
| ldrd CARG12, CCSTATE->gpr[0] | ldrd CARG12, CCSTATE->gpr[0]

View File

@ -2222,14 +2222,14 @@ static void build_subroutines(BuildCtx *ctx)
| ldr TMP0w, CCSTATE:x0->spadj | ldr TMP0w, CCSTATE:x0->spadj
| ldrb TMP1w, CCSTATE->nsp | ldrb TMP1w, CCSTATE->nsp
| add TMP2, CCSTATE, #offsetof(CCallState, stack) | add TMP2, CCSTATE, #offsetof(CCallState, stack)
| subs TMP1, TMP1, #1 | subs TMP1, TMP1, #8
| ldr TMP3, CCSTATE->func | ldr TMP3, CCSTATE->func
| sub sp, sp, TMP0 | sub sp, sp, TMP0
| bmi >2 | bmi >2
|1: // Copy stack slots |1: // Copy stack slots
| ldr TMP0, [TMP2, TMP1, lsl #3] | ldr TMP0, [TMP2, TMP1]
| str TMP0, [sp, TMP1, lsl #3] | str TMP0, [sp, TMP1]
| subs TMP1, TMP1, #1 | subs TMP1, TMP1, #8
| bpl <1 | bpl <1
|2: |2:
| ldp x0, x1, CCSTATE->gpr[0] | ldp x0, x1, CCSTATE->gpr[0]

View File

@ -2951,7 +2951,6 @@ static void build_subroutines(BuildCtx *ctx)
| move TMP2, sp | move TMP2, sp
| subu sp, sp, TMP1 | subu sp, sp, TMP1
| sw ra, -4(TMP2) | sw ra, -4(TMP2)
| sll CARG2, CARG2, 2
| sw r16, -8(TMP2) | sw r16, -8(TMP2)
| sw CCSTATE, -12(TMP2) | sw CCSTATE, -12(TMP2)
| move r16, TMP2 | move r16, TMP2

View File

@ -3065,7 +3065,6 @@ static void build_subroutines(BuildCtx *ctx)
| move TMP2, sp | move TMP2, sp
| dsubu sp, sp, TMP1 | dsubu sp, sp, TMP1
| sd ra, -8(TMP2) | sd ra, -8(TMP2)
| sll CARG2, CARG2, 3
| sd r16, -16(TMP2) | sd r16, -16(TMP2)
| sd CCSTATE, -24(TMP2) | sd CCSTATE, -24(TMP2)
| move r16, TMP2 | move r16, TMP2

View File

@ -3269,14 +3269,13 @@ static void build_subroutines(BuildCtx *ctx)
| stw TMP0, 4(sp) | stw TMP0, 4(sp)
| cmpwi cr1, CARG3, 0 | cmpwi cr1, CARG3, 0
| mr TMP2, sp | mr TMP2, sp
| addic. CARG2, CARG2, -1 | addic. CARG2, CARG2, -4
| stwux sp, sp, TMP1 | stwux sp, sp, TMP1
| crnot 4*cr1+eq, 4*cr1+eq // For vararg calls. | crnot 4*cr1+eq, 4*cr1+eq // For vararg calls.
| stw r14, -4(TMP2) | stw r14, -4(TMP2)
| stw CCSTATE, -8(TMP2) | stw CCSTATE, -8(TMP2)
| mr r14, TMP2 | mr r14, TMP2
| la TMP1, CCSTATE->stack | la TMP1, CCSTATE->stack
| slwi CARG2, CARG2, 2
| blty >2 | blty >2
| la TMP2, 8(sp) | la TMP2, 8(sp)
|1: |1:

View File

@ -2755,12 +2755,12 @@ static void build_subroutines(BuildCtx *ctx)
| |
| // Copy stack slots. | // Copy stack slots.
| movzx ecx, byte CCSTATE->nsp | movzx ecx, byte CCSTATE->nsp
| sub ecx, 1 | sub ecx, 8
| js >2 | js >2
|1: |1:
| mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)] | mov rax, [CCSTATE+rcx+offsetof(CCallState, stack)]
| mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax | mov [rsp+rcx+CCALL_SPS_EXTRA*8], rax
| sub ecx, 1 | sub ecx, 8
| jns <1 | jns <1
|2: |2:
| |

View File

@ -3314,19 +3314,25 @@ static void build_subroutines(BuildCtx *ctx)
| |
| // Copy stack slots. | // Copy stack slots.
| movzx ecx, byte CCSTATE->nsp | movzx ecx, byte CCSTATE->nsp
| sub ecx, 1 |.if X64
| sub ecx, 8
| js >2 | js >2
|1: |1:
|.if X64 | mov rax, [CCSTATE+rcx+offsetof(CCallState, stack)]
| mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)] | mov [rsp+rcx+CCALL_SPS_EXTRA*8], rax
| mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax | sub ecx, 8
|.else
| mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)]
| mov [esp+ecx*4], eax
|.endif
| sub ecx, 1
| jns <1 | jns <1
|2: |2:
|.else
| sub ecx, 4
| js >2
|1:
| mov eax, [CCSTATE+ecx+offsetof(CCallState, stack)]
| mov [esp+ecx], eax
| sub ecx, 4
| jns <1
|2:
|.endif
| |
|.if X64 |.if X64
| movzx eax, byte CCSTATE->nfpr | movzx eax, byte CCSTATE->nfpr