diff --git a/src/lj_ccall.c b/src/lj_ccall.c index 25f54dee..ff15005d 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -574,6 +574,95 @@ goto done; \ } +#elif LJ_TARGET_LOONGARCH64 +/* -- LoongArch lp64 calling conventions ---------------------------------------- */ + +#define CCALL_HANDLE_STRUCTRET \ + /* Return structs of size > 16 by reference. */ \ + cc->retref = !(sz <= 16); \ + if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; + +#define CCALL_HANDLE_STRUCTRET2 \ + unsigned int cl = ccall_classify_struct(cts, ctr); \ + if ((cl & 4) && (cl >> 8) <= 2) { \ + CTSize i = (cl >> 8) - 1; \ + do { ((float *)dp)[i] = cc->fpr[i].f; } while (i--); \ + } else { \ + if (cl > 1) { \ + sp = (uint8_t *)&cc->fpr[0]; \ + if ((cl >> 8) > 2) \ + sp = (uint8_t *)&cc->gpr[0]; \ + } \ + memcpy(dp, sp, ctr->size); \ + } \ + +#define CCALL_HANDLE_COMPLEXRET \ + /* Complex values are returned in 1 or 2 FPRs. */ \ + cc->retref = 0; + +#define CCALL_HANDLE_COMPLEXRET2 \ + if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ + ((float *)dp)[0] = cc->fpr[0].f; \ + ((float *)dp)[1] = cc->fpr[1].f; \ + } else { /* Copy complex double from FPRs. */ \ + ((double *)dp)[0] = cc->fpr[0].d; \ + ((double *)dp)[1] = cc->fpr[1].d; \ + } + +#define CCALL_HANDLE_COMPLEXARG \ + /* Pass complex double by reference. */ \ + if (sz == 4*sizeof(double)) { \ + rp = cdataptr(lj_cdata_new(cts, did, sz)); \ + sz = CTSIZE_PTR; \ + } else if (sz == 2*sizeof(float)) { \ + isfp = 2; \ + sz = 2*CTSIZE_PTR; \ + } else { \ + isfp = 1; \ + sz = 2*CTSIZE_PTR; \ + } + +#define CCALL_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + sp = (uint8_t *)&cc->fpr[0].f; + +#define CCALL_HANDLE_STRUCTARG \ + /* Pass structs of size >16 by reference. */ \ + unsigned int cl = ccall_classify_struct(cts, d); \ + nff = cl >> 8; \ + if (sz > 16) { \ + rp = cdataptr(lj_cdata_new(cts, did, sz)); \ + sz = CTSIZE_PTR; \ + } \ + /* Pass struct in FPRs. */ \ + if (cl > 1) { \ + isfp = (cl & 4) ? 2 : 1; \ + } + + +#define CCALL_HANDLE_REGARG \ + if (isfp && (!isva)) { /* Try to pass argument in FPRs. */ \ + int n2 = ctype_isvector(d->info) ? 1 : \ + isfp == 1 ? n : 2; \ + if (nfpr + n2 <= CCALL_NARG_FPR && nff <= 2) { \ + dp = &cc->fpr[nfpr]; \ + nfpr += n2; \ + goto done; \ + } else { \ + if (ngpr + n2 <= maxgpr) { \ + dp = &cc->gpr[ngpr]; \ + ngpr += n2; \ + goto done; \ + } \ + } \ + } else { /* Try to pass argument in GPRs. */ \ + if (ngpr + n <= maxgpr) { \ + dp = &cc->gpr[ngpr]; \ + ngpr += n; \ + goto done; \ + } \ + } + #else #error "Missing calling convention definitions for this architecture" #endif @@ -889,6 +978,53 @@ static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, #endif +/* -- LoongArch64 ABI struct classification ---------------------------- */ + +#if LJ_TARGET_LOONGARCH64 + +static unsigned int ccall_classify_struct(CTState *cts, CType *ct) +{ + CTSize sz = ct->size; + unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); + while (ct->sib) { + CType *sct; + ct = ctype_get(cts, ct->sib); + if (ctype_isfield(ct->info)) { + sct = ctype_rawchild(cts, ct); + if (ctype_isfp(sct->info)) { + r |= sct->size; + if (!isu) n++; else if (n == 0) n = 1; + } else if (ctype_iscomplex(sct->info)) { + r |= (sct->size >> 1); + if (!isu) n += 2; else if (n < 2) n = 2; + } else if (ctype_isstruct(sct->info)) { + goto substruct; + } else { + goto noth; + } + } else if (ctype_isbitfield(ct->info)) { + goto noth; + } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) { + sct = ctype_rawchild(cts, ct); + substruct: + if (sct->size > 0) { + unsigned int s = ccall_classify_struct(cts, sct); + if (s <= 1) goto noth; + r |= (s & 255); + if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8); + } + } + } + if ((r == 4 || r == 8) && n <= 4) + return r + (n << 8); +noth: /* Not a homogeneous float/double aggregate. */ + return (sz <= 16); /* Return structs of size <= 16 in GPRs. */ +} + + +#endif + + /* -- Common C call handling ---------------------------------------------- */ /* Infer the destination CTypeID for a vararg argument. */ @@ -934,7 +1070,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, MSize fprodd = 0; #endif #endif - +#if LJ_TARGET_LOONGARCH64 + int nff = 0; +#endif /* Clear unused regs to get some determinism in case of misdeclaration. */ memset(cc->gpr, 0, sizeof(cc->gpr)); #if CCALL_NUM_FPR @@ -1060,7 +1198,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, if (isfp && d->size == sizeof(float)) ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */ #endif -#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) +#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_LOONGARCH64 if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) #if LJ_TARGET_MIPS64 || (isfp && nsp == 0) @@ -1090,13 +1228,21 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, CTSize i = (sz >> 2) - 1; do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--); } +#elif LJ_TARGET_LOONGARCH64 + if (isfp == 2 && nff <= 2) { + /* Split complex float into separate registers. */ + CTSize i = (sz >> 2) - 1; + do { + ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; + } while (i--); + } #else UNUSED(isfp); #endif } if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ -#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) +#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) || LJ_TARGET_LOONGARCH64 cc->nfpr = nfpr; /* Required for vararg functions. */ #endif cc->nsp = nsp; diff --git a/src/lj_ccall.h b/src/lj_ccall.h index 0b3c5244..4236184b 100644 --- a/src/lj_ccall.h +++ b/src/lj_ccall.h @@ -126,6 +126,21 @@ typedef union FPRArg { struct { LJ_ENDIAN_LOHI(float f; , float g;) }; } FPRArg; +#elif LJ_TARGET_LOONGARCH64 + +#define CCALL_NARG_GPR 8 +#define CCALL_NARG_FPR 8 +#define CCALL_NRET_GPR 2 +#define CCALL_NRET_FPR 2 +#define CCALL_SPS_EXTRA 3 +#define CCALL_SPS_FREE 1 + +typedef intptr_t GPRArg; +typedef union FPRArg { + double d; + struct { LJ_ENDIAN_LOHI(float f; , float g;) }; +} FPRArg; + #else #error "Missing calling convention definitions for this architecture" #endif @@ -168,7 +183,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ #elif LJ_TARGET_ARM64 void *retp; /* Aggregate return pointer in x8. */ -#elif LJ_TARGET_PPC +#elif LJ_TARGET_PPC || LJ_TARGET_LOONGARCH64 uint8_t nfpr; /* Number of arguments in FPRs. */ #endif #if LJ_32 diff --git a/src/vm_loongarch64.dasc b/src/vm_loongarch64.dasc index ab591cc2..8c6cde99 100644 --- a/src/vm_loongarch64.dasc +++ b/src/vm_loongarch64.dasc @@ -2366,6 +2366,70 @@ static void build_subroutines(BuildCtx *ctx) | jirl r0, ra, 0 |.endif | + |//----------------------------------------------------------------------- + |//-- FFI helper functions ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_ffi_call: // Call C function via FFI. + | // Caveat: needs special frame unwinding, see below. + |.if FFI + | .type CCSTATE, CCallState, CARG1 + | ld.w TMP1, CCSTATE->spadj + | ld.bu CARG2, CCSTATE->nsp + | ld.bu CARG3, CCSTATE->nfpr + | or TMP2, sp, r0 + | sub.d sp, sp, TMP1 + | st.d ra, -8(TMP2) + | slli.w CARG2, CARG2, 3 + | st.d r23, -16(TMP2) + | st.d CCSTATE, -24(TMP2) + | or r23, TMP2, r0 + | addi.d TMP1, CCSTATE, offsetof(CCallState, stack) + | or TMP2, sp, r0 + | add.d TMP3, TMP1, CARG2 + | beqz CARG2, >2 + |1: + | ld.d TMP0, 0(TMP1) + | addi.d TMP1, TMP1, 8 + | sltu TMP4, TMP1, TMP3 + | st.d TMP0, 0(TMP2) + | addi.d TMP2, TMP2, 8 + | bnez TMP4, <1 + |2: + | beqz CARG3, >3 + | fld.d FARG1, CCSTATE->fpr[0] + | fld.d FARG2, CCSTATE->fpr[1] + | fld.d FARG3, CCSTATE->fpr[2] + | fld.d FARG4, CCSTATE->fpr[3] + | fld.d FARG5, CCSTATE->fpr[4] + | fld.d FARG6, CCSTATE->fpr[5] + | fld.d FARG7, CCSTATE->fpr[6] + | fld.d FARG8, CCSTATE->fpr[7] + |3: + | ld.d TMP3, CCSTATE->func + | ld.d CARG2, CCSTATE->gpr[1] + | ld.d CARG3, CCSTATE->gpr[2] + | ld.d CARG4, CCSTATE->gpr[3] + | ld.d CARG5, CCSTATE->gpr[4] + | ld.d CARG6, CCSTATE->gpr[5] + | ld.d CARG7, CCSTATE->gpr[6] + | ld.d CARG8, CCSTATE->gpr[7] + | ld.d CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. + | jirl r1, TMP3, 0 + | ld.d CCSTATE:TMP1, -24(r23) + | ld.d TMP2, -16(r23) + | ld.d ra, -8(r23) + | st.d CRET1, CCSTATE:TMP1->gpr[0] + | st.d CRET2, CCSTATE:TMP1->gpr[1] + | fst.d FRET1, CCSTATE:TMP1->fpr[0] + | fst.d FRET2, CCSTATE:TMP1->fpr[1] + | or sp, r23, r0 + | or r23, TMP2, r0 + | jirl r0, ra, 0 + |.endif + |// Note: vm_ffi_call must be the last function in this object file! + | + |//----------------------------------------------------------------------- } /* Generate the code for a single instruction. */