LoongArch64: Add LoongArch lp64 calling conventions and FFI C call handling

Co-developed-by: Qiqi Huang <huangqiqi@loongson.cn>
This commit is contained in:
Xiaolin Zhao 2022-07-27 18:20:58 +08:00
parent 73360555d1
commit 700f82bb79
3 changed files with 229 additions and 4 deletions

View File

@ -574,6 +574,95 @@
goto done; \
}
#elif LJ_TARGET_LOONGARCH64
/* -- LoongArch lp64 calling conventions ---------------------------------------- */
#define CCALL_HANDLE_STRUCTRET \
/* Return structs of size > 16 by reference. */ \
cc->retref = !(sz <= 16); \
if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
#define CCALL_HANDLE_STRUCTRET2 \
unsigned int cl = ccall_classify_struct(cts, ctr); \
if ((cl & 4) && (cl >> 8) <= 2) { \
CTSize i = (cl >> 8) - 1; \
do { ((float *)dp)[i] = cc->fpr[i].f; } while (i--); \
} else { \
if (cl > 1) { \
sp = (uint8_t *)&cc->fpr[0]; \
if ((cl >> 8) > 2) \
sp = (uint8_t *)&cc->gpr[0]; \
} \
memcpy(dp, sp, ctr->size); \
} \
#define CCALL_HANDLE_COMPLEXRET \
/* Complex values are returned in 1 or 2 FPRs. */ \
cc->retref = 0;
#define CCALL_HANDLE_COMPLEXRET2 \
if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
((float *)dp)[0] = cc->fpr[0].f; \
((float *)dp)[1] = cc->fpr[1].f; \
} else { /* Copy complex double from FPRs. */ \
((double *)dp)[0] = cc->fpr[0].d; \
((double *)dp)[1] = cc->fpr[1].d; \
}
#define CCALL_HANDLE_COMPLEXARG \
/* Pass complex double by reference. */ \
if (sz == 4*sizeof(double)) { \
rp = cdataptr(lj_cdata_new(cts, did, sz)); \
sz = CTSIZE_PTR; \
} else if (sz == 2*sizeof(float)) { \
isfp = 2; \
sz = 2*CTSIZE_PTR; \
} else { \
isfp = 1; \
sz = 2*CTSIZE_PTR; \
}
#define CCALL_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
sp = (uint8_t *)&cc->fpr[0].f;
#define CCALL_HANDLE_STRUCTARG \
/* Pass structs of size >16 by reference. */ \
unsigned int cl = ccall_classify_struct(cts, d); \
nff = cl >> 8; \
if (sz > 16) { \
rp = cdataptr(lj_cdata_new(cts, did, sz)); \
sz = CTSIZE_PTR; \
} \
/* Pass struct in FPRs. */ \
if (cl > 1) { \
isfp = (cl & 4) ? 2 : 1; \
}
#define CCALL_HANDLE_REGARG \
if (isfp && (!isva)) { /* Try to pass argument in FPRs. */ \
int n2 = ctype_isvector(d->info) ? 1 : \
isfp == 1 ? n : 2; \
if (nfpr + n2 <= CCALL_NARG_FPR && nff <= 2) { \
dp = &cc->fpr[nfpr]; \
nfpr += n2; \
goto done; \
} else { \
if (ngpr + n2 <= maxgpr) { \
dp = &cc->gpr[ngpr]; \
ngpr += n2; \
goto done; \
} \
} \
} else { /* Try to pass argument in GPRs. */ \
if (ngpr + n <= maxgpr) { \
dp = &cc->gpr[ngpr]; \
ngpr += n; \
goto done; \
} \
}
#else
#error "Missing calling convention definitions for this architecture"
#endif
@ -889,6 +978,53 @@ static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp,
#endif
/* -- LoongArch64 ABI struct classification ---------------------------- */
#if LJ_TARGET_LOONGARCH64
static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
{
CTSize sz = ct->size;
unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
while (ct->sib) {
CType *sct;
ct = ctype_get(cts, ct->sib);
if (ctype_isfield(ct->info)) {
sct = ctype_rawchild(cts, ct);
if (ctype_isfp(sct->info)) {
r |= sct->size;
if (!isu) n++; else if (n == 0) n = 1;
} else if (ctype_iscomplex(sct->info)) {
r |= (sct->size >> 1);
if (!isu) n += 2; else if (n < 2) n = 2;
} else if (ctype_isstruct(sct->info)) {
goto substruct;
} else {
goto noth;
}
} else if (ctype_isbitfield(ct->info)) {
goto noth;
} else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
sct = ctype_rawchild(cts, ct);
substruct:
if (sct->size > 0) {
unsigned int s = ccall_classify_struct(cts, sct);
if (s <= 1) goto noth;
r |= (s & 255);
if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
}
}
}
if ((r == 4 || r == 8) && n <= 4)
return r + (n << 8);
noth: /* Not a homogeneous float/double aggregate. */
return (sz <= 16); /* Return structs of size <= 16 in GPRs. */
}
#endif
/* -- Common C call handling ---------------------------------------------- */
/* Infer the destination CTypeID for a vararg argument. */
@ -934,7 +1070,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
MSize fprodd = 0;
#endif
#endif
#if LJ_TARGET_LOONGARCH64
int nff = 0;
#endif
/* Clear unused regs to get some determinism in case of misdeclaration. */
memset(cc->gpr, 0, sizeof(cc->gpr));
#if CCALL_NUM_FPR
@ -1060,7 +1198,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
if (isfp && d->size == sizeof(float))
((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */
#endif
#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_LOONGARCH64
if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
#if LJ_TARGET_MIPS64
|| (isfp && nsp == 0)
@ -1090,13 +1228,21 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
CTSize i = (sz >> 2) - 1;
do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--);
}
#elif LJ_TARGET_LOONGARCH64
if (isfp == 2 && nff <= 2) {
/* Split complex float into separate registers. */
CTSize i = (sz >> 2) - 1;
do {
((uint64_t *)dp)[i] = ((uint32_t *)dp)[i];
} while (i--);
}
#else
UNUSED(isfp);
#endif
}
if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */
#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) || LJ_TARGET_LOONGARCH64
cc->nfpr = nfpr; /* Required for vararg functions. */
#endif
cc->nsp = nsp;

View File

@ -126,6 +126,21 @@ typedef union FPRArg {
struct { LJ_ENDIAN_LOHI(float f; , float g;) };
} FPRArg;
#elif LJ_TARGET_LOONGARCH64
#define CCALL_NARG_GPR 8
#define CCALL_NARG_FPR 8
#define CCALL_NRET_GPR 2
#define CCALL_NRET_FPR 2
#define CCALL_SPS_EXTRA 3
#define CCALL_SPS_FREE 1
typedef intptr_t GPRArg;
typedef union FPRArg {
double d;
struct { LJ_ENDIAN_LOHI(float f; , float g;) };
} FPRArg;
#else
#error "Missing calling convention definitions for this architecture"
#endif
@ -168,7 +183,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */
#elif LJ_TARGET_ARM64
void *retp; /* Aggregate return pointer in x8. */
#elif LJ_TARGET_PPC
#elif LJ_TARGET_PPC || LJ_TARGET_LOONGARCH64
uint8_t nfpr; /* Number of arguments in FPRs. */
#endif
#if LJ_32

View File

@ -2366,6 +2366,70 @@ static void build_subroutines(BuildCtx *ctx)
| jirl r0, ra, 0
|.endif
|
|//-----------------------------------------------------------------------
|//-- FFI helper functions -----------------------------------------------
|//-----------------------------------------------------------------------
|
|->vm_ffi_call: // Call C function via FFI.
| // Caveat: needs special frame unwinding, see below.
|.if FFI
| .type CCSTATE, CCallState, CARG1
| ld.w TMP1, CCSTATE->spadj
| ld.bu CARG2, CCSTATE->nsp
| ld.bu CARG3, CCSTATE->nfpr
| or TMP2, sp, r0
| sub.d sp, sp, TMP1
| st.d ra, -8(TMP2)
| slli.w CARG2, CARG2, 3
| st.d r23, -16(TMP2)
| st.d CCSTATE, -24(TMP2)
| or r23, TMP2, r0
| addi.d TMP1, CCSTATE, offsetof(CCallState, stack)
| or TMP2, sp, r0
| add.d TMP3, TMP1, CARG2
| beqz CARG2, >2
|1:
| ld.d TMP0, 0(TMP1)
| addi.d TMP1, TMP1, 8
| sltu TMP4, TMP1, TMP3
| st.d TMP0, 0(TMP2)
| addi.d TMP2, TMP2, 8
| bnez TMP4, <1
|2:
| beqz CARG3, >3
| fld.d FARG1, CCSTATE->fpr[0]
| fld.d FARG2, CCSTATE->fpr[1]
| fld.d FARG3, CCSTATE->fpr[2]
| fld.d FARG4, CCSTATE->fpr[3]
| fld.d FARG5, CCSTATE->fpr[4]
| fld.d FARG6, CCSTATE->fpr[5]
| fld.d FARG7, CCSTATE->fpr[6]
| fld.d FARG8, CCSTATE->fpr[7]
|3:
| ld.d TMP3, CCSTATE->func
| ld.d CARG2, CCSTATE->gpr[1]
| ld.d CARG3, CCSTATE->gpr[2]
| ld.d CARG4, CCSTATE->gpr[3]
| ld.d CARG5, CCSTATE->gpr[4]
| ld.d CARG6, CCSTATE->gpr[5]
| ld.d CARG7, CCSTATE->gpr[6]
| ld.d CARG8, CCSTATE->gpr[7]
| ld.d CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
| jirl r1, TMP3, 0
| ld.d CCSTATE:TMP1, -24(r23)
| ld.d TMP2, -16(r23)
| ld.d ra, -8(r23)
| st.d CRET1, CCSTATE:TMP1->gpr[0]
| st.d CRET2, CCSTATE:TMP1->gpr[1]
| fst.d FRET1, CCSTATE:TMP1->fpr[0]
| fst.d FRET2, CCSTATE:TMP1->fpr[1]
| or sp, r23, r0
| or r23, TMP2, r0
| jirl r0, ra, 0
|.endif
|// Note: vm_ffi_call must be the last function in this object file!
|
|//-----------------------------------------------------------------------
}
/* Generate the code for a single instruction. */