From 9eb94f1afc108b216e5161977bdbf8093bf5ecb3 Mon Sep 17 00:00:00 2001 From: gns Date: Wed, 6 Mar 2024 09:21:02 +0800 Subject: [PATCH] riscv(ffi): add call convention and support framework --- src/lj_ccall.c | 156 +++++++++++++++++++++++++++++++- src/lj_ccall.h | 17 +++- src/lj_ccallback.c | 64 +++++++++++++- src/lj_target_riscv.h | 2 +- src/vm_riscv64.dasc | 201 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 435 insertions(+), 5 deletions(-) diff --git a/src/lj_ccall.c b/src/lj_ccall.c index ae69cd28..bb4da414 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -575,6 +575,97 @@ goto done; \ } +#elif LJ_TARGET_RISCV64 +/* -- RISC-V lp64d calling conventions ------------------------------------ */ + +#define CCALL_HANDLE_STRUCTRET \ + /* Return structs of size > 16 by reference. */ \ + cc->retref = !(sz <= 16); \ + if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; + +#define CCALL_HANDLE_STRUCTRET2 \ + unsigned int cl = ccall_classify_struct(cts, ctr); \ + if ((cl & 4) && (cl >> 8) <= 2) { \ + CTSize i = (cl >> 8) - 1; \ + do { ((float *)dp)[i] = cc->fpr[i].f; } while (i--); \ + } else { \ + if (cl > 1) { \ + sp = (uint8_t *)&cc->fpr[0]; \ + if ((cl >> 8) > 2) \ + sp = (uint8_t *)&cc->gpr[0]; \ + } \ + memcpy(dp, sp, ctr->size); \ + } \ + +#define CCALL_HANDLE_COMPLEXRET \ + /* Complex values are returned in 1 or 2 FPRs. */ \ + cc->retref = 0; + +#define CCALL_HANDLE_COMPLEXRET2 \ + if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ + ((float *)dp)[0] = cc->fpr[0].f; \ + ((float *)dp)[1] = cc->fpr[1].f; \ + } else { /* Copy complex double from FPRs. */ \ + ((double *)dp)[0] = cc->fpr[0].d; \ + ((double *)dp)[1] = cc->fpr[1].d; \ + } + +#define CCALL_HANDLE_COMPLEXARG \ + /* Pass long double complex by reference. */ \ + if (sz == 2*sizeof(long double)) { \ + rp = cdataptr(lj_cdata_new(cts, did, sz)); \ + sz = CTSIZE_PTR; \ + } \ + /* Pass complex in two FPRs or on stack. */ \ + else if (sz == 2*sizeof(float)) { \ + isfp = 2; \ + sz = 2*CTSIZE_PTR; \ + } else { \ + isfp = 1; \ + sz = 2*CTSIZE_PTR; \ + } + +#define CCALL_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + sp = (uint8_t *)&cc->fpr[0].f; + +#define CCALL_HANDLE_STRUCTARG \ + /* Pass structs of size >16 by reference. */ \ + unsigned int cl = ccall_classify_struct(cts, d); \ + nff = cl >> 8; \ + if (sz > 16) { \ + rp = cdataptr(lj_cdata_new(cts, did, sz)); \ + sz = CTSIZE_PTR; \ + } \ + /* Pass struct in FPRs. */ \ + if (cl > 1) { \ + isfp = (cl & 4) ? 2 : 1; \ + } + + +#define CCALL_HANDLE_REGARG \ + if (isfp && (!isva)) { /* Try to pass argument in FPRs. */ \ + int n2 = ctype_isvector(d->info) ? 1 : \ + isfp == 1 ? n : 2; \ + if (nfpr + n2 <= CCALL_NARG_FPR && nff <= 2) { \ + dp = &cc->fpr[nfpr]; \ + nfpr += n2; \ + goto done; \ + } else { \ + if (ngpr + n2 <= maxgpr) { \ + dp = &cc->gpr[ngpr]; \ + ngpr += n2; \ + goto done; \ + } \ + } \ + } else { /* Try to pass argument in GPRs. */ \ + if (ngpr + n <= maxgpr) { \ + dp = &cc->gpr[ngpr]; \ + ngpr += n; \ + goto done; \ + } \ + } + #else #error "Missing calling convention definitions for this architecture" #endif @@ -891,6 +982,51 @@ static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, #endif +/* -- RISC-V ABI struct classification ---------------------------- */ + +#if LJ_TARGET_RISCV64 + +static unsigned int ccall_classify_struct(CTState *cts, CType *ct) +{ + CTSize sz = ct->size; + unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); + while (ct->sib) { + CType *sct; + ct = ctype_get(cts, ct->sib); + if (ctype_isfield(ct->info)) { + sct = ctype_rawchild(cts, ct); + if (ctype_isfp(sct->info)) { + r |= sct->size; + if (!isu) n++; else if (n == 0) n = 1; + } else if (ctype_iscomplex(sct->info)) { + r |= (sct->size >> 1); + if (!isu) n += 2; else if (n < 2) n = 2; + } else if (ctype_isstruct(sct->info)) { + goto substruct; + } else { + goto noth; + } + } else if (ctype_isbitfield(ct->info)) { + goto noth; + } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) { + sct = ctype_rawchild(cts, ct); + substruct: + if (sct->size > 0) { + unsigned int s = ccall_classify_struct(cts, sct); + if (s <= 1) goto noth; + r |= (s & 255); + if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8); + } + } + } + if ((r == 4 || r == 8) && n <= 4) + return r + (n << 8); +noth: /* Not a homogeneous float/double aggregate. */ + return (sz <= 16); /* Return structs of size <= 16 in GPRs. */ +} + +#endif + /* -- Common C call handling ---------------------------------------------- */ /* Infer the destination CTypeID for a vararg argument. */ @@ -937,6 +1073,10 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, #endif #endif +#if LJ_TARGET_RISCV64 + int nff = 0; +#endif + /* Clear unused regs to get some determinism in case of misdeclaration. */ memset(cc->gpr, 0, sizeof(cc->gpr)); #if CCALL_NUM_FPR @@ -1077,7 +1217,11 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, if (isfp && d->size == sizeof(float)) ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */ #endif -#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) +#if LJ_TARGET_RISCV64 + if (isfp && d->size == sizeof(float)) + ((uint32_t *)dp)[1] = 0xffffffffu; /* Float NaN boxing */ +#endif +#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64 if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) #if LJ_TARGET_MIPS64 || (isfp && nsp == 0) @@ -1107,6 +1251,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, CTSize i = (sz >> 2) - 1; do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--); } +#elif LJ_TARGET_RISCV64 + if (isfp == 2 && nff <= 2) { + /* Split complex float into separate registers. */ + CTSize i = (sz >> 2) - 1; + do { + ((uint64_t *)dp)[i] = 0xffffffff00000000ul | ((uint32_t *)dp)[i]; + } while (i--); + } #else UNUSED(isfp); #endif @@ -1116,7 +1268,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, if ((int32_t)nsp < 0) nsp = 0; #endif -#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) +#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) || LJ_TARGET_RISCV64 cc->nfpr = nfpr; /* Required for vararg functions. */ #endif cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); diff --git a/src/lj_ccall.h b/src/lj_ccall.h index 3528fca5..d8c59fd8 100644 --- a/src/lj_ccall.h +++ b/src/lj_ccall.h @@ -129,6 +129,21 @@ typedef union FPRArg { struct { LJ_ENDIAN_LOHI(float f; , float g;) }; } FPRArg; +#elif LJ_TARGET_RISCV64 + +#define CCALL_NARG_GPR 8 +#define CCALL_NARG_FPR 8 +#define CCALL_NRET_GPR 2 +#define CCALL_NRET_FPR 2 +#define CCALL_SPS_EXTRA 3 +#define CCALL_SPS_FREE 1 + +typedef intptr_t GPRArg; +typedef union FPRArg { + double d; + struct { LJ_ENDIAN_LOHI(float f; , float g;) }; +} FPRArg; + #else #error "Missing calling convention definitions for this architecture" #endif @@ -175,7 +190,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ #elif LJ_TARGET_ARM64 void *retp; /* Aggregate return pointer in x8. */ -#elif LJ_TARGET_PPC +#elif LJ_TARGET_PPC || LJ_TARGET_RISCV64 uint8_t nfpr; /* Number of arguments in FPRs. */ #endif #if LJ_32 diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index 52f92932..e904c493 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -71,6 +71,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) #define CALLBACK_MCODE_HEAD 52 +#elif LJ_TARGET_RISCV64 + +#define CALLBACK_MCODE_HEAD 68 + #else /* Missing support for this architecture. */ @@ -238,6 +242,39 @@ static void *callback_mcode_init(global_State *g, uint32_t *page) } return p; } +#elif LJ_TARGET_RISCV64 +static void *callback_mcode_init(global_State *g, uint32_t *page) +{ + uint32_t *p = page; + uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback; + uintptr_t ug = (uintptr_t)(void *)g; + uintptr_t target_hi = (target >> 32), target_lo = target & 0xffffffffULL; + uintptr_t ug_hi = (ug >> 32), ug_lo = ug & 0xffffffffULL; + MSize slot; + *p++ = RISCVI_LUI | RISCVF_D(RID_X6) | RISCVF_IMMU(RISCVF_HI(target_hi)); + *p++ = RISCVI_LUI | RISCVF_D(RID_X7) | RISCVF_IMMU(RISCVF_HI(ug_hi)); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(RISCVF_LO(target_hi)); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(RISCVF_LO(ug_hi)); + *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11); + *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo >> 21); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo >> 21); + *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11); + *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI((target_lo >> 10) & 0x7ff); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI((ug_lo >> 10) & 0x7ff); + *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(10); + *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(10); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo & 0x3ff); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo & 0x3ff); + *p++ = RISCVI_JALR | RISCVF_D(RID_X0) | RISCVF_S1(RID_X6) | RISCVF_IMMJ(0); + for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { + *p++ = RISCVI_LUI | RISCVF_D(RID_X5) | RISCVF_IMMU(slot); + *p = RISCVI_JAL | RISCVF_IMMJ(((char *)page-(char *)p)); + p++; + } + return p; +} #else /* Missing support for this architecture. */ #define callback_mcode_init(g, p) (p) @@ -512,6 +549,31 @@ void lj_ccallback_mcode_free(CTState *cts) } #endif +#define CALLBACK_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + ((float *)dp)[1] = *(float *)dp; + +#elif LJ_TARGET_RISCV64 + +#define CALLBACK_HANDLE_REGARG \ + if (isfp) { \ + if (nfpr + n <= CCALL_NARG_FPR) { \ + sp = &cts->cb.fpr[nfpr]; \ + nfpr += n; \ + goto done; \ + } else if (ngpr + n <= maxgpr) { \ + sp = &cts->cb.gpr[ngpr]; \ + ngpr += n; \ + goto done; \ + } \ + } else { \ + if (ngpr + n <= maxgpr) { \ + sp = &cts->cb.gpr[ngpr]; \ + ngpr += n; \ + goto done; \ + } \ + } + #define CALLBACK_HANDLE_RET \ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ((float *)dp)[1] = *(float *)dp; @@ -662,7 +724,7 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : (int32_t)*(int16_t *)dp; } -#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) +#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64 /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ if (ctr->size <= 4 && (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) diff --git a/src/lj_target_riscv.h b/src/lj_target_riscv.h index a2a907db..22948dc5 100644 --- a/src/lj_target_riscv.h +++ b/src/lj_target_riscv.h @@ -1,6 +1,6 @@ /* ** Definitions for RISC-V CPUs. -** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_TARGET_RISCV_H diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc index c86f94bd..0a8970a1 100644 --- a/src/vm_riscv64.dasc +++ b/src/vm_riscv64.dasc @@ -812,14 +812,29 @@ static void build_subroutines(BuildCtx *ctx) | mv BASE, TMP2 // Restore caller BASE. | ld LFUNC:TMP1, FRAME_FUNC(TMP2) | ld PC, -24(RB) // Restore PC from [cont|PC]. + |.if FFI + | sltiu TMP3, TMP0, 2 + |.endif | cleartp LFUNC:TMP1 | add TMP2, RA, RD | ld TMP1, LFUNC:TMP1->pc | sd TISNIL, -8(TMP2) // Ensure one valid arg. + |.if FFI + | bnez TMP3, >1 + |.endif | // BASE = base, RA = resultptr, RB = meta base | ld KBASE, PC2PROTO(k)(TMP1) | jr TMP0 // Jump to continuation. | + |.if FFI + |1: + | addi TMP1, RB, -32 + | bxnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback. + | // cont = 0: tailcall from C function. + | sub RC, TMP1, BASE + | j ->vm_call_tail + |.endif + | |->cont_cat: // RA = resultptr, RB = meta base | lw INS, -4(PC) | addi CARG2, RB, -32 @@ -1019,6 +1034,18 @@ static void build_subroutines(BuildCtx *ctx) | // Returns 0/1 or TValue * (metamethod). | j <3 | + |->vmeta_equal_cd: + |.if FFI + | addi PC, PC, -4 + | mv CARG1, L + | mv CARG2, INS + | sd BASE, L->base + | sd PC, SAVE_PC(sp) + | call_intern vmeta_equal_cd, lj_meta_equal_cd // (lua_State *L, BCIns op) + | // Returns 0/1 or TValue * (metamethod). + | j <3 + |.endif + | |->vmeta_istype: | addi PC, PC, -4 | sd BASE, L->base @@ -2219,6 +2246,133 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |//----------------------------------------------------------------------- + |//-- FFI helper functions ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Handler for callback functions. Callback slot number in x5, g in x7. + |->vm_ffi_callback: + |.if FFI + |.type CTSTATE, CTState, PC + | saveregs + | ld CTSTATE, GL:x7->ctype_state + | mv GL, x7 + | addxi DISPATCH, x7, GG_G2DISP + | srli x5, x5, 12 + | sw x5, CTSTATE->cb.slot + | sd CARG1, CTSTATE->cb.gpr[0] + | fsd FARG1, CTSTATE->cb.fpr[0] + | sd CARG2, CTSTATE->cb.gpr[1] + | fsd FARG2, CTSTATE->cb.fpr[1] + | sd CARG3, CTSTATE->cb.gpr[2] + | fsd FARG3, CTSTATE->cb.fpr[2] + | sd CARG4, CTSTATE->cb.gpr[3] + | fsd FARG4, CTSTATE->cb.fpr[3] + | sd CARG5, CTSTATE->cb.gpr[4] + | fsd FARG5, CTSTATE->cb.fpr[4] + | sd CARG6, CTSTATE->cb.gpr[5] + | fsd FARG6, CTSTATE->cb.fpr[5] + | sd CARG7, CTSTATE->cb.gpr[6] + | fsd FARG7, CTSTATE->cb.fpr[6] + | sd CARG8, CTSTATE->cb.gpr[7] + | fsd FARG8, CTSTATE->cb.fpr[7] + | addi TMP0, sp, CFRAME_SPACE + | sd TMP0, CTSTATE->cb.stack + | sd x0, SAVE_PC(sp) // Any value outside of bytecode is ok. + | mv CARG1, CTSTATE + | mv CARG2, sp + | call_intern vm_ffi_callback, lj_ccallback_enter // (CTState *cts, void *cf) + | // Returns lua_State *. + | ld BASE, L:CRET1->base + | ld RC, L:CRET1->top + | mv L, CRET1 + | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double). + | ld LFUNC:RB, FRAME_FUNC(BASE) + | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM + | slli TMP3, TMP3, 32 + | li_vmstate INTERP + | subw RC, RC, BASE + | cleartp LFUNC:RB + | st_vmstate + | fmv.d.x TOBIT, TMP3 + | ins_callt + |.endif + | + |->cont_ffi_callback: // Return from FFI callback. + |.if FFI + | ld CTSTATE, GL->ctype_state + | sd BASE, L->base + | sd RB, L->top + | sd L, CTSTATE->L + | mv CARG1, CTSTATE + | mv CARG2, RA + | // (CTState *cts, TValue *o) + | call_intern cont_ffi_callback, lj_ccallback_leave + | fld FRET1, CTSTATE->cb.fpr[0] + | ld CRET1, CTSTATE->cb.gpr[0] + | fld FRET2, CTSTATE->cb.fpr[1] + | ld CRET2, CTSTATE->cb.gpr[1] + | j ->vm_leave_unw + |.endif + | + |->vm_ffi_call: // Call C function via FFI. + | // Caveat: needs special frame unwinding, see below. + |.if FFI + | .type CCSTATE, CCallState, CARG1 + | lw TMP1, CCSTATE->spadj + | lbu CARG2, CCSTATE->nsp + | lbu CARG3, CCSTATE->nfpr + | mv TMP2, sp + | sub sp, sp, TMP1 + | sd ra, -8(TMP2) + | sd x18, -16(TMP2) + | sd CCSTATE, -24(TMP2) + | mv x18, TMP2 + | addi TMP1, CCSTATE, offsetof(CCallState, stack) + | mv TMP2, sp + | add TMP3, TMP1, CARG2 + | beqz CARG2, >2 + |1: + | ld TMP0, 0(TMP1) + | addi TMP1, TMP1, 8 + | sd TMP0, 0(TMP2) + | addi TMP2, TMP2, 8 + | bltu TMP1, TMP3, <1 + |2: + | beqz CARG3, >3 + | fld FARG1, CCSTATE->fpr[0] + | fld FARG2, CCSTATE->fpr[1] + | fld FARG3, CCSTATE->fpr[2] + | fld FARG4, CCSTATE->fpr[3] + | fld FARG5, CCSTATE->fpr[4] + | fld FARG6, CCSTATE->fpr[5] + | fld FARG7, CCSTATE->fpr[6] + | fld FARG8, CCSTATE->fpr[7] + |3: + | ld CFUNCADDR, CCSTATE->func + | ld CARG2, CCSTATE->gpr[1] + | ld CARG3, CCSTATE->gpr[2] + | ld CARG4, CCSTATE->gpr[3] + | ld CARG5, CCSTATE->gpr[4] + | ld CARG6, CCSTATE->gpr[5] + | ld CARG7, CCSTATE->gpr[6] + | ld CARG8, CCSTATE->gpr[7] + | ld CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. + | jalr CFUNCADDR + | ld CCSTATE:TMP1, -24(x18) + | ld TMP0, -16(x18) + | ld ra, -8(x18) + | sd CRET1, CCSTATE:TMP1->gpr[0] + | sd CRET2, CCSTATE:TMP1->gpr[1] + | fsd FRET1, CCSTATE:TMP1->fpr[0] + | fsd FRET2, CCSTATE:TMP1->fpr[1] + | mv sp, x18 + | mv x18, TMP0 + | ret + |.endif + |// Note: vm_ffi_call must be the last function in this object file! + | + |//----------------------------------------------------------------------- } /* Generate the code for a single instruction. */ @@ -2343,6 +2497,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | beqz TMP0, ->BC_ISNEN_Z } |// Either or both types are not numbers. + |.if FFI + | // Check if RA or RD is a cdata. + | xori TMP0, CARG3, LJ_TCDATA + | xori TMP1, CARG4, LJ_TCDATA + | and TMP0, TMP0, TMP1 + | bxeqz TMP0, ->vmeta_equal_cd + |.endif | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 | decode_BC4b TMP2 | addw TMP2, TMP2, TMP3 // (jump-0x8000)<<2 @@ -2395,10 +2556,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | sub RD, KBASE, RD | lhu TMP2, -4+OFS_RD(PC) | ld CARG2, -8(RD) // KBASE-8-str_const*8 + |.if FFI + | gettp CARG3, CARG1 + | li TMP1, LJ_TCDATA + |.endif | li TMP0, LJ_TSTR | decode_BC4b TMP2 | settp CARG2, TMP0 | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + |.if FFI + | bxeq CARG3, TMP1, ->vmeta_equal_cd + |.endif | xor TMP0, CARG1, CARG2 // TMP2=0: A==D; TMP2!=0: A!=D | addw TMP2, TMP2, TMP3 if (vk) { @@ -2453,7 +2621,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |4: // RA is not an integer. | addw TMP2, TMP2, TMP3 + |.if FFI + | bgeu CARG3, TISNUM, >7 + |.else | bgeu CARG3, TISNUM, <2 + |.endif | fmv.d.x FTMP0, CARG1 | fmv.d.x FTMP2, CARG2 | bne CARG4, TISNUM, >5 @@ -2466,11 +2638,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | j <1 | |6: // RA is an integer, RD is a number. + |.if FFI + | bgeu CARG4, TISNUM, >8 + |.else | bgeu CARG4, TISNUM, <2 + |.endif | fcvt.d.w FTMP0, CARG1 | fmv.d.x FTMP2, CARG2 | j <5 | + |.if FFI + |7: // RA not int, not number + | li TMP0, LJ_TCDATA + | bne CARG3, TMP0, <2 + | j ->vmeta_equal_cd + | + |8: // RD not int, not number + | li TMP0, LJ_TCDATA + | bne CARG4, TMP0, <2 + | j ->vmeta_equal_cd + |.endif break; case BC_ISEQP: case BC_ISNEP: @@ -2484,6 +2671,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | gettp TMP1, TMP1 | addi PC, PC, 4 | xor TMP0, TMP1, TMP0 // TMP0=0 A=D; TMP0!=0 A!=D + |.if FFI + | li TMP3, LJ_TCDATA + | bxeq TMP1, TMP3, ->vmeta_equal_cd + |.endif | decode_BC4b TMP2 | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 | addw TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2 @@ -2824,6 +3015,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_next break; case BC_KCDATA: + |.if FFI + | // RA = dst*8, RD = cdata_const*8 (~) + | sub TMP1, KBASE, RD + | ld TMP0, -8(TMP1) // KBASE-8-cdata_const*8 + | li TMP2, LJ_TCDATA + | add RA, BASE, RA + | settp TMP0, TMP2 + | sd TMP0, 0(RA) + | ins_next + |.endif break; case BC_KSHORT: | // RA = dst*8, RD = int16_literal*8