riscv(ffi): add call convention and support framework

This commit is contained in:
gns 2024-03-06 09:21:02 +08:00 committed by gns
parent 33d45379ae
commit 9eb94f1afc
5 changed files with 435 additions and 5 deletions

View File

@ -575,6 +575,97 @@
goto done; \ goto done; \
} }
#elif LJ_TARGET_RISCV64
/* -- RISC-V lp64d calling conventions ------------------------------------ */
#define CCALL_HANDLE_STRUCTRET \
  /* Structs larger than 16 bytes are returned by reference. */ \
  if (sz > 16) { \
    cc->retref = 1; \
    /* The result buffer pointer takes the next free GPR. */ \
    cc->gpr[ngpr++] = (GPRArg)dp; \
  } else { \
    cc->retref = 0; \
  }
#define CCALL_HANDLE_STRUCTRET2 \
  /* Copy a struct returned in registers into the result buffer dp. */ \
  /* cl is the ccall_classify_struct() result: the low byte holds the FP */ \
  /* member size (4 or 8), cl >> 8 the FP member count; cl <= 1 means */ \
  /* the struct is not a homogeneous FP aggregate. */ \
  unsigned int cl = ccall_classify_struct(cts, ctr); \
  if ((cl & 4) && (cl >> 8) <= 2) { \
    /* One or two floats: each sits in its own FPR, gather them. */ \
    CTSize i = (cl >> 8) - 1; \
    do { ((float *)dp)[i] = cc->fpr[i].f; } while (i--); \
  } else { \
    if (cl > 1) { \
      sp = (uint8_t *)&cc->fpr[0]; \
      /* More than two FP members: the value is read from the GPRs. */ \
      if ((cl >> 8) > 2) \
        sp = (uint8_t *)&cc->gpr[0]; \
    } \
    memcpy(dp, sp, ctr->size); \
  }
#define CCALL_HANDLE_COMPLEXRET \
/* Complex values are returned in 1 or 2 FPRs. */ \
cc->retref = 0;
#define CCALL_HANDLE_COMPLEXRET2 \
  /* Element 0 comes from the first FPR, element 1 from the second. */ \
  if (ctr->size != 2*sizeof(float)) {  /* Complex double. */ \
    ((double *)dp)[0] = cc->fpr[0].d; \
    ((double *)dp)[1] = cc->fpr[1].d; \
  } else {  /* Complex float: take the float member of each FPR slot. */ \
    ((float *)dp)[0] = cc->fpr[0].f; \
    ((float *)dp)[1] = cc->fpr[1].f; \
  }
#define CCALL_HANDLE_COMPLEXARG \
  if (sz != 2*sizeof(long double)) { \
    /* Pass complex in two FPRs or on stack. */ \
    /* isfp = 2 marks complex float, isfp = 1 everything else. */ \
    isfp = (sz == 2*sizeof(float)) ? 2 : 1; \
    sz = 2*CTSIZE_PTR; \
  } else { \
    /* Pass long double complex by reference: allocate a cdata buffer */ \
    /* for it and pass a pointer-sized argument instead. */ \
    rp = cdataptr(lj_cdata_new(cts, did, sz)); \
    sz = CTSIZE_PTR; \
  }
#define CCALL_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
sp = (uint8_t *)&cc->fpr[0].f;
#define CCALL_HANDLE_STRUCTARG \
  unsigned int cl = ccall_classify_struct(cts, d); \
  /* FP member count; 0 if the struct is not a homogeneous FP aggregate. */ \
  nff = cl >> 8; \
  /* FP aggregates are candidates for FPRs: isfp = 2 for float members, */ \
  /* isfp = 1 for double members. */ \
  if (cl > 1) \
    isfp = (cl & 4) ? 2 : 1; \
  /* Pass structs of size >16 by reference. */ \
  if (sz > 16) { \
    rp = cdataptr(lj_cdata_new(cts, did, sz)); \
    sz = CTSIZE_PTR; \
  }
#define CCALL_HANDLE_REGARG \
  if (isfp && !isva) {  /* Try to pass argument in FPRs. */ \
    /* Register count: 1 for vectors, n for isfp == 1, else 2. */ \
    int n2 = 2; \
    if (ctype_isvector(d->info)) \
      n2 = 1; \
    else if (isfp == 1) \
      n2 = n; \
    if (nfpr + n2 <= CCALL_NARG_FPR && nff <= 2) { \
      dp = &cc->fpr[nfpr]; \
      nfpr += n2; \
      goto done; \
    } else if (ngpr + n2 <= maxgpr) {  /* Otherwise fall back to GPRs. */ \
      dp = &cc->gpr[ngpr]; \
      ngpr += n2; \
      goto done; \
    } \
  } else if (ngpr + n <= maxgpr) {  /* Try to pass argument in GPRs. */ \
    dp = &cc->gpr[ngpr]; \
    ngpr += n; \
    goto done; \
  }
#else #else
#error "Missing calling convention definitions for this architecture" #error "Missing calling convention definitions for this architecture"
#endif #endif
@ -891,6 +982,51 @@ static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp,
#endif #endif
/* -- RISC-V ABI struct classification ---------------------------- */
#if LJ_TARGET_RISCV64
/*
** Classify a struct or union for argument/result passing.
** Returns (FP member size) + (FP member count << 8) when every member is
** a float/double (or complex, or a nested aggregate of those) of one
** common size (4 or 8) and there are at most 4 of them. Complex members
** count as two. For unions the count is the maximum over the members.
** Otherwise returns 1 if the aggregate is at most 16 bytes, else 0.
*/
static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
{
  CTSize sz = ct->size;
  unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
  while (ct->sib) {
    CType *sct = NULL;
    unsigned int fsz = 0, fcnt = 0;  /* FP size bits / count of this member. */
    int nested = 0;  /* Member is an aggregate that needs recursion. */
    ct = ctype_get(cts, ct->sib);
    if (ctype_isfield(ct->info)) {
      sct = ctype_rawchild(cts, ct);
      if (ctype_isfp(sct->info)) {
        fsz = sct->size;
        fcnt = 1;
      } else if (ctype_iscomplex(sct->info)) {
        fsz = sct->size >> 1;
        fcnt = 2;
      } else if (ctype_isstruct(sct->info)) {
        nested = 1;
      } else {
        goto noth;
      }
    } else if (ctype_isbitfield(ct->info)) {
      goto noth;
    } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
      sct = ctype_rawchild(cts, ct);
      nested = 1;
    }
    if (nested) {
      unsigned int s;
      if (!(sct->size > 0))
        continue;  /* Empty aggregates do not contribute. */
      s = ccall_classify_struct(cts, sct);
      if (s <= 1) goto noth;
      fsz = s & 255;
      fcnt = s >> 8;
    }
    /* Mixed member sizes leave r at a value other than 4 or 8. */
    r |= fsz;
    if (!isu)
      n += fcnt;
    else if (n < fcnt)
      n = fcnt;
  }
  if ((r == 4 || r == 8) && n <= 4)
    return r + (n << 8);
noth:  /* Not a homogeneous float/double aggregate. */
  return (sz <= 16);  /* Return structs of size <= 16 in GPRs. */
}
#endif
/* -- Common C call handling ---------------------------------------------- */ /* -- Common C call handling ---------------------------------------------- */
/* Infer the destination CTypeID for a vararg argument. */ /* Infer the destination CTypeID for a vararg argument. */
@ -937,6 +1073,10 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
#endif #endif
#endif #endif
#if LJ_TARGET_RISCV64
int nff = 0;
#endif
/* Clear unused regs to get some determinism in case of misdeclaration. */ /* Clear unused regs to get some determinism in case of misdeclaration. */
memset(cc->gpr, 0, sizeof(cc->gpr)); memset(cc->gpr, 0, sizeof(cc->gpr));
#if CCALL_NUM_FPR #if CCALL_NUM_FPR
@ -1077,7 +1217,11 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
if (isfp && d->size == sizeof(float)) if (isfp && d->size == sizeof(float))
((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */ ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */
#endif #endif
#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) #if LJ_TARGET_RISCV64
if (isfp && d->size == sizeof(float))
((uint32_t *)dp)[1] = 0xffffffffu; /* Float NaN boxing */
#endif
#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64
if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
#if LJ_TARGET_MIPS64 #if LJ_TARGET_MIPS64
|| (isfp && nsp == 0) || (isfp && nsp == 0)
@ -1107,6 +1251,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
CTSize i = (sz >> 2) - 1; CTSize i = (sz >> 2) - 1;
do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--); do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--);
} }
#elif LJ_TARGET_RISCV64
if (isfp == 2 && nff <= 2) {
/* Split complex float into separate registers. */
CTSize i = (sz >> 2) - 1;
do {
((uint64_t *)dp)[i] = 0xffffffff00000000ul | ((uint32_t *)dp)[i];
} while (i--);
}
#else #else
UNUSED(isfp); UNUSED(isfp);
#endif #endif
@ -1116,7 +1268,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
if ((int32_t)nsp < 0) nsp = 0; if ((int32_t)nsp < 0) nsp = 0;
#endif #endif
#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) #if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) || LJ_TARGET_RISCV64
cc->nfpr = nfpr; /* Required for vararg functions. */ cc->nfpr = nfpr; /* Required for vararg functions. */
#endif #endif
cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);

View File

@ -129,6 +129,21 @@ typedef union FPRArg {
struct { LJ_ENDIAN_LOHI(float f; , float g;) }; struct { LJ_ENDIAN_LOHI(float f; , float g;) };
} FPRArg; } FPRArg;
#elif LJ_TARGET_RISCV64
/* Eight GPRs and eight FPRs carry arguments (vm_ffi_call loads gpr[0..7] and fpr[0..7]). */
#define CCALL_NARG_GPR 8
#define CCALL_NARG_FPR 8
/* Two GPRs and two FPRs carry results (vm_ffi_call stores gpr[0..1] and fpr[0..1]). */
#define CCALL_NRET_GPR 2
#define CCALL_NRET_FPR 2
/* NOTE(review): stack slot accounting constants consumed by the common call setup; their exact meaning is not visible here. */
#define CCALL_SPS_EXTRA 3
#define CCALL_SPS_FREE 1
/* One GPR argument/result slot. */
typedef intptr_t GPRArg;
/* One FPR argument/result slot: a double, or two floats sharing the same storage. */
typedef union FPRArg {
double d;
/* LJ_ENDIAN_LOHI presumably orders the members so that f is the low and g the high 32 bits -- defined elsewhere, confirm. */
struct { LJ_ENDIAN_LOHI(float f; , float g;) };
} FPRArg;
#else #else
#error "Missing calling convention definitions for this architecture" #error "Missing calling convention definitions for this architecture"
#endif #endif
@ -175,7 +190,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */
#elif LJ_TARGET_ARM64 #elif LJ_TARGET_ARM64
void *retp; /* Aggregate return pointer in x8. */ void *retp; /* Aggregate return pointer in x8. */
#elif LJ_TARGET_PPC #elif LJ_TARGET_PPC || LJ_TARGET_RISCV64
uint8_t nfpr; /* Number of arguments in FPRs. */ uint8_t nfpr; /* Number of arguments in FPRs. */
#endif #endif
#if LJ_32 #if LJ_32

View File

@ -71,6 +71,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
#define CALLBACK_MCODE_HEAD 52 #define CALLBACK_MCODE_HEAD 52
#elif LJ_TARGET_RISCV64
/* Size in bytes of the shared head: must match the 17 32-bit instructions */
/* callback_mcode_init() emits before the per-slot stubs. */
#define CALLBACK_MCODE_HEAD 68
#else #else
/* Missing support for this architecture. */ /* Missing support for this architecture. */
@ -238,6 +242,39 @@ static void *callback_mcode_init(global_State *g, uint32_t *page)
} }
return p; return p;
} }
#elif LJ_TARGET_RISCV64
/*
** Fill the callback machine code page for RISC-V 64.
** Emits a shared 17-instruction head that builds the address of
** lj_vm_ffi_callback in x6 and the global_State pointer g in x7 and then
** jumps to x6, followed by one two-instruction stub per callback slot.
** Returns the address just past the last emitted instruction.
*/
static void *callback_mcode_init(global_State *g, uint32_t *page)
{
uint32_t *p = page;
uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback;
uintptr_t ug = (uintptr_t)(void *)g;
/* Split both 64-bit values into their upper and lower 32-bit halves. */
uintptr_t target_hi = (target >> 32), target_lo = target & 0xffffffffULL;
uintptr_t ug_hi = (ug >> 32), ug_lo = ug & 0xffffffffULL;
MSize slot;
/* Load the upper halves with a LUI + ADDI pair each. RISCVF_HI/RISCVF_LO */
/* are presumably the usual hi20/lo12 split -- defined elsewhere. */
*p++ = RISCVI_LUI | RISCVF_D(RID_X6) | RISCVF_IMMU(RISCVF_HI(target_hi));
*p++ = RISCVI_LUI | RISCVF_D(RID_X7) | RISCVF_IMMU(RISCVF_HI(ug_hi));
*p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(RISCVF_LO(target_hi));
*p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(RISCVF_LO(ug_hi));
/* Shift in the lower halves in chunks of 11, 11 and 10 bits (32 in total). */
/* NOTE(review): chunks of at most 11 bits presumably keep every ADDI */
/* immediate non-negative in its signed 12-bit field -- confirm. */
*p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11);
*p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11);
/* Bits 31..21 of the lower halves. */
*p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo >> 21);
*p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo >> 21);
*p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11);
*p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11);
/* Bits 20..10 of the lower halves. */
*p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI((target_lo >> 10) & 0x7ff);
*p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI((ug_lo >> 10) & 0x7ff);
*p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(10);
*p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(10);
/* Bits 9..0 of the lower halves. */
*p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo & 0x3ff);
*p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo & 0x3ff);
/* Jump to x6 with rd = x0, i.e. without writing a return address. */
/* NOTE(review): the zero offset is encoded with RISCVF_IMMJ although the */
/* other I-type instructions here use RISCVF_IMMI; fine only if */
/* RISCVF_IMMJ(0) is 0 -- confirm. */
*p++ = RISCVI_JALR | RISCVF_D(RID_X0) | RISCVF_S1(RID_X6) | RISCVF_IMMJ(0);
/* Per-slot stubs: put the slot number into x5 and jump back to the head. */
for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
/* LUI places the slot number in the upper bits of x5; lj_vm_ffi_callback */
/* shifts it back down by 12. */
*p++ = RISCVI_LUI | RISCVF_D(RID_X5) | RISCVF_IMMU(slot);
/* Backward jump to the start of the page; the offset page - p is negative. */
/* No destination register field is OR'ed in (presumably rd = x0). */
*p = RISCVI_JAL | RISCVF_IMMJ(((char *)page-(char *)p));
p++;
}
return p;
}
#else #else
/* Missing support for this architecture. */ /* Missing support for this architecture. */
#define callback_mcode_init(g, p) (p) #define callback_mcode_init(g, p) (p)
@ -512,6 +549,31 @@ void lj_ccallback_mcode_free(CTState *cts)
} }
#endif #endif
#define CALLBACK_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
((float *)dp)[1] = *(float *)dp;
#elif LJ_TARGET_RISCV64
#define CALLBACK_HANDLE_REGARG \
  /* FP arguments are fetched from the saved FPRs while enough are left. */ \
  /* Everything else, including FP arguments that no longer fit, is */ \
  /* fetched from the saved GPRs. */ \
  if (isfp && nfpr + n <= CCALL_NARG_FPR) { \
    sp = &cts->cb.fpr[nfpr]; \
    nfpr += n; \
    goto done; \
  } else if (ngpr + n <= maxgpr) { \
    sp = &cts->cb.gpr[ngpr]; \
    ngpr += n; \
    goto done; \
  }
#define CALLBACK_HANDLE_RET \ #define CALLBACK_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
((float *)dp)[1] = *(float *)dp; ((float *)dp)[1] = *(float *)dp;
@ -662,7 +724,7 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
*(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
(int32_t)*(int16_t *)dp; (int32_t)*(int16_t *)dp;
} }
#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) #if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64
/* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
if (ctr->size <= 4 && if (ctr->size <= 4 &&
(LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))

View File

@ -1,6 +1,6 @@
/* /*
** Definitions for RISC-V CPUs. ** Definitions for RISC-V CPUs.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/ */
#ifndef _LJ_TARGET_RISCV_H #ifndef _LJ_TARGET_RISCV_H

View File

@ -812,14 +812,29 @@ static void build_subroutines(BuildCtx *ctx)
| mv BASE, TMP2 // Restore caller BASE. | mv BASE, TMP2 // Restore caller BASE.
| ld LFUNC:TMP1, FRAME_FUNC(TMP2) | ld LFUNC:TMP1, FRAME_FUNC(TMP2)
| ld PC, -24(RB) // Restore PC from [cont|PC]. | ld PC, -24(RB) // Restore PC from [cont|PC].
|.if FFI
| sltiu TMP3, TMP0, 2
|.endif
| cleartp LFUNC:TMP1 | cleartp LFUNC:TMP1
| add TMP2, RA, RD | add TMP2, RA, RD
| ld TMP1, LFUNC:TMP1->pc | ld TMP1, LFUNC:TMP1->pc
| sd TISNIL, -8(TMP2) // Ensure one valid arg. | sd TISNIL, -8(TMP2) // Ensure one valid arg.
|.if FFI
| bnez TMP3, >1
|.endif
| // BASE = base, RA = resultptr, RB = meta base | // BASE = base, RA = resultptr, RB = meta base
| ld KBASE, PC2PROTO(k)(TMP1) | ld KBASE, PC2PROTO(k)(TMP1)
| jr TMP0 // Jump to continuation. | jr TMP0 // Jump to continuation.
| |
|.if FFI
|1:
| addi TMP1, RB, -32
| bxnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback.
| // cont = 0: tailcall from C function.
| sub RC, TMP1, BASE
| j ->vm_call_tail
|.endif
|
|->cont_cat: // RA = resultptr, RB = meta base |->cont_cat: // RA = resultptr, RB = meta base
| lw INS, -4(PC) | lw INS, -4(PC)
| addi CARG2, RB, -32 | addi CARG2, RB, -32
@ -1019,6 +1034,18 @@ static void build_subroutines(BuildCtx *ctx)
| // Returns 0/1 or TValue * (metamethod). | // Returns 0/1 or TValue * (metamethod).
| j <3 | j <3
| |
|// Equality comparison involving a cdata operand: defer to lj_meta_equal_cd.
|->vmeta_equal_cd:
|.if FFI
| addi PC, PC, -4 // Step PC back by 4 bytes before saving it.
| mv CARG1, L
| mv CARG2, INS // Second argument is the current instruction word.
| sd BASE, L->base
| sd PC, SAVE_PC(sp)
| call_intern vmeta_equal_cd, lj_meta_equal_cd // (lua_State *L, BCIns op)
| // Returns 0/1 or TValue * (metamethod).
| j <3 // Continue at the preceding local label 3 (defined outside this routine).
|.endif
|
|->vmeta_istype: |->vmeta_istype:
| addi PC, PC, -4 | addi PC, PC, -4
| sd BASE, L->base | sd BASE, L->base
@ -2219,6 +2246,133 @@ static void build_subroutines(BuildCtx *ctx)
|.endif |.endif
| |
|//----------------------------------------------------------------------- |//-----------------------------------------------------------------------
|//-- FFI helper functions -----------------------------------------------
|//-----------------------------------------------------------------------
|
|// Handler for callback functions. Callback slot number in x5, g in x7.
|// Reached via the machine code built by callback_mcode_init(), which
|// loads the slot number with LUI (upper bits of x5) and g into x7.
|->vm_ffi_callback:
|.if FFI
|.type CTSTATE, CTState, PC
| saveregs
| ld CTSTATE, GL:x7->ctype_state
| mv GL, x7
| addxi DISPATCH, x7, GG_G2DISP
| srli x5, x5, 12 // Undo the LUI placement to get the plain slot number.
| sw x5, CTSTATE->cb.slot
| // Spill all eight integer and eight FP argument registers to cts->cb.
| sd CARG1, CTSTATE->cb.gpr[0]
| fsd FARG1, CTSTATE->cb.fpr[0]
| sd CARG2, CTSTATE->cb.gpr[1]
| fsd FARG2, CTSTATE->cb.fpr[1]
| sd CARG3, CTSTATE->cb.gpr[2]
| fsd FARG3, CTSTATE->cb.fpr[2]
| sd CARG4, CTSTATE->cb.gpr[3]
| fsd FARG4, CTSTATE->cb.fpr[3]
| sd CARG5, CTSTATE->cb.gpr[4]
| fsd FARG5, CTSTATE->cb.fpr[4]
| sd CARG6, CTSTATE->cb.gpr[5]
| fsd FARG6, CTSTATE->cb.fpr[5]
| sd CARG7, CTSTATE->cb.gpr[6]
| fsd FARG7, CTSTATE->cb.fpr[6]
| sd CARG8, CTSTATE->cb.gpr[7]
| fsd FARG8, CTSTATE->cb.fpr[7]
| // Record sp + CFRAME_SPACE as the callback's stack pointer in cts->cb.stack.
| addi TMP0, sp, CFRAME_SPACE
| sd TMP0, CTSTATE->cb.stack
| sd x0, SAVE_PC(sp) // Any value outside of bytecode is ok.
| mv CARG1, CTSTATE
| mv CARG2, sp
| call_intern vm_ffi_callback, lj_ccallback_enter // (CTState *cts, void *cf)
| // Returns lua_State *.
| // Set up the interpreter registers from the returned lua_State.
| ld BASE, L:CRET1->base
| ld RC, L:CRET1->top
| mv L, CRET1
| lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double).
| ld LFUNC:RB, FRAME_FUNC(BASE)
| li TISNIL, LJ_TNIL
| li TISNUM, LJ_TISNUM
| slli TMP3, TMP3, 32 // Move the constant into the upper 32 bits.
| li_vmstate INTERP
| subw RC, RC, BASE // RC = top - base.
| cleartp LFUNC:RB
| st_vmstate
| fmv.d.x TOBIT, TMP3
| ins_callt
|.endif
|
|->cont_ffi_callback: // Return from FFI callback.
|.if FFI
| ld CTSTATE, GL->ctype_state
| // Sync the Lua stack state and publish L to the ctype state before the call.
| sd BASE, L->base
| sd RB, L->top
| sd L, CTSTATE->L
| mv CARG1, CTSTATE
| mv CARG2, RA
| // (CTState *cts, TValue *o)
| call_intern cont_ffi_callback, lj_ccallback_leave
| // Load both GPR and both FPR return registers from the callback state.
| fld FRET1, CTSTATE->cb.fpr[0]
| ld CRET1, CTSTATE->cb.gpr[0]
| fld FRET2, CTSTATE->cb.fpr[1]
| ld CRET2, CTSTATE->cb.gpr[1]
| j ->vm_leave_unw
|.endif
|
|->vm_ffi_call: // Call C function via FFI.
| // Caveat: needs special frame unwinding, see below.
|.if FFI
| .type CCSTATE, CCallState, CARG1
| // Fetch the stack adjustment, the stack argument size and the FPR count.
| lw TMP1, CCSTATE->spadj
| lbu CARG2, CCSTATE->nsp
| lbu CARG3, CCSTATE->nfpr
| mv TMP2, sp
| sub sp, sp, TMP1
| // Save ra, x18 and CCSTATE just below the old sp; x18 then holds the old sp.
| sd ra, -8(TMP2)
| sd x18, -16(TMP2)
| sd CCSTATE, -24(TMP2)
| mv x18, TMP2
| // Copy nsp bytes of stack arguments from CCSTATE->stack to the new sp,
| // eight bytes at a time. TMP3 is the end of the source range.
| addi TMP1, CCSTATE, offsetof(CCallState, stack)
| mv TMP2, sp
| add TMP3, TMP1, CARG2
| beqz CARG2, >2
|1:
| ld TMP0, 0(TMP1)
| addi TMP1, TMP1, 8
| sd TMP0, 0(TMP2)
| addi TMP2, TMP2, 8
| bltu TMP1, TMP3, <1
|2:
| // Load all eight FP argument registers unless nfpr is zero.
| beqz CARG3, >3
| fld FARG1, CCSTATE->fpr[0]
| fld FARG2, CCSTATE->fpr[1]
| fld FARG3, CCSTATE->fpr[2]
| fld FARG4, CCSTATE->fpr[3]
| fld FARG5, CCSTATE->fpr[4]
| fld FARG6, CCSTATE->fpr[5]
| fld FARG7, CCSTATE->fpr[6]
| fld FARG8, CCSTATE->fpr[7]
|3:
| // Load the function address and all eight integer argument registers.
| ld CFUNCADDR, CCSTATE->func
| ld CARG2, CCSTATE->gpr[1]
| ld CARG3, CCSTATE->gpr[2]
| ld CARG4, CCSTATE->gpr[3]
| ld CARG5, CCSTATE->gpr[4]
| ld CARG6, CCSTATE->gpr[5]
| ld CARG7, CCSTATE->gpr[6]
| ld CARG8, CCSTATE->gpr[7]
| ld CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
| jalr CFUNCADDR
| // Recover CCSTATE, the saved x18 and ra via x18 (the old sp).
| ld CCSTATE:TMP1, -24(x18)
| ld TMP0, -16(x18)
| ld ra, -8(x18)
| // Store both GPR and both FPR return registers back into the call state.
| sd CRET1, CCSTATE:TMP1->gpr[0]
| sd CRET2, CCSTATE:TMP1->gpr[1]
| fsd FRET1, CCSTATE:TMP1->fpr[0]
| fsd FRET2, CCSTATE:TMP1->fpr[1]
| // Restore the caller's sp and x18.
| mv sp, x18
| mv x18, TMP0
| ret
|.endif
|// Note: vm_ffi_call must be the last function in this object file!
|
|//-----------------------------------------------------------------------
} }
/* Generate the code for a single instruction. */ /* Generate the code for a single instruction. */
@ -2343,6 +2497,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| beqz TMP0, ->BC_ISNEN_Z | beqz TMP0, ->BC_ISNEN_Z
} }
|// Either or both types are not numbers. |// Either or both types are not numbers.
|.if FFI
| // Check if RA or RD is a cdata.
| xori TMP0, CARG3, LJ_TCDATA
| xori TMP1, CARG4, LJ_TCDATA
| and TMP0, TMP0, TMP1
| bxeqz TMP0, ->vmeta_equal_cd
|.endif
| lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
| decode_BC4b TMP2 | decode_BC4b TMP2
| addw TMP2, TMP2, TMP3 // (jump-0x8000)<<2 | addw TMP2, TMP2, TMP3 // (jump-0x8000)<<2
@ -2395,10 +2556,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| sub RD, KBASE, RD | sub RD, KBASE, RD
| lhu TMP2, -4+OFS_RD(PC) | lhu TMP2, -4+OFS_RD(PC)
| ld CARG2, -8(RD) // KBASE-8-str_const*8 | ld CARG2, -8(RD) // KBASE-8-str_const*8
|.if FFI
| gettp CARG3, CARG1
| li TMP1, LJ_TCDATA
|.endif
| li TMP0, LJ_TSTR | li TMP0, LJ_TSTR
| decode_BC4b TMP2 | decode_BC4b TMP2
| settp CARG2, TMP0 | settp CARG2, TMP0
| lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
|.if FFI
| bxeq CARG3, TMP1, ->vmeta_equal_cd
|.endif
| xor TMP0, CARG1, CARG2 // TMP2=0: A==D; TMP2!=0: A!=D | xor TMP0, CARG1, CARG2 // TMP2=0: A==D; TMP2!=0: A!=D
| addw TMP2, TMP2, TMP3 | addw TMP2, TMP2, TMP3
if (vk) { if (vk) {
@ -2453,7 +2621,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| |
|4: // RA is not an integer. |4: // RA is not an integer.
| addw TMP2, TMP2, TMP3 | addw TMP2, TMP2, TMP3
|.if FFI
| bgeu CARG3, TISNUM, >7
|.else
| bgeu CARG3, TISNUM, <2 | bgeu CARG3, TISNUM, <2
|.endif
| fmv.d.x FTMP0, CARG1 | fmv.d.x FTMP0, CARG1
| fmv.d.x FTMP2, CARG2 | fmv.d.x FTMP2, CARG2
| bne CARG4, TISNUM, >5 | bne CARG4, TISNUM, >5
@ -2466,11 +2638,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| j <1 | j <1
| |
|6: // RA is an integer, RD is a number. |6: // RA is an integer, RD is a number.
|.if FFI
| bgeu CARG4, TISNUM, >8
|.else
| bgeu CARG4, TISNUM, <2 | bgeu CARG4, TISNUM, <2
|.endif
| fcvt.d.w FTMP0, CARG1 | fcvt.d.w FTMP0, CARG1
| fmv.d.x FTMP2, CARG2 | fmv.d.x FTMP2, CARG2
| j <5 | j <5
| |
|.if FFI
|7: // RA not int, not number
| li TMP0, LJ_TCDATA
| bne CARG3, TMP0, <2
| j ->vmeta_equal_cd
|
|8: // RD not int, not number
| li TMP0, LJ_TCDATA
| bne CARG4, TMP0, <2
| j ->vmeta_equal_cd
|.endif
break; break;
case BC_ISEQP: case BC_ISNEP: case BC_ISEQP: case BC_ISNEP:
@ -2484,6 +2671,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| gettp TMP1, TMP1 | gettp TMP1, TMP1
| addi PC, PC, 4 | addi PC, PC, 4
| xor TMP0, TMP1, TMP0 // TMP0=0 A=D; TMP0!=0 A!=D | xor TMP0, TMP1, TMP0 // TMP0=0 A=D; TMP0!=0 A!=D
|.if FFI
| li TMP3, LJ_TCDATA
| bxeq TMP1, TMP3, ->vmeta_equal_cd
|.endif
| decode_BC4b TMP2 | decode_BC4b TMP2
| lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
| addw TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2 | addw TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2
@ -2824,6 +3015,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ins_next | ins_next
break; break;
case BC_KCDATA: case BC_KCDATA:
|.if FFI
| // RA = dst*8, RD = cdata_const*8 (~)
| sub TMP1, KBASE, RD
| ld TMP0, -8(TMP1) // KBASE-8-cdata_const*8
| li TMP2, LJ_TCDATA
| add RA, BASE, RA
| settp TMP0, TMP2
| sd TMP0, 0(RA)
| ins_next
|.endif
break; break;
case BC_KSHORT: case BC_KSHORT:
| // RA = dst*8, RD = int16_literal*8 | // RA = dst*8, RD = int16_literal*8