Add partial FFI support.

Interestingly, this is enough to pass all the FFI tests, so s390x now
passes all the tests in LuaJIT-test-cleanup.
This commit is contained in:
Michael Munday 2017-01-06 16:19:56 -05:00
parent 5dec8c2211
commit bb98985db3
8 changed files with 139 additions and 136 deletions

View File

@ -56,7 +56,7 @@ CCOPT_mips=
#
CCDEBUG=
# Uncomment the next line to generate debug information:
CCDEBUG= -g -O0
#CCDEBUG= -g
#
CCWARN= -Wall
# Uncomment the next line to enable more warnings:

View File

@ -370,7 +370,6 @@
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
#define LJ_TARGET_GC64 1
#define LJ_ARCH_NOJIT 1 /* NYI */
#define LJ_ARCH_NOFFI 1 /* Disable FFI for now. */
#else
#error "No target architecture defined"

View File

@ -555,6 +555,41 @@
goto done; \
}
#elif LJ_TARGET_S390X
/* -- POSIX/s390x calling conventions --------------------------------------- */
/* Struct return convention for s390x.
** Aggregates are always returned in memory, whatever their size: the caller
** supplies the result buffer and passes its address as a hidden argument in
** the next free argument GPR. Register-sized structs are only special-cased
** when passed as arguments (see CCALL_HANDLE_STRUCTARG), never when returned.
** Expects cc, ngpr and dp in scope; sets cc->retref and consumes one GPR slot.
*/
#define CCALL_HANDLE_STRUCTRET \
cc->retref = 1; /* Return all structs by reference. */ \
cc->gpr[ngpr++] = (GPRArg)dp;
/* Complex return values are set up exactly like struct return values. */
#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET
/* Post-call fixup for complex results: unless the value was returned by
** reference (cc->retref set), copy the 8 bytes found at sp into the result
** object at dp. Expects cc, dp and sp in scope.
*/
#define CCALL_HANDLE_COMPLEXRET2 \
if (cc->retref == 0) { \
  /* Copy complex float from GPRs as a single 64 bit unit. */ \
  *(int64_t *)dp = *(int64_t *)sp; \
}
/* Struct argument convention: a struct whose size is exactly 1, 2, 4 or 8
** bytes is passed by value in a GPR. For any other size a new cdata object of
** that size is allocated, rp is pointed at it and the argument size becomes
** pointer-sized, i.e. the struct is passed by reference.
** Expects sz, rp, cts and did in scope.
*/
#define CCALL_HANDLE_STRUCTARG \
if (sz != 1 && sz != 2 && sz != 4 && sz != 8) { \
  /* Not register-sized: pass all other structs by reference. */ \
  rp = cdataptr(lj_cdata_new(cts, did, sz)); \
  sz = CTSIZE_PTR; \
}
/* Complex argument convention for s390x.
** Complex values are never passed by value in a register, not even complex
** float, although it would fit into a single GPR. A new cdata object of the
** argument's size is allocated, rp is pointed at it and the argument size
** becomes pointer-sized, so the callee receives a reference.
** Expects sz, rp, cts and did in scope.
*/
#define CCALL_HANDLE_COMPLEXARG \
/* Pass all complex numbers (float and double) by reference. */ \
rp = cdataptr(lj_cdata_new(cts, did, sz)); \
sz = CTSIZE_PTR;
/* Register argument assignment.
** A floating-point argument takes the next free FPR slot while fewer than
** CCALL_NARG_FPR slots are in use; any other argument takes the next free GPR
** slot while fewer than maxgpr slots are in use. On success dp points at the
** chosen slot and control jumps to the done label; otherwise control falls
** through with dp untouched.
** Expects isfp, nfpr, ngpr, maxgpr, cc and dp in scope.
*/
#define CCALL_HANDLE_REGARG \
if (isfp) { \
  /* FPR use is bounded by the FPR limit, not by the GPR limit. */ \
  if (nfpr < CCALL_NARG_FPR) { dp = &cc->fpr[nfpr++]; goto done; } \
} else { \
  /* GPR use is bounded by the GPR limit for this call. */ \
  if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \
}
#else
#error "Missing calling convention definitions for this architecture"
#endif

View File

@ -136,7 +136,11 @@ typedef union FPRArg {
#define CCALL_SPS_FREE 0
typedef intptr_t GPRArg;
/* One floating-point argument/return slot.
** The double member gives the slot its size; the float member overlays the
** first four bytes of the same storage so a single-precision value can be
** read or written in place. FPRArg must be declared exactly once: a plain
** `typedef double FPRArg;` next to this union is a conflicting redefinition.
*/
typedef union FPRArg {
  double d;
  float f;
} FPRArg;
#else
#error "Missing calling convention definitions for this architecture"
#endif

View File

@ -495,6 +495,15 @@ void lj_ccallback_mcode_free(CTState *cts)
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
((float *)dp)[1] = *(float *)dp;
#elif LJ_TARGET_S390X
/* Register argument lookup for callbacks.
** A floating-point argument is read from the next saved FPR slot while fewer
** than CCALL_NARG_FPR slots have been consumed; any other argument is read
** from the next saved GPR slot while fewer than maxgpr slots have been
** consumed. On success sp points at the slot and control jumps to the done
** label; otherwise control falls through with sp untouched.
** Expects isfp, nfpr, ngpr, maxgpr, cts and sp in scope.
*/
#define CALLBACK_HANDLE_REGARG \
if (isfp) { \
  /* FPR use is bounded by the FPR limit, not by the GPR limit. */ \
  if (nfpr < CCALL_NARG_FPR) { sp = &cts->cb.fpr[nfpr++]; goto done; } \
} else { \
  /* GPR use is bounded by the GPR limit for this callback. */ \
  if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \
}
#else
#error "Missing calling convention definitions for this architecture"
#endif

View File

@ -144,6 +144,8 @@ typedef uint32_t RegCost;
#include "lj_target_ppc.h"
#elif LJ_TARGET_MIPS
#include "lj_target_mips.h"
#elif LJ_TARGET_S390X
#include "lj_target_s390x.h"
#else
#error "Missing include for target CPU"
#endif

View File

@ -1,26 +1,22 @@
/*
** Definitions for S390 CPUs.
** Definitions for IBM z/Architecture (s390x) CPUs.
** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_TARGET_S390_H
#define _LJ_TARGET_S390_H
#ifndef _LJ_TARGET_S390X_H
#define _LJ_TARGET_S390X_H
/* -- Registers IDs ------------------------------------------------------- */
/* X-macro register lists. Each list applies the macro passed as `_` to every
** register name in order, so the expansion order defines the register numbering.
** GPRDEF lists the 16 general-purpose registers R0..R15, FPRDEF the 16
** floating-point registers F0..F15. The lists are unconditional; there is no
** soft-float variant here.
*/
#define GPRDEF(_) \
  _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \
  _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15)
#define FPRDEF(_) \
  _(F0) _(F1) _(F2) _(F3) \
  _(F4) _(F5) _(F6) _(F7) \
  _(F8) _(F9) _(F10) _(F11) \
  _(F12) _(F13) _(F14) _(F15)
/* Vector register list: currently empty. */
#define VRIDDEF(_)
// TODO: VREG?
/* Turns a register name into an enumerator with a trailing comma. */
#define RIDENUM(name) RID_##name,
@ -28,84 +24,28 @@ enum {
GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
RID_MAX,
RID_TMP = RID_LR,
/* Calling conventions. */
RID_RET = RID_R0,
RID_RETLO = RID_R0,
RID_RETHI = RID_R1,
#if LJ_SOFTFP
RID_FPRET = RID_R0,
#else
RID_FPRET = RID_D0,
#endif
RID_SP = RID_R15,
RID_RET = RID_R2,
RID_FPRET = RID_F0,
/* These definitions must match with the *.dasc file(s): */
RID_BASE = RID_R9, /* Interpreter BASE. */
RID_LPC = RID_R6, /* Interpreter PC. */
RID_DISPATCH = RID_R7, /* Interpreter DISPATCH table. */
RID_LREG = RID_R8, /* Interpreter L. */
RID_BASE = RID_R7, /* Interpreter BASE. */
RID_LPC = RID_R9, /* Interpreter PC. */
RID_DISPATCH = RID_R10, /* Interpreter DISPATCH table. */
/* Register ranges [min, max) and number of registers. */
RID_MIN_GPR = RID_R0,
RID_MAX_GPR = RID_PC+1,
RID_MIN_FPR = RID_MAX_GPR,
#if LJ_SOFTFP
RID_MAX_FPR = RID_MIN_FPR,
#else
RID_MAX_FPR = RID_D15+1,
#endif
RID_MIN_FPR = RID_F0,
RID_MAX_GPR = RID_MIN_FPR,
RID_MAX_FPR = RID_MAX,
RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR,
};
/* Constant-reference register window: same count as the GPRs, starting at R0. */
#define RID_NUM_KREF RID_NUM_GPR
#define RID_MIN_KREF RID_R0
/* -- Register sets ------------------------------------------------------- */
/* Make use of all registers, except sp, lr and pc. */
/* NOTE(review): the GPR list in this header names R0..R15 only; "lr" and "pc"
** are not among the generated register names -- confirm this comment and the
** R12 upper bound are what is wanted for s390x.
*/
#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_R12+1))
/* Even-numbered GPRs R0..R10. */
#define RSET_GPREVEN \
(RID2RSET(RID_R0)|RID2RSET(RID_R2)|RID2RSET(RID_R4)|RID2RSET(RID_R6)| \
RID2RSET(RID_R8)|RID2RSET(RID_R10))
/* Odd-numbered GPRs R1..R11. */
#define RSET_GPRODD \
(RID2RSET(RID_R1)|RID2RSET(RID_R3)|RID2RSET(RID_R5)|RID2RSET(RID_R7)| \
RID2RSET(RID_R9)|RID2RSET(RID_R11))
/* FPR set: empty for soft-float builds, otherwise the full FPR range. */
#if LJ_SOFTFP
#define RSET_FPR 0
#else
#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
#endif
/* Union of the GPR and FPR sets; also used as the initial set. */
#define RSET_ALL (RSET_GPR|RSET_FPR)
#define RSET_INIT RSET_ALL
/* ABI-specific register sets. lr is an implicit scratch register. */
/* NOTE(review): RID_D0/RID_D7 are not produced by the FPR list visible in this
** header (it names F0..F15), and the __APPLE__ branch is unexpected in an
** s390x header -- these look carried over from another target; confirm.
*/
/* Base scratch GPR set: R0..R3 plus R12. */
#define RSET_SCRATCH_GPR_ (RSET_RANGE(RID_R0, RID_R3+1)|RID2RSET(RID_R12))
#ifdef __APPLE__
#define RSET_SCRATCH_GPR (RSET_SCRATCH_GPR_|RID2RSET(RID_R9))
#else
#define RSET_SCRATCH_GPR RSET_SCRATCH_GPR_
#endif
/* Scratch FPR set: empty for soft-float builds, otherwise D0..D7. */
#if LJ_SOFTFP
#define RSET_SCRATCH_FPR 0
#else
#define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1))
#endif
/* All scratch registers. */
#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
/* Argument register ranges: first/last register and count, per register class. */
/* NOTE(review): the FPR entries use RID_D0/RID_D7, which the FPR list visible
** in this header (F0..F15) does not generate, and the GPR range starts at R0
** although RID_RET is given as R2 in this file -- confirm these values against
** the s390x calling convention.
*/
#define REGARG_FIRSTGPR RID_R0
#define REGARG_LASTGPR RID_R3
#define REGARG_NUMGPR 4
/* No FPR arguments with the soft-float ABI. */
#if LJ_ABI_SOFTFP
#define REGARG_FIRSTFPR 0
#define REGARG_LASTFPR 0
#define REGARG_NUMFPR 0
#else
#define REGARG_FIRSTFPR RID_D0
#define REGARG_LASTFPR RID_D7
#define REGARG_NUMFPR 8
#endif
/* -- Spill slots --------------------------------------------------------- */
/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
@ -127,63 +67,14 @@ enum {
/* This definition must match with the *.dasc file(s). */
/* Register and spill-slot image: one lua_Number per FPR (omitted for
** soft-float builds), one 32 bit slot per GPR and 256 32 bit spill slots.
** NOTE(review): the GPR slots are int32_t; confirm that is wide enough for a
** 64 bit target.
*/
typedef struct {
#if !LJ_SOFTFP
lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
#endif
int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
int32_t spill[256]; /* Spill slots. */
} ExitState;
/* PC after instruction that caused an exit. Used to find the trace number. */
#define EXITSTATE_PCREG RID_PC
/* Highest exit + 1 indicates stack check. */
#define EXITSTATE_CHECKEXIT 1
#define EXITSTUB_SPACING 4
#define EXITSTUBS_PER_GROUP 32
/* -- Instructions -------------------------------------------------------- */
/* Instruction fields. */
/* Each helper shifts a register number or shift amount into a bit position
** of an instruction word and is meant to be OR-ed into an opcode.
** NOTE(review): the names carry an ARMF_/ARMI_ prefix and ARMI_CCAL is not
** defined in the visible part of this s390x header -- these look like
** leftovers from another target; confirm before relying on them.
*/
#define ARMF_CC(ai, cc) (((ai) ^ ARMI_CCAL) | ((cc) << 28))
#define ARMF_N(r) ((r) << 16)
#define ARMF_D(r) ((r) << 12)
#define ARMF_S(r) ((r) << 8)
#define ARMF_M(r) (r)
#define ARMF_SH(sh, n) (((sh) << 5) | ((n) << 7))
#define ARMF_RSH(sh, r) (0x10 | ((sh) << 5) | ARMF_S(r))
/* Opcode templates, one enumerator per instruction mnemonic. Each value keeps
** the opcode in the most significant byte(s) of a 64 bit constant.
** NOTE(review): these values are not representable as int. ISO C requires
** enumeration constants to fit in an int, so this relies on a compiler
** extension -- confirm that is intended.
*/
typedef enum S390xIns {
S390I_SR = 0x1B00000000000000,
S390I_AR = 0x1A00000000000000,
S390I_NR = 0x1400000000000000,
S390I_XR = 0x1700000000000000,
S390I_MR = 0x1C00000000000000,
S390I_LR = 0x1800000000000000,
S390I_C = 0x5900000000000000,
S390I_LH = 0x4800000000000000,
S390I_BASR = 0x0D00000000000000,
S390I_MVCL = 0x0e00000000000000,
S390I_ST = 0x5000000000000000,
S390I_TM = 0x9100000000000000,
S390I_MP = 0xbd00009000000000,
S390I_CLR = 0x1500000000000000,
} S390xIns;
/* Shift operation kinds, numbered 0..2 in declaration order. The names
** suggest shift-left-logical, shift-right-logical and shift-right-arithmetic
** -- confirm against the users of this enum.
*/
typedef enum S390xShift {
S390SH_SLL, S390SH_SRL, S390SH_SRA
} S390xShift;
/* S390x condition codes. */
typedef enum S390xCC {
/* Z - Zero, LZ - Less than Zero, GZ - Greater than Zero,
** O - Overflow, NZ - Not Zero, ZC - Zero with Carry,
** NZC - Not Zero with Carry, ZNC - Zero with No Carry,
** EQ - Equal, NE - Not Equal, LO - Low, HI - High.
*/
CC_Z , CC_LZ , CC_GZ , CC_O ,
CC_NZ , CC_ZC , CC_NZC ,
CC_ZNC , CC_EQ , CC_NE , CC_LO , CC_HI
} S390xCC;
#endif

View File

@ -628,7 +628,6 @@ static void build_subroutines(BuildCtx *ctx)
| lg PC, -24(RB) // Restore PC from [cont|PC].
| lg RA, -32(RB)
|.if FFI
| stg r0, 0(r0) // TODO: remove once tested.
| clfi RA, 1
| jle >1
|.endif
@ -641,13 +640,12 @@ static void build_subroutines(BuildCtx *ctx)
|
|.if FFI
|1:
| stg r0, 0(r0) // TODO: remove once tested.
| je ->cont_ffi_callback // cont = 1: return from FFI callback.
| // cont = 0: Tail call from C function.
| sgr RB, BASE
| srl RB, 3(r0)
| ahi RB, -3
| llgf RD, RB
| llgfr RD, RB
| j ->vm_call_tail
|.endif
|
@ -880,8 +878,17 @@ static void build_subroutines(BuildCtx *ctx)
| j <3
|
|->vmeta_equal_cd:
|.if FFI
| // Equality comparison handled in C by lj_meta_equal_cd(L, ins).
| // The body must start directly with the handler: a store through address 0
| // placed in front of it would fault before the call is ever reached.
| lay PC, -4(PC) // Step PC back by one 4 byte instruction slot.
| lg L:RB, SAVE_L
| stg BASE, L:RB->base // Make the current BASE visible through L.
| lgr CARG1, L:RB
| llgf CARG2, -4(PC) // 32 bit word just below the adjusted PC: the ins argument.
| stg PC, SAVE_PC
| brasl r14, extern lj_meta_equal_cd // (lua_State *L, BCIns ins)
| // 0/1 or TValue * (metamethod) returned in r2 (CRET1).
| j <3
|.endif
|
|->vmeta_istype:
| lg L:RB, SAVE_L
@ -2165,9 +2172,58 @@ static void build_subroutines(BuildCtx *ctx)
| stg r0, 0(r0)
|
|->vm_ffi_call: // Call C function via FFI.
| // Caveat: needs special frame unwinding, see below.
| // In: CARG1 = CCallState *. Loads the argument registers and outgoing
| // stack slots from the state, calls CCSTATE->func and stores the GPR and
| // FPR return values back into gpr[0] and fpr[0] of the state.
|.if FFI
| .type CCSTATE, CCallState, r10
| stmg r6, r15, 48(sp) // TODO: need to save r6, but might be better in separate store?
| lgr CCSTATE, CARG1 // Keep the state pointer in r10 across the call.
|
| // Readjust stack.
| sgf sp, CCSTATE->spadj
|
| // Copy stack slots.
| llgc r0, CCSTATE->nsp // Number of 8 byte stack slots (single byte load).
| cghi r0, 0
| jle >3 // Nothing to copy.
| lay r1, (offsetof(CCallState, stack))(CCSTATE) // Source.
| lay r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
|1:
| // NOTE(review): nsp is loaded as one byte, so r0 < 256 here and this bulk
| // step is never taken. If it is ever enabled, mind that mvc moves 256
| // bytes while the pointers advance by 256*8 and the slot count by 256.
| cghi r0, 256
| jl >2
| mvc 0(256, r11), 0(r1)
| aghi r1, 256*8
| aghi r11, 256*8
| aghi r0, -256
| j <1
|2:
| // Copy the remaining slots one 8 byte slot at a time.
| cghi r0, 0
| je >3
| // TODO: exrl mvc rather than loop.
| mvc 0(8, r11), 0(r1)
| aghi r1, 8
| aghi r11, 8
| aghi r0, -1
| j <2
|3:
|
| // Load the five GPR and four FPR argument registers.
| lmg CARG1, CARG5, CCSTATE->gpr[0]
| // TODO: conditionally load FPRs?
| ld FARG1, CCSTATE->fpr[0]
| ld FARG2, CCSTATE->fpr[1]
| ld FARG3, CCSTATE->fpr[2]
| ld FARG4, CCSTATE->fpr[3]
|5:
| lg r1, CCSTATE->func // TODO: move further up?
| basr r14, r1
|
| // Save both possible return values.
| stg CRET1, CCSTATE->gpr[0]
| std f0, CCSTATE->fpr[0] // f0 is a floating-point register: needs std, not stg.
|
| // Undo the stack adjustment, restore r6-r15 and return.
| agf sp, CCSTATE->spadj
| lmg r6, r15, 48(sp)
| br r14
|.endif
|// Note: vm_ffi_call must be the last function in this object file!
| stg r0, 0(r0)
| stg r0, 0(r0)
|
|//-----------------------------------------------------------------------
}
@ -2767,8 +2823,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ins_next
break;
case BC_KCDATA:
|.if FFI
| // Load a cdata constant into a stack slot. The body must start directly
| // with the decode: a store through address 0 placed in front of it would
| // fault before the instruction does any work.
| ins_AND // RA = dst, RD = cdata const (~)
| sllg RD, RD, 3(r0) // Scale the constant index to an 8 byte slot offset.
| sllg RA, RA, 3(r0) // Scale the destination index likewise.
| lg RD, 0(RD, KBASE) // Fetch the constant relative to KBASE.
| settp RD, LJ_TCDATA // Tag the value as cdata.
| stg RD, 0(RA, BASE) // Store it into the destination slot.
| ins_next
|.endif
break;
case BC_KSHORT:
| ins_AD // RA = dst, RD = signed int16 literal