From bb98985db319889bd0350df82d1b0014d7015d18 Mon Sep 17 00:00:00 2001
From: Michael Munday
Date: Fri, 6 Jan 2017 16:19:56 -0500
Subject: [PATCH] Add partial FFI support.

Interestingly, enough to pass all the FFI tests. So s390x now passes
all the tests in LuaJIT-test-cleanup.

Summary of the changes:

* Makefile: stop forcing a "-g -O0" debug build.
* lj_arch.h: no longer define LJ_ARCH_NOFFI for s390x.
* lj_ccall.c, lj_ccall.h, lj_ccallback.c: add the s390x calling
  convention macros. Structs of size 1, 2, 4 or 8 and complex float
  are passed in a GPR, all other structs and complex double are passed
  by reference. FP arguments are limited by CCALL_NARG_FPR, all other
  arguments by maxgpr.
* lj_target.h, lj_target_s390x.h: include the s390x target header and
  drop the definitions left over from other targets.
* vm_s390x.dasc: implement vmeta_equal_cd, vm_ffi_call and BC_KCDATA.
  vm_ffi_call copies the outgoing stack slots (8 bytes each) in chunks
  of 32 slots, loads five GPR and four FPR arguments and stores the
  FP result with std.
---
 src/Makefile          |   2 +-
 src/lj_arch.h         |   1 -
 src/lj_ccall.c        |  35 +++++++++++
 src/lj_ccall.h        |   6 +-
 src/lj_ccallback.c    |   9 +++
 src/lj_target.h       |   2 +
 src/lj_target_s390x.h | 139 +++++-------------------------------------
 src/vm_s390x.dasc     |  81 +++++++++++++++++++++---
 8 files changed, 139 insertions(+), 136 deletions(-)

diff --git a/src/Makefile b/src/Makefile
index 8ecd6183..1450adc0 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -56,7 +56,7 @@ CCOPT_mips=
 #
 CCDEBUG=
 # Uncomment the next line to generate debug information:
-CCDEBUG= -g -O0
+#CCDEBUG= -g
 #
 CCWARN= -Wall
 # Uncomment the next line to enable more warnings:
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 81f4873e..d17884e5 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -370,7 +370,6 @@
 #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
 #define LJ_TARGET_GC64 1
 #define LJ_ARCH_NOJIT 1 /* NYI */
-#define LJ_ARCH_NOFFI 1 /* Disable FFI for now. */
 
 #else
 #error "No target architecture defined"
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index b599be33..a6b0a8fd 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -555,6 +555,41 @@
     goto done; \
   }
 
+#elif LJ_TARGET_S390X
+/* -- POSIX/s390x calling conventions --------------------------------------- */
+
+#define CCALL_HANDLE_STRUCTRET \
+  /* Return structs of size 1, 2, 4 or 8 in a GPR. */ \
+  cc->retref = !(sz == 1 || sz == 2 || sz == 4 || sz == 8); \
+  if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
+
+#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+  if (!cc->retref) \
+    *(int64_t *)dp = *(int64_t *)sp; /* Copy complex float from GPRs. */
+
+#define CCALL_HANDLE_STRUCTARG \
+  /* Pass structs of size 1, 2, 4 or 8 in a GPR by value. */ \
+  if (!(sz == 1 || sz == 2 || sz == 4 || sz == 8)) { \
+    rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+    sz = CTSIZE_PTR; /* Pass all other structs by reference. */ \
+  }
+
+#define CCALL_HANDLE_COMPLEXARG \
+  /* Pass complex float in a GPR and complex double by reference. */ \
+  if (sz != 2*sizeof(float)) { \
+    rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+    sz = CTSIZE_PTR; \
+  }
+
+#define CCALL_HANDLE_REGARG \
+  if (isfp) { \
+    if (nfpr < CCALL_NARG_FPR) { dp = &cc->fpr[nfpr++]; goto done; } \
+  } else { \
+    if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \
+  }
+
 #else
 #error "Missing calling convention definitions for this architecture"
 #endif
diff --git a/src/lj_ccall.h b/src/lj_ccall.h
index 2a10a5e8..84e7926b 100644
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -136,7 +136,11 @@ typedef union FPRArg {
 #define CCALL_SPS_FREE 0
 
 typedef intptr_t GPRArg;
-typedef double FPRArg;
+typedef union FPRArg {
+  double d;
+  float f;
+} FPRArg;
+
 #else
 #error "Missing calling convention definitions for this architecture"
 #endif
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index 2ca6406c..78040057 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -495,6 +495,15 @@ void lj_ccallback_mcode_free(CTState *cts)
   if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
     ((float *)dp)[1] = *(float *)dp;
 
+#elif LJ_TARGET_S390X
+
+#define CALLBACK_HANDLE_REGARG \
+  if (isfp) { \
+    if (nfpr < CCALL_NARG_FPR) { sp = &cts->cb.fpr[nfpr++]; goto done; } \
+  } else { \
+    if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \
+  }
+
 #else
 #error "Missing calling convention definitions for this architecture"
 #endif
diff --git a/src/lj_target.h b/src/lj_target.h
index abea8d5b..467860b8 100644
--- a/src/lj_target.h
+++ b/src/lj_target.h
@@ -144,6 +144,8 @@ typedef uint32_t RegCost;
 #include "lj_target_ppc.h"
 #elif LJ_TARGET_MIPS
 #include "lj_target_mips.h"
+#elif LJ_TARGET_S390X
+#include "lj_target_s390x.h"
 #else
 #error "Missing include for target CPU"
 #endif
diff --git a/src/lj_target_s390x.h b/src/lj_target_s390x.h
index 4e35891a..6e0245fe 100644
--- a/src/lj_target_s390x.h
+++ b/src/lj_target_s390x.h
@@ -1,26 +1,22 @@
 /*
-** Definitions for S390 CPUs.
+** Definitions for IBM z/Architecture (s390x) CPUs.
 ** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
 */
 
-#ifndef _LJ_TARGET_S390_H
-#define _LJ_TARGET_S390_H
+#ifndef _LJ_TARGET_S390X_H
+#define _LJ_TARGET_S390X_H
 
 /* -- Registers IDs ------------------------------------------------------- */
 
 #define GPRDEF(_) \
   _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \
-  _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _R(15) \
-#if LJ_SOFTFP
-#define FPRDEF(_)
-#else
+  _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15)
 #define FPRDEF(_) \
   _(F0) _(F1) _(F2) _(F3) \
   _(F4) _(F5) _(F6) _(F7) \
   _(F8) _(F9) _(F10) _(F11) \
   _(F12) _(F13) _(F14) _(F15)
-#endif
-#define VRIDDEF(_)
+/* TODO: VREG? */
 
 #define RIDENUM(name) RID_##name,
 
@@ -28,84 +24,28 @@ enum {
   GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
   FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
   RID_MAX,
-  RID_TMP = RID_LR,
 
   /* Calling conventions. */
-  RID_RET = RID_R0,
-  RID_RETLO = RID_R0,
-  RID_RETHI = RID_R1,
-#if LJ_SOFTFP
-  RID_FPRET = RID_R0,
-#else
-  RID_FPRET = RID_D0,
-#endif
+  RID_SP = RID_R15,
+  RID_RET = RID_R2,
+  RID_FPRET = RID_F0,
 
   /* These definitions must match with the *.dasc file(s): */
-  RID_BASE = RID_R9, /* Interpreter BASE. */
-  RID_LPC = RID_R6, /* Interpreter PC. */
-  RID_DISPATCH = RID_R7, /* Interpreter DISPATCH table. */
-  RID_LREG = RID_R8, /* Interpreter L. */
+  RID_BASE = RID_R7, /* Interpreter BASE. */
+  RID_LPC = RID_R9, /* Interpreter PC. */
+  RID_DISPATCH = RID_R10, /* Interpreter DISPATCH table. */
 
   /* Register ranges [min, max) and number of registers. */
   RID_MIN_GPR = RID_R0,
-  RID_MAX_GPR = RID_PC+1,
-  RID_MIN_FPR = RID_MAX_GPR,
-#if LJ_SOFTFP
-  RID_MAX_FPR = RID_MIN_FPR,
-#else
-  RID_MAX_FPR = RID_D15+1,
-#endif
+  RID_MIN_FPR = RID_F0,
+  RID_MAX_GPR = RID_MIN_FPR,
+  RID_MAX_FPR = RID_MAX,
   RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
-  RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
+  RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR,
 };
 
-#define RID_NUM_KREF RID_NUM_GPR
-#define RID_MIN_KREF RID_R0
-
 /* -- Register sets ------------------------------------------------------- */
 
-/* Make use of all registers, except sp, lr and pc. */
-#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_R12+1))
-#define RSET_GPREVEN \
-  (RID2RSET(RID_R0)|RID2RSET(RID_R2)|RID2RSET(RID_R4)|RID2RSET(RID_R6)| \
-   RID2RSET(RID_R8)|RID2RSET(RID_R10))
-#define RSET_GPRODD \
-  (RID2RSET(RID_R1)|RID2RSET(RID_R3)|RID2RSET(RID_R5)|RID2RSET(RID_R7)| \
-   RID2RSET(RID_R9)|RID2RSET(RID_R11))
-#if LJ_SOFTFP
-#define RSET_FPR 0
-#else
-#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
-#endif
-#define RSET_ALL (RSET_GPR|RSET_FPR)
-#define RSET_INIT RSET_ALL
-
-/* ABI-specific register sets. lr is an implicit scratch register. */
-#define RSET_SCRATCH_GPR_ (RSET_RANGE(RID_R0, RID_R3+1)|RID2RSET(RID_R12))
-#ifdef __APPLE__
-#define RSET_SCRATCH_GPR (RSET_SCRATCH_GPR_|RID2RSET(RID_R9))
-#else
-#define RSET_SCRATCH_GPR RSET_SCRATCH_GPR_
-#endif
-#if LJ_SOFTFP
-#define RSET_SCRATCH_FPR 0
-#else
-#define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1))
-#endif
-#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
-#define REGARG_FIRSTGPR RID_R0
-#define REGARG_LASTGPR RID_R3
-#define REGARG_NUMGPR 4
-#if LJ_ABI_SOFTFP
-#define REGARG_FIRSTFPR 0
-#define REGARG_LASTFPR 0
-#define REGARG_NUMFPR 0
-#else
-#define REGARG_FIRSTFPR RID_D0
-#define REGARG_LASTFPR RID_D7
-#define REGARG_NUMFPR 8
-#endif
-
 /* -- Spill slots --------------------------------------------------------- */
 
 /* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
@@ -127,63 +67,14 @@ enum {
 
 /* This definition must match with the *.dasc file(s). */
 typedef struct {
-#if !LJ_SOFTFP
   lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
-#endif
   int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
   int32_t spill[256]; /* Spill slots. */
 } ExitState;
 
-/* PC after instruction that caused an exit. Used to find the trace number. */
-#define EXITSTATE_PCREG RID_PC
-/* Highest exit + 1 indicates stack check. */
-#define EXITSTATE_CHECKEXIT 1
-
 #define EXITSTUB_SPACING 4
 #define EXITSTUBS_PER_GROUP 32
 
 /* -- Instructions -------------------------------------------------------- */
 
-/* Instruction fields. */
-#define ARMF_CC(ai, cc) (((ai) ^ ARMI_CCAL) | ((cc) << 28))
-#define ARMF_N(r) ((r) << 16)
-#define ARMF_D(r) ((r) << 12)
-#define ARMF_S(r) ((r) << 8)
-#define ARMF_M(r) (r)
-#define ARMF_SH(sh, n) (((sh) << 5) | ((n) << 7))
-#define ARMF_RSH(sh, r) (0x10 | ((sh) << 5) | ARMF_S(r))
-
-typedef enum S390xIns {
-  S390I_SR = 0x1B00000000000000,
-  S390I_AR = 0x1A00000000000000,
-  S390I_NR = 0x1400000000000000,
-  S390I_XR = 0x1700000000000000,
-  S390I_MR = 0x1C00000000000000,
-  S390I_LR = 0x1800000000000000,
-  S390I_C = 0x5900000000000000,
-  S390I_LH = 0x4800000000000000,
-  S390I_BASR = 0x0D00000000000000,
-  S390I_MVCL = 0x0e00000000000000,
-  S390I_ST = 0x5000000000000000,
-  S390I_TM = 0x9100000000000000,
-  S390I_MP = 0xbd00009000000000,
-  S390I_CLR = 0x1500000000000000,
-} S390xIns;
-
-typedef enum S390xShift {
-  S390SH_SLL, S390SH_SRL, S390SH_SRA
-} S390xShift;
-
-/* S390x condition codes. */
-typedef enum S390xCC {
-  /* Z- Zero , LZ - Less thena Zero , GZ - Greater than Zero
-     O - Overflow , NZ - Not Zero , ZC - Zero with carry
-     NZC - No Zero with carry , ZNC - Zero with No Carry
-     EQ - Equal , NE - Not Equal , LO - Loq , HI - High
-  */
-  CC_Z , CC_LZ , CC_GZ , CC_O ,
-  CC_NZ , CC_ZC , CC_NZC ,
-  CC_ZNC , CC_EQ , CC_NE , CC_LO , CC_HI
-} S390xCC;
-
 #endif
diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
index 8fa928b7..6ca7e130 100644
--- a/src/vm_s390x.dasc
+++ b/src/vm_s390x.dasc
@@ -628,7 +628,6 @@ static void build_subroutines(BuildCtx *ctx)
   |  lg PC, -24(RB) // Restore PC from [cont|PC].
   |  lg RA, -32(RB)
   |.if FFI
-  |  stg r0, 0(r0) // TODO: remove once tested.
   |  clfi RA, 1
   |  jle >1
   |.endif
@@ -641,13 +640,12 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |.if FFI
   |1:
-  |  stg r0, 0(r0) // TODO: remove once tested.
   |  je ->cont_ffi_callback // cont = 1: return from FFI callback.
   |  // cont = 0: Tail call from C function.
   |  sgr RB, BASE
   |  srl RB, 3(r0)
   |  ahi RB, -3
-  |  llgf RD, RB
+  |  llgfr RD, RB
   |  j ->vm_call_tail
   |.endif
   |
@@ -880,8 +878,17 @@ static void build_subroutines(BuildCtx *ctx)
   |  j <3
   |
   |->vmeta_equal_cd:
-  |  stg r0, 0(r0)
-  |  stg r0, 0(r0)
+  |.if FFI
+  |  lay PC, -4(PC)
+  |  lg L:RB, SAVE_L
+  |  stg BASE, L:RB->base
+  |  lgr CARG1, L:RB
+  |  llgf CARG2, -4(PC)
+  |  stg PC, SAVE_PC
+  |  brasl r14, extern lj_meta_equal_cd // (lua_State *L, BCIns ins)
+  |  // 0/1 or TValue * (metamethod) returned in r2 (CRET1).
+  |  j <3
+  |.endif
   |
   |->vmeta_istype:
   |  lg L:RB, SAVE_L
@@ -2165,9 +2172,58 @@ static void build_subroutines(BuildCtx *ctx)
   |  stg r0, 0(r0)
   |
   |->vm_ffi_call: // Call C function via FFI.
+  |  // Caveat: needs special frame unwinding, see below.
+  |.if FFI
+  |  .type CCSTATE, CCallState, r10
+  |  stmg r6, r15, 48(sp) // TODO: need to save r6, but might be better in separate store?
+  |  lgr CCSTATE, CARG1
+  |
+  |  // Readjust stack.
+  |  sgf sp, CCSTATE->spadj
+  |
+  |  // Copy stack slots. r0 counts the remaining 8 byte slots.
+  |  llgc r0, CCSTATE->nsp
+  |  cghi r0, 0
+  |  jle >3
+  |  lay r1, (offsetof(CCallState, stack))(CCSTATE) // Source.
+  |  lay r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
+  |1:
+  |  cghi r0, 32
+  |  jl >2
+  |  mvc 0(256, r11), 0(r1) // Copy 32 slots (256 bytes) at once.
+  |  aghi r1, 256
+  |  aghi r11, 256
+  |  aghi r0, -32
+  |  j <1
+  |2:
+  |  cghi r0, 0
+  |  je >3
+  |  // TODO: exrl mvc rather than loop.
+  |  mvc 0(8, r11), 0(r1)
+  |  aghi r1, 8
+  |  aghi r11, 8
+  |  aghi r0, -1
+  |  j <2
+  |3:
+  |
+  |  lmg CARG1, CARG5, CCSTATE->gpr[0]
+  |  // TODO: conditionally load FPRs?
+  |  ld FARG1, CCSTATE->fpr[0]
+  |  ld FARG2, CCSTATE->fpr[1]
+  |  ld FARG3, CCSTATE->fpr[2]
+  |  ld FARG4, CCSTATE->fpr[3]
+  |5:
+  |  lg r1, CCSTATE->func // TODO: move further up?
+  |  basr r14, r1
+  |
+  |  stg CRET1, CCSTATE->gpr[0]
+  |  std f0, CCSTATE->fpr[0] // FP result needs the FPR store (std, not stg).
+  |
+  |  agf sp, CCSTATE->spadj
+  |  lmg r6, r15, 48(sp)
+  |  br r14
+  |.endif
   |// Note: vm_ffi_call must be the last function in this object file!
-  |  stg r0, 0(r0)
-  |  stg r0, 0(r0)
   |
   |//-----------------------------------------------------------------------
 }
@@ -2767,8 +2823,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  ins_next
     break;
   case BC_KCDATA:
-    |  stg r0, 0(r0)
-    |  stg r0, 0(r0)
+    |.if FFI
+    |  ins_AND // RA = dst, RD = cdata const (~)
+    |  sllg RD, RD, 3(r0)
+    |  sllg RA, RA, 3(r0)
+    |  lg RD, 0(RD, KBASE)
+    |  settp RD, LJ_TCDATA
+    |  stg RD, 0(RA, BASE)
+    |  ins_next
+    |.endif
     break;
   case BC_KSHORT:
     |  ins_AD // RA = dst, RD = signed int16 literal