diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua index e2f704ec..1b0f9add 100644 --- a/dynasm/dasm_ppc.lua +++ b/dynasm/dasm_ppc.lua @@ -257,9 +257,11 @@ map_op = { addic_3 = "30000000RRI", ["addic._3"] = "34000000RRI", addi_3 = "38000000RR0I", + addil_3 = "38000000RR0J", li_2 = "38000000RI", la_2 = "38000000RD", addis_3 = "3c000000RR0I", + addisl_3 = "3c000000RR0J", lis_2 = "3c000000RI", lus_2 = "3c000000RU", bc_3 = "40000000AAK", @@ -842,6 +844,9 @@ map_op = { srdi_3 = op_alias("rldicl_4", function(p) p[4] = p[3]; p[3] = "64-("..p[3]..")" end), + ["srdi._3"] = op_alias("rldicl._4", function(p) + p[4] = p[3]; p[3] = "64-("..p[3]..")" + end), clrldi_3 = op_alias("rldicl_4", function(p) p[4] = p[3]; p[3] = "0" end), diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c index 28419c07..5e5025e6 100644 --- a/src/host/buildvm_asm.c +++ b/src/host/buildvm_asm.c @@ -136,18 +136,14 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, #else #define TOCPREFIX "" #endif - if ((ins >> 26) == 16) { + if ((ins >> 26) == 14) { + fprintf(ctx->fp, "\taddi %d,%d,%s\n", (ins >> 21) & 31, (ins >> 16) & 31, sym); + } else if ((ins >> 26) == 15) { + fprintf(ctx->fp, "\taddis %d,%d,%s\n", (ins >> 21) & 31, (ins >> 16) & 31, sym); + } else if ((ins >> 26) == 16) { fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n", (ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym); } else if ((ins >> 26) == 18) { -#if LJ_ARCH_PPC64 - const char *suffix = strchr(sym, '@'); - if (suffix && suffix[1] == 'h') { - fprintf(ctx->fp, "\taddis 11, 2, %s\n", sym); - } else if (suffix && suffix[1] == 'l') { - fprintf(ctx->fp, "\tld 12, %s\n", sym); - } else -#endif fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym); } else { fprintf(stderr, @@ -245,7 +241,7 @@ void emit_asm(BuildCtx *ctx) int i, rel; fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch); -#if LJ_ARCH_PPC64 +#if LJ_ARCH_PPC_ELFV2 fprintf(ctx->fp, "\t.abiversion 2\n"); #endif fprintf(ctx->fp, "\t.text\n"); diff --git a/src/lj_arch.h b/src/lj_arch.h index 159fd45d..12080e22 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -269,7 +269,6 @@ #elif LJ_ARCH_BITS == 64 #define LJ_ARCH_PPC32ON64 1 #define LJ_ARCH_NOJIT 1 /* NYI */ -#define LJ_ARCH_NOFFI 1 /* NYI */ #if _CALL_ELF == 2 #define LJ_ARCH_PPC_ELFV2 1 #else diff --git a/src/lj_ccall.c b/src/lj_ccall.c index b599be33..ee34b52f 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -362,6 +362,82 @@ #elif LJ_TARGET_PPC /* -- PPC calling conventions --------------------------------------------- */ +#if LJ_ARCH_BITS == 64 + +#if LJ_ARCH_PPC_ELFV2 + +#define CCALL_HANDLE_STRUCTRET \ + if (sz > 16 && ccall_classify_fp(cts, ctr) <= 0) { \ + cc->retref = 1; /* Return by reference. */ \ + cc->gpr[ngpr++] = (GPRArg)dp; \ + } + +#define CCALL_HANDLE_STRUCTRET2 \ + int isfp = ccall_classify_fp(cts, ctr); \ + int i; \ + if (isfp == FTYPE_FLOAT) { \ + for (i = 0; i < ctr->size / 4; i++) \ + ((float *)dp)[i] = cc->fpr[i]; \ + } else if (isfp == FTYPE_DOUBLE) { \ + for (i = 0; i < ctr->size / 8; i++) \ + ((double *)dp)[i] = cc->fpr[i]; \ + } else { \ + if (ctr->size < 8 && LJ_BE) { \ + sp += 8 - ctr->size; \ + } \ + memcpy(dp, sp, ctr->size); \ + } + +#else + +#define CCALL_HANDLE_STRUCTRET \ + cc->retref = 1; /* Return all structs by reference. */ \ + cc->gpr[ngpr++] = (GPRArg)dp; + +#endif + +#define CCALL_HANDLE_COMPLEXRET \ + /* Complex values are returned in 2 or 4 GPRs. */ \ + cc->retref = 0; + +#define CCALL_HANDLE_STRUCTARG + +#define CCALL_HANDLE_COMPLEXRET2 \ + if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ + ((float *)dp)[0] = cc->fpr[0]; \ + ((float *)dp)[1] = cc->fpr[1]; \ + } else { /* Copy complex double from FPRs. */ \ + ((double *)dp)[0] = cc->fpr[0]; \ + ((double *)dp)[1] = cc->fpr[1]; \ + } + +#define CCALL_HANDLE_COMPLEXARG \ + isfp = 1; \ + if (d->size == sizeof(float) * 2) { \ + d = ctype_get(cts, CTID_COMPLEX_DOUBLE); \ + isf32 = 1; \ + } + +#define CCALL_HANDLE_REGARG \ + if (isfp && d->size == sizeof(float)) { \ + d = ctype_get(cts, CTID_DOUBLE); \ + isf32 = 1; \ + } \ + if (ngpr < maxgpr) { \ + dp = &cc->gpr[ngpr]; \ + ngpr += n; \ + if (ngpr > maxgpr) { \ + nsp += ngpr - 8; \ + ngpr = 8; \ + if (nsp > CCALL_MAXSTACK) { \ + goto err_nyi; \ + } \ + } \ + goto done; \ + } + +#else + #define CCALL_HANDLE_STRUCTRET \ cc->retref = 1; /* Return all structs by reference. */ \ cc->gpr[ngpr++] = (GPRArg)dp; @@ -370,13 +446,13 @@ /* Complex values are returned in 2 or 4 GPRs. */ \ cc->retref = 0; -#define CCALL_HANDLE_COMPLEXRET2 \ - memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */ - #define CCALL_HANDLE_STRUCTARG \ rp = cdataptr(lj_cdata_new(cts, did, sz)); \ sz = CTSIZE_PTR; /* Pass all structs by reference. */ +#define CCALL_HANDLE_COMPLEXRET2 \ + memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */ + #define CCALL_HANDLE_COMPLEXARG \ /* Pass complex by value in 2 or 4 GPRs. */ @@ -403,6 +479,8 @@ } \ } +#endif + #define CCALL_HANDLE_RET \ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ @@ -794,6 +872,50 @@ noth: /* Not a homogeneous float/double aggregate. */ #endif +/* -- PowerPC64 ELFv2 ABI struct classification ------------------- */ + +#if LJ_ARCH_PPC_ELFV2 + +#define FTYPE_FLOAT 1 +#define FTYPE_DOUBLE 2 + +static unsigned int ccall_classify_fp(CTState *cts, CType *ct) { + if (ctype_isfp(ct->info)) { + if (ct->size == sizeof(float)) + return FTYPE_FLOAT; + else + return FTYPE_DOUBLE; + } else if (ctype_iscomplex(ct->info)) { + if (ct->size == sizeof(float) * 2) + return FTYPE_FLOAT; + else + return FTYPE_DOUBLE; + } else if (ctype_isstruct(ct->info)) { + int res = -1; + int sz = ct->size; + while (ct->sib) { + ct = ctype_get(cts, ct->sib); + if (ctype_isfield(ct->info)) { + int sub = ccall_classify_fp(cts, ctype_rawchild(cts, ct)); + if (res == -1) + res = sub; + if (sub != -1 && sub != res) + return 0; + } else if (ctype_isbitfield(ct->info) || + ctype_isxattrib(ct->info, CTA_SUBTYPE)) { + return 0; + } + } + if (res > 0 && sz > res * 4 * 8) + return 0; + return res; + } else { + return 0; + } +} + +#endif + /* -- MIPS64 ABI struct classification ---------------------------- */ #if LJ_TARGET_MIPS64 @@ -967,6 +1089,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, CTSize sz; MSize n, isfp = 0, isva = 0; void *dp, *rp = NULL; +#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 + int isf32 = 0; +#endif if (fid) { /* Get argument type from field. */ CType *ctf = ctype_get(cts, fid); @@ -1023,7 +1148,37 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, *(void **)dp = rp; dp = rp; } +#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 && LJ_BE + if (ctype_isstruct(d->info) && sz < CTSIZE_PTR) { + dp = (char *)dp + (CTSIZE_PTR - sz); + } +#endif lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); +#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 + if (isfp) { + int i; + for (i = 0; i < d->size / 8 && nfpr < CCALL_NARG_FPR; i++) + cc->fpr[nfpr++] = ((double *)dp)[i]; + } + if (isf32) { + int i; + for (i = 0; i < d->size / 8; i++) + ((float *)dp)[i*2] = ((double *)dp)[i]; + } +#endif +#if LJ_ARCH_PPC_ELFV2 + if (ctype_isstruct(d->info)) { + isfp = ccall_classify_fp(cts, d); + int i; + if (isfp == FTYPE_FLOAT) { + for (i = 0; i < d->size / 4 && nfpr < CCALL_NARG_FPR; i++) + cc->fpr[nfpr++] = ((float *)dp)[i]; + } else if (isfp == FTYPE_DOUBLE) { + for (i = 0; i < d->size / 8 && nfpr < CCALL_NARG_FPR; i++) + cc->fpr[nfpr++] = ((double *)dp)[i]; + } + } +#endif /* Extend passed integers to 32 bits at least. */ if (ctype_isinteger_or_bool(d->info) && d->size < 4) { if (d->info & CTF_UNSIGNED) @@ -1033,6 +1188,15 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp : (int32_t)*(int16_t *)dp; } +#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 + if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)) + && d->size <= 4) { + if (d->info & CTF_UNSIGNED) + *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp; + else + *(int64_t *)dp = (int64_t)*(int32_t *)dp; + } +#endif #if LJ_TARGET_MIPS64 if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) || (isfp && nsp == 0)) && d->size <= 4) { diff --git a/src/lj_ccall.h b/src/lj_ccall.h index d97227a6..fcce7792 100644 --- a/src/lj_ccall.h +++ b/src/lj_ccall.h @@ -86,10 +86,23 @@ typedef union FPRArg { #elif LJ_TARGET_PPC #define CCALL_NARG_GPR 8 +#if LJ_ARCH_BITS == 64 +#define CCALL_NARG_FPR 13 +#if LJ_ARCH_PPC_ELFV2 +#define CCALL_NRET_GPR 2 +#define CCALL_NRET_FPR 8 +#define CCALL_SPS_EXTRA 14 +#else +#define CCALL_NRET_GPR 1 +#define CCALL_NRET_FPR 2 +#define CCALL_SPS_EXTRA 16 +#endif +#else #define CCALL_NARG_FPR 8 #define CCALL_NRET_GPR 4 /* For complex double. */ #define CCALL_NRET_FPR 1 #define CCALL_SPS_EXTRA 4 +#endif #define CCALL_SPS_FREE 0 typedef intptr_t GPRArg; diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index 2ca6406c..c2a55c02 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -61,8 +61,24 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) #elif LJ_TARGET_PPC +#if LJ_ARCH_PPC_OPD + +#define CALLBACK_SLOT2OFS(slot) (24*(slot)) +#define CALLBACK_OFS2SLOT(ofs) ((ofs)/24) +#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) + +#elif LJ_ARCH_PPC_ELFV2 + +#define CALLBACK_SLOT2OFS(slot) (4*(slot)) +#define CALLBACK_OFS2SLOT(ofs) ((ofs)/4) +#define CALLBACK_MAX_SLOT (CALLBACK_MCODE_SIZE/4 - 10) + +#else + #define CALLBACK_MCODE_HEAD 24 +#endif + #elif LJ_TARGET_MIPS32 #define CALLBACK_MCODE_HEAD 20 @@ -188,24 +204,59 @@ static void callback_mcode_init(global_State *g, uint32_t *page) lua_assert(p - page <= CALLBACK_MCODE_SIZE); } #elif LJ_TARGET_PPC +#if LJ_ARCH_PPC_OPD +register void *vm_toc __asm__("r2"); +static void callback_mcode_init(global_State *g, uint64_t *page) +{ + uint64_t *p = page; + void *target = (void *)lj_vm_ffi_callback; + MSize slot; + for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { + *p++ = (uint64_t)target; + *p++ = (uint64_t)vm_toc; + *p++ = (uint64_t)g | ((uint64_t)slot << 47); + } + lua_assert(p - page <= CALLBACK_MCODE_SIZE / 8); +} +#else static void callback_mcode_init(global_State *g, uint32_t *page) { uint32_t *p = page; void *target = (void *)lj_vm_ffi_callback; MSize slot; +#if LJ_ARCH_PPC_ELFV2 + // Needs to be in sync with lj_vm_ffi_callback. + lua_assert(CALLBACK_MCODE_SIZE == 4096); + for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { + *p = PPCI_B | (((page+CALLBACK_MAX_SLOT-p) & 0x00ffffffu) << 2); + p++; + } + *p++ = PPCI_LI | PPCF_T(RID_SYS1) | ((((intptr_t)target) >> 32) & 0xffff); + *p++ = PPCI_LI | PPCF_T(RID_R11) | ((((intptr_t)g) >> 32) & 0xffff); + *p++ = PPCI_RLDICR | PPCF_T(RID_SYS1) | PPCF_A(RID_SYS1) | PPCF_SH(32) | PPCF_M6(63-32); /* sldi */ + *p++ = PPCI_RLDICR | PPCF_T(RID_R11) | PPCF_A(RID_R11) | PPCF_SH(32) | PPCF_M6(63-32); /* sldi */ + *p++ = PPCI_ORIS | PPCF_A(RID_SYS1) | PPCF_T(RID_SYS1) | ((((intptr_t)target) >> 16) & 0xffff); + *p++ = PPCI_ORIS | PPCF_A(RID_R11) | PPCF_T(RID_R11) | ((((intptr_t)g) >> 16) & 0xffff); + *p++ = PPCI_ORI | PPCF_A(RID_SYS1) | PPCF_T(RID_SYS1) | (((intptr_t)target) & 0xffff); + *p++ = PPCI_ORI | PPCF_A(RID_R11) | PPCF_T(RID_R11) | (((intptr_t)g) & 0xffff); + *p++ = PPCI_MTCTR | PPCF_T(RID_SYS1); + *p++ = PPCI_BCTR; +#else *p++ = PPCI_LIS | PPCF_T(RID_TMP) | (u32ptr(target) >> 16); - *p++ = PPCI_LIS | PPCF_T(RID_R12) | (u32ptr(g) >> 16); + *p++ = PPCI_LIS | PPCF_T(RID_R11) | (u32ptr(g) >> 16); *p++ = PPCI_ORI | PPCF_A(RID_TMP)|PPCF_T(RID_TMP) | (u32ptr(target) & 0xffff); - *p++ = PPCI_ORI | PPCF_A(RID_R12)|PPCF_T(RID_R12) | (u32ptr(g) & 0xffff); + *p++ = PPCI_ORI | PPCF_A(RID_R11)|PPCF_T(RID_R11) | (u32ptr(g) & 0xffff); *p++ = PPCI_MTCTR | PPCF_T(RID_TMP); *p++ = PPCI_BCTR; for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { - *p++ = PPCI_LI | PPCF_T(RID_R11) | slot; + *p++ = PPCI_LI | PPCF_T(RID_R12) | slot; *p = PPCI_B | (((page-p) & 0x00ffffffu) << 2); p++; } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); +#endif + lua_assert(p - page <= CALLBACK_MCODE_SIZE / 4); } +#endif #elif LJ_TARGET_MIPS static void callback_mcode_init(global_State *g, uint32_t *page) { @@ -637,6 +688,15 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : (int32_t)*(int16_t *)dp; } +#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 + if (ctr->size <= 4 && + (ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info))) { + if (ctr->info & CTF_UNSIGNED) + *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp; + else + *(int64_t *)dp = (int64_t)*(int32_t *)dp; + } +#endif #if LJ_TARGET_MIPS64 /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ if (ctr->size <= 4 && diff --git a/src/lj_ctype.h b/src/lj_ctype.h index e9b426f0..9bba59e6 100644 --- a/src/lj_ctype.h +++ b/src/lj_ctype.h @@ -153,7 +153,7 @@ typedef struct CType { /* Simplify target-specific configuration. Checked in lj_ccall.h. */ #define CCALL_MAX_GPR 8 -#define CCALL_MAX_FPR 8 +#define CCALL_MAX_FPR 14 typedef LJ_ALIGN(8) union FPRCBArg { double d; float f[2]; } FPRCBArg; diff --git a/src/lj_def.h b/src/lj_def.h index 9413399d..a22eb048 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -71,7 +71,11 @@ typedef unsigned int uintptr_t; #define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */ #define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */ +#if defined(__powerpc64__) && _CALL_ELF != 2 +#define LJ_NUM_CBPAGE 4 /* Number of FFI callback pages. */ +#else #define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */ +#endif /* Minimum table/buffer sizes. */ #define LJ_MIN_GLOBAL 6 /* Min. global table size (hbits). */ diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h index bbf22390..794d29e6 100644 --- a/src/lj_target_ppc.h +++ b/src/lj_target_ppc.h @@ -131,6 +131,8 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) #define PPCF_C(r) ((r) << 6) #define PPCF_MB(n) ((n) << 6) #define PPCF_ME(n) ((n) << 1) +#define PPCF_SH(n) ((((n) & 31) << (11+1)) | (((n) & 32) >> (5-1))) +#define PPCF_M6(n) ((((n) & 31) << (5+1)) | (((n) & 32) << (11-5))) #define PPCF_Y 0x00200000 #define PPCF_DOT 0x00000001 @@ -200,6 +202,13 @@ typedef enum PPCIns { PPCI_RLWINM = 0x54000000, PPCI_RLWIMI = 0x50000000, + PPCI_RLDICL = 0x78000000, + PPCI_RLDICR = 0x78000004, + PPCI_RLDIC = 0x78000008, + PPCI_RLDIMI = 0x7800000c, + PPCI_RLDCL = 0x78000010, + PPCI_RLDCR = 0x78000012, + PPCI_B = 0x48000000, PPCI_BL = 0x48000001, PPCI_BC = 0x40800000, diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index a9c5e602..a5866e6b 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc @@ -33,16 +33,16 @@ |.macro lpx, a, b, c; ldx a, b, c; .endmacro |.macro lp, a, b; ld a, b; .endmacro |.macro stp, a, b; std a, b; .endmacro +|.macro stpx, a, b, c; stdx a, b, c; .endmacro |.define decode_OPP, decode_OP8 -|.if FFI -|// Missing: Calling conventions, 64 bit regs, TOC. -|.error lib_ffi not yet implemented for PPC64 -|.endif +|.define WORD_SIZE, 8 |.else |.macro lpx, a, b, c; lwzx a, b, c; .endmacro |.macro lp, a, b; lwz a, b; .endmacro |.macro stp, a, b; stw a, b; .endmacro +|.macro stpx, a, b, c; stwx a, b, c; .endmacro |.define decode_OPP, decode_OP4 +|.define WORD_SIZE, 4 |.endif | |// Convenience macros for TOC handling. @@ -2525,9 +2525,9 @@ static void build_subroutines(BuildCtx *ctx) | bgt >5 // Need to grow stack. | mtctr FUNCREG | bctrl // (lua_State *L) + | .toc lp TOCREG, SAVE_TOC | // Either throws an error, or recovers and returns -1, 0 or nresults+1. | lp BASE, L->base - | .toc lp TOCREG, SAVE_TOC | cmpwi CRET1, 0 | slwi RD, CRET1, 3 | la RA, -8(BASE) @@ -2928,10 +2928,18 @@ static void build_subroutines(BuildCtx *ctx) |->vm_cachesync: |.if JIT or FFI | // Compute start of first cache line and number of cache lines. + | .if GPR64 + | rldicr CARG1, CARG1, 0, 58 + | .else | rlwinm CARG1, CARG1, 0, 0, 26 + | .endif | sub CARG2, CARG2, CARG1 | addi CARG2, CARG2, 31 + | .if GPR64 + | srdi. CARG2, CARG2, 5 + | .else | rlwinm. CARG2, CARG2, 27, 5, 31 + | .endif | beqlr | mtctr CARG2 | mr CARG3, CARG1 @@ -2953,32 +2961,54 @@ static void build_subroutines(BuildCtx *ctx) |//-- FFI helper functions ----------------------------------------------- |//----------------------------------------------------------------------- | - |// Handler for callback functions. Callback slot number in r11, g in r12. + |// Handler for callback functions. Callback slot number in r12, g in r11. |->vm_ffi_callback: |.if FFI |.type CTSTATE, CTState, PC + | .if OPD + | rldicl r12, r11, 17, 47 + | rldicl r11, r11, 0, 17 + | .endif + | .if ELFV2 + | rlwinm r12, r12, 30, 22, 31 + | addisl TOCREG, TOCREG, extern .TOC.-lj_vm_ffi_callback@ha + | addil TOCREG, TOCREG, extern .TOC.-lj_vm_ffi_callback@l + | .endif | saveregs - | lwz CTSTATE, GL:r12->ctype_state - | addi DISPATCH, r12, GG_G2DISP - | stw r11, CTSTATE->cb.slot - | stw r3, CTSTATE->cb.gpr[0] + | lwz CTSTATE, GL:r11->ctype_state + | addi DISPATCH, r11, GG_G2DISP + | stw r12, CTSTATE->cb.slot + | stp r3, CTSTATE->cb.gpr[0] | stfd f1, CTSTATE->cb.fpr[0] - | stw r4, CTSTATE->cb.gpr[1] + | stp r4, CTSTATE->cb.gpr[1] | stfd f2, CTSTATE->cb.fpr[1] - | stw r5, CTSTATE->cb.gpr[2] + | stp r5, CTSTATE->cb.gpr[2] | stfd f3, CTSTATE->cb.fpr[2] - | stw r6, CTSTATE->cb.gpr[3] + | stp r6, CTSTATE->cb.gpr[3] | stfd f4, CTSTATE->cb.fpr[3] - | stw r7, CTSTATE->cb.gpr[4] + | stp r7, CTSTATE->cb.gpr[4] | stfd f5, CTSTATE->cb.fpr[4] - | stw r8, CTSTATE->cb.gpr[5] + | stp r8, CTSTATE->cb.gpr[5] | stfd f6, CTSTATE->cb.fpr[5] - | stw r9, CTSTATE->cb.gpr[6] + | stp r9, CTSTATE->cb.gpr[6] | stfd f7, CTSTATE->cb.fpr[6] - | stw r10, CTSTATE->cb.gpr[7] + | stp r10, CTSTATE->cb.gpr[7] | stfd f8, CTSTATE->cb.fpr[7] + | .if GPR64 + | stfd f9, CTSTATE->cb.fpr[8] + | stfd f10, CTSTATE->cb.fpr[9] + | stfd f11, CTSTATE->cb.fpr[10] + | stfd f12, CTSTATE->cb.fpr[11] + | stfd f13, CTSTATE->cb.fpr[12] + | .endif + | .if ELFV2 + | addi TMP0, sp, CFRAME_SPACE+96 + | .elif GPR64 + | addi TMP0, sp, CFRAME_SPACE+112 + | .else | addi TMP0, sp, CFRAME_SPACE+8 - | stw TMP0, CTSTATE->cb.stack + | .endif + | stp TMP0, CTSTATE->cb.stack | mr CARG1, CTSTATE | stw CTSTATE, SAVE_PC // Any value outside of bytecode is ok. | mr CARG2, sp @@ -3019,9 +3049,21 @@ static void build_subroutines(BuildCtx *ctx) | mr CARG1, CTSTATE | mr CARG2, RA | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) - | lwz CRET1, CTSTATE->cb.gpr[0] + | lp CRET1, CTSTATE->cb.gpr[0] | lfd FARG1, CTSTATE->cb.fpr[0] - | lwz CRET2, CTSTATE->cb.gpr[1] + | lp CRET2, CTSTATE->cb.gpr[1] + | .if GPR64 + | lfd FARG2, CTSTATE->cb.fpr[1] + | .else + | lp CARG3, CTSTATE->cb.gpr[2] + | lp CARG4, CTSTATE->cb.gpr[3] + | .endif + | .elfv2 lfd f3, CTSTATE->cb.fpr[2] + | .elfv2 lfd f4, CTSTATE->cb.fpr[3] + | .elfv2 lfd f5, CTSTATE->cb.fpr[4] + | .elfv2 lfd f6, CTSTATE->cb.fpr[5] + | .elfv2 lfd f7, CTSTATE->cb.fpr[6] + | .elfv2 lfd f8, CTSTATE->cb.fpr[7] | b ->vm_leave_unw |.endif | @@ -3034,23 +3076,46 @@ static void build_subroutines(BuildCtx *ctx) | lbz CARG2, CCSTATE->nsp | lbz CARG3, CCSTATE->nfpr | neg TMP1, TMP1 + | .if GPR64 + | std TMP0, 16(sp) + | .else | stw TMP0, 4(sp) + | .endif | cmpwi cr1, CARG3, 0 | mr TMP2, sp | addic. CARG2, CARG2, -1 + | .if GPR64 + | stdux sp, sp, TMP1 + | .else | stwux sp, sp, TMP1 + | .endif | crnot 4*cr1+eq, 4*cr1+eq // For vararg calls. - | stw r14, -4(TMP2) - | stw CCSTATE, -8(TMP2) + | .if GPR64 + | std r14, -8(TMP2) + | std CCSTATE, -16(TMP2) + | .else + | stw r14, -4(TMP2) + | stw CCSTATE, -8(TMP2) + | .endif | mr r14, TMP2 | la TMP1, CCSTATE->stack + | .if GPR64 + | sldi CARG2, CARG2, 3 + | .else | slwi CARG2, CARG2, 2 + | .endif | blty >2 - | la TMP2, 8(sp) + | .if ELFV2 + | la TMP2, 96(sp) + | .elif GPR64 + | la TMP2, 112(sp) + | .else + | la TMP2, 8(sp) + | .endif |1: - | lwzx TMP0, TMP1, CARG2 - | stwx TMP0, TMP2, CARG2 - | addic. CARG2, CARG2, -4 + | lpx TMP0, TMP1, CARG2 + | stpx TMP0, TMP2, CARG2 + | addic. CARG2, CARG2, -WORD_SIZE | bge <1 |2: | bney cr1, >3 @@ -3062,28 +3127,49 @@ static void build_subroutines(BuildCtx *ctx) | lfd f6, CCSTATE->fpr[5] | lfd f7, CCSTATE->fpr[6] | lfd f8, CCSTATE->fpr[7] + | .if GPR64 + | lfd f9, CCSTATE->fpr[8] + | lfd f10, CCSTATE->fpr[9] + | lfd f11, CCSTATE->fpr[10] + | lfd f12, CCSTATE->fpr[11] + | lfd f13, CCSTATE->fpr[12] + | .endif |3: - | lp TMP0, CCSTATE->func - | lwz CARG2, CCSTATE->gpr[1] - | lwz CARG3, CCSTATE->gpr[2] - | lwz CARG4, CCSTATE->gpr[3] - | lwz CARG5, CCSTATE->gpr[4] - | mtctr TMP0 - | lwz r8, CCSTATE->gpr[5] - | lwz r9, CCSTATE->gpr[6] - | lwz r10, CCSTATE->gpr[7] - | lwz CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. + | .toc std TOCREG, SAVE_TOC + | lp FUNCREG, CCSTATE->func + | lp CARG2, CCSTATE->gpr[1] + | lp CARG3, CCSTATE->gpr[2] + | .opd lp TOCREG, TOC_OFS(FUNCREG) + | .opdenv lp ENVREG, ENV_OFS(FUNCREG) + | .opd lp FUNCREG, 0(FUNCREG) + | lp CARG4, CCSTATE->gpr[3] + | lp CARG5, CCSTATE->gpr[4] + | mtctr FUNCREG + | lp r8, CCSTATE->gpr[5] + | lp r9, CCSTATE->gpr[6] + | lp r10, CCSTATE->gpr[7] + | lp CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. | bctrl - | lwz CCSTATE:TMP1, -8(r14) - | lwz TMP2, -4(r14) + | .toc lp TOCREG, SAVE_TOC + | .if GPR64 + | ld CCSTATE:TMP1, -16(r14) + | ld TMP2, -8(r14) + | ld TMP0, 16(r14) + | .else + | lwz CCSTATE:TMP1, -8(r14) + | lwz TMP2, -4(r14) | lwz TMP0, 4(r14) - | stw CARG1, CCSTATE:TMP1->gpr[0] + | .endif + | stp CARG1, CCSTATE:TMP1->gpr[0] | stfd FARG1, CCSTATE:TMP1->fpr[0] - | stw CARG2, CCSTATE:TMP1->gpr[1] + | stp CARG2, CCSTATE:TMP1->gpr[1] + | .if GPR64 + | stfd FARG2, CCSTATE:TMP1->fpr[1] + | .endif | mtlr TMP0 - | stw CARG3, CCSTATE:TMP1->gpr[2] + | stp CARG3, CCSTATE:TMP1->gpr[2] | mr sp, r14 - | stw CARG4, CCSTATE:TMP1->gpr[3] + | stp CARG4, CCSTATE:TMP1->gpr[3] | mr r14, TMP2 | blr |.endif @@ -5333,9 +5419,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | .opdenv lp ENVREG, ENV_OFS(FUNCREG) | st_vmstate | bctrl // (lua_State *L [, lua_CFunction f]) + | .toc lp TOCREG, SAVE_TOC | // Returns nresults. | lp BASE, L->base - | .toc lp TOCREG, SAVE_TOC | slwi RD, CRET1, 3 | lp TMP1, L->top | li_vmstate INTERP @@ -5436,8 +5522,12 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.long lj_vm_ffi_call\n" #endif "\t.long %d\n" +#if LJ_ARCH_PPC32ON64 + "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n" +#else "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" - "\t.byte 0x8e\n\t.uleb128 2\n" +#endif + "\t.byte 0x8e\n\t.uleb128 1\n" "\t.byte 0xd\n\t.uleb128 0xe\n" "\t.align 2\n" ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); @@ -5519,8 +5609,12 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.long lj_vm_ffi_call-.\n" "\t.long %d\n" "\t.uleb128 0\n" /* augmentation length */ +#if LJ_ARCH_PPC32ON64 + "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n" +#else "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" - "\t.byte 0x8e\n\t.uleb128 2\n" +#endif + "\t.byte 0x8e\n\t.uleb128 1\n" "\t.byte 0xd\n\t.uleb128 0xe\n" "\t.align 2\n" ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);