diff --git a/doc/ext_ffi_api.html b/doc/ext_ffi_api.html index 89ddb0d9..8e99ff48 100644 --- a/doc/ext_ffi_api.html +++ b/doc/ext_ffi_api.html @@ -463,8 +463,10 @@ otherwise. The following parameters are currently defined: winWindows variant of the standard ABI -uwpUniversal Windows Platform +pauthPointer authentication ABI +uwpUniversal Windows Platform + gc6464 bit GC references diff --git a/dynasm/dasm_arm.h b/dynasm/dasm_arm.h index aa16014e..4570cd12 100644 --- a/dynasm/dasm_arm.h +++ b/dynasm/dasm_arm.h @@ -142,6 +142,7 @@ void dasm_setup(Dst_DECL, const void *actionlist) if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); for (i = 0; i < D->maxsection; i++) { D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos; D->sections[i].ofs = 0; } } diff --git a/dynasm/dasm_arm64.h b/dynasm/dasm_arm64.h index e04898f1..dffd64e8 100644 --- a/dynasm/dasm_arm64.h +++ b/dynasm/dasm_arm64.h @@ -144,6 +144,7 @@ void dasm_setup(Dst_DECL, const void *actionlist) if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); for (i = 0; i < D->maxsection; i++) { D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos; D->sections[i].ofs = 0; } } diff --git a/dynasm/dasm_arm64.lua b/dynasm/dasm_arm64.lua index 1f581ba0..fee902d5 100644 --- a/dynasm/dasm_arm64.lua +++ b/dynasm/dasm_arm64.lua @@ -823,6 +823,13 @@ map_op = { tbz_3 = "36000000DTBw|36000000DTBx", tbnz_3 = "37000000DTBw|37000000DTBx", + -- ARM64e: Pointer authentication codes (PAC). + blraaz_1 = "d63f081fNx", + braa_2 = "d71f0800NDx", + braaz_1 = "d61f081fNx", + pacibsp_0 = "d503237f", + retab_0 = "d65f0fff", + -- Miscellaneous instructions. 
-- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr -- TODO: sys, sysl, ic, dc, at, tlbi diff --git a/dynasm/dasm_mips.h b/dynasm/dasm_mips.h index 495eaa0e..2273dba2 100644 --- a/dynasm/dasm_mips.h +++ b/dynasm/dasm_mips.h @@ -141,6 +141,7 @@ void dasm_setup(Dst_DECL, const void *actionlist) if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); for (i = 0; i < D->maxsection; i++) { D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos; D->sections[i].ofs = 0; } } diff --git a/dynasm/dasm_ppc.h b/dynasm/dasm_ppc.h index 30b757e3..14db019d 100644 --- a/dynasm/dasm_ppc.h +++ b/dynasm/dasm_ppc.h @@ -141,6 +141,7 @@ void dasm_setup(Dst_DECL, const void *actionlist) if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); for (i = 0; i < D->maxsection; i++) { D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos; D->sections[i].ofs = 0; } } diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h index 66a68ea5..ae5cb429 100644 --- a/dynasm/dasm_x86.h +++ b/dynasm/dasm_x86.h @@ -140,6 +140,7 @@ void dasm_setup(Dst_DECL, const void *actionlist) if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); for (i = 0; i < D->maxsection; i++) { D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos; D->sections[i].ofs = 0; } } diff --git a/src/Makefile b/src/Makefile index 30d64be2..f6d093bb 100644 --- a/src/Makefile +++ b/src/Makefile @@ -433,6 +433,10 @@ ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH))) DASM_AFLAGS+= -D NO_UNWIND TARGET_ARCH+= -DLUAJIT_NO_UNWIND endif +ifneq (,$(findstring LJ_ABI_PAUTH 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D PAUTH + TARGET_ARCH+= -DLJ_ABI_PAUTH=1 +endif DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH)))) ifeq (Windows,$(TARGET_SYS)) DASM_AFLAGS+= -D WIN diff --git a/src/host/buildvm_asm.c 
b/src/host/buildvm_asm.c index 7baa011f..54e92cab 100644 --- a/src/host/buildvm_asm.c +++ b/src/host/buildvm_asm.c @@ -243,6 +243,12 @@ void emit_asm(BuildCtx *ctx) fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch); fprintf(ctx->fp, "\t.text\n"); +#if LJ_TARGET_MIPS32 && !LJ_ABI_SOFTFP + fprintf(ctx->fp, "\t.module fp=32\n"); +#endif +#if LJ_TARGET_MIPS + fprintf(ctx->fp, "\t.set nomips16\n\t.abicalls\n\t.set noreorder\n\t.set nomacro\n"); +#endif emit_asm_align(ctx, 4); #if LJ_TARGET_PS3 @@ -268,9 +274,6 @@ void emit_asm(BuildCtx *ctx) ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n" ".pad #28\n"); #endif -#endif -#if LJ_TARGET_MIPS - fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n"); #endif for (i = rel = 0; i < ctx->nsym; i++) { diff --git a/src/lib_ffi.c b/src/lib_ffi.c index 1b1fa389..3133cab2 100644 --- a/src/lib_ffi.c +++ b/src/lib_ffi.c @@ -745,6 +745,9 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.) #if LJ_ABI_WIN "\003win" #endif +#if LJ_ABI_PAUTH + "\005pauth" +#endif #if LJ_TARGET_UWP "\003uwp" #endif diff --git a/src/lib_jit.c b/src/lib_jit.c index 2867d420..2300f1da 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -422,7 +422,8 @@ LJLIB_CF(jit_util_ircalladdr) { uint32_t idx = (uint32_t)lj_lib_checkint(L, 1); if (idx < IRCALL__MAX) { - setintptrV(L->top-1, (intptr_t)(void *)lj_ir_callinfo[idx].func); + ASMFunction func = lj_ir_callinfo[idx].func; + setintptrV(L->top-1, (intptr_t)(void *)lj_ptr_strip(func)); return 1; } return 0; diff --git a/src/lj_arch.h b/src/lj_arch.h index 4e48c296..c4398121 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -259,6 +259,9 @@ #define LJ_ARCH_NAME "arm64" #define LJ_ARCH_ENDIAN LUAJIT_LE #endif +#if !defined(LJ_ABI_PAUTH) && defined(__arm64e__) +#define LJ_ABI_PAUTH 1 +#endif #define LJ_TARGET_ARM64 1 #define LJ_TARGET_EHRETREG 0 #define LJ_TARGET_EHRAREG 30 @@ -603,6 +606,10 @@ #define LJ_SOFTFP (!LJ_ARCH_HASFPU) #define LJ_SOFTFP32 (LJ_SOFTFP && LJ_32) +#ifndef LJ_ABI_PAUTH +#define 
LJ_ABI_PAUTH 0 +#endif + #if LJ_ARCH_ENDIAN == LUAJIT_BE #define LJ_LE 0 #define LJ_BE 1 diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index d3e4bb63..c537c514 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -421,8 +421,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) uint32_t n, nargs = CCI_XNARGS(ci); int32_t ofs = 0; Reg gpr, fpr = REGARG_FIRSTFPR; - if ((void *)ci->func) - emit_call(as, (void *)ci->func); + if (ci->func) + emit_call(as, ci->func); for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) as->cost[gpr] = REGCOST(~0u, ASMREF_L); gpr = REGARG_FIRSTGPR; @@ -501,7 +501,7 @@ static void asm_callx(ASMState *as, IRIns *ir) ci.func = (ASMFunction)(ir_k64(irf)->u64); } else { /* Need a non-argument register for indirect calls. */ Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); - emit_n(as, A64I_BLR, freg); + emit_n(as, A64I_BLR_AUTH, freg); ci.func = (ASMFunction)(void *)0; } asm_gencall(as, &ci, args); @@ -935,7 +935,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key))); emit_lso(as, A64I_LDRx, key, idx, kofs); if (bigofs) - emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR); + emit_opk(as, A64I_ADDx, dest, node, ofs, rset_exclude(RSET_GPR, node)); } static void asm_uref(ASMState *as, IRIns *ir) diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index e9cce916..8824081e 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -1337,8 +1337,8 @@ static void asm_fload(ASMState *as, IRIns *ir) } } ofs = field_ofs[ir->op2]; + lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD"); } - lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD"); emit_tsi(as, mi, dest, idx, ofs); } diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c index 2c70ff47..d3986ea4 100644 --- a/src/lj_bcwrite.c +++ b/src/lj_bcwrite.c @@ -189,7 +189,8 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt) goto save_int; } else { /* Write a 33 bit ULEB128 for the int 
(lsb=0) or loword (lsb=1). */ - if (!LJ_DUALNUM) { /* Narrow number constants to integers. */ + if (!LJ_DUALNUM && o->u32.hi != LJ_KEYINDEX) { + /* Narrow number constants to integers. */ lua_Number num = numV(o); k = lj_num2int(num); if (num == (lua_Number)k) { /* -0 is never a constant. */ diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index 43e44305..98e9e02b 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -171,13 +171,13 @@ static void *callback_mcode_init(global_State *g, uint32_t *page) static void *callback_mcode_init(global_State *g, uint32_t *page) { uint32_t *p = page; - void *target = (void *)lj_vm_ffi_callback; + ASMFunction target = lj_vm_ffi_callback; MSize slot; *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4)); *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5)); - *p++ = A64I_LE(A64I_BR | A64F_N(RID_X11)); + *p++ = A64I_LE(A64I_BR_AUTH | A64F_N(RID_X11)); *p++ = A64I_LE(A64I_NOP); - ((void **)p)[0] = target; + ((ASMFunction *)p)[0] = target; ((void **)p)[1] = g; p += 4; for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h index fcc9c1d8..65463a5e 100644 --- a/src/lj_emit_arm64.h +++ b/src/lj_emit_arm64.h @@ -348,16 +348,22 @@ static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target) #define emit_jmp(as, target) emit_branch(as, A64I_B, (target)) -static void emit_call(ASMState *as, void *target) +static void emit_call(ASMState *as, ASMFunction target) { MCode *p = --as->mcp; - ptrdiff_t delta = (char *)target - (char *)p; +#if LJ_ABI_PAUTH + char *targetp = ptrauth_auth_data((char *)target, + ptrauth_key_function_pointer, 0); +#else + char *targetp = (char *)target; +#endif + ptrdiff_t delta = targetp - (char *)p; if (A64F_S_OK(delta>>2, 26)) { *p = A64I_BL | A64F_S26(delta>>2); } else { /* Target out of range: need indirect call. But don't use R0-R7. 
*/ Reg r = ra_allock(as, i64ptr(target), RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); - *p = A64I_BLR | A64F_N(r); + *p = A64I_BLR_AUTH | A64F_N(r); } } diff --git a/src/lj_err.c b/src/lj_err.c index 3ee70b86..a0a28692 100644 --- a/src/lj_err.c +++ b/src/lj_err.c @@ -444,10 +444,10 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, if ((actions & _UA_FORCE_UNWIND)) { return _URC_CONTINUE_UNWIND; } else if (cf) { + ASMFunction ip; _Unwind_SetGR(ctx, LJ_TARGET_EHRETREG, errcode); - _Unwind_SetIP(ctx, (uintptr_t)(cframe_unwind_ff(cf) ? - lj_vm_unwind_ff_eh : - lj_vm_unwind_c_eh)); + ip = cframe_unwind_ff(cf) ? lj_vm_unwind_ff_eh : lj_vm_unwind_c_eh; + _Unwind_SetIP(ctx, (uintptr_t)lj_ptr_strip(ip)); return _URC_INSTALL_CONTEXT; } #if LJ_TARGET_X86ORX64 @@ -580,9 +580,17 @@ extern void __deregister_frame(const void *); uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info) { - void **handler; + ASMFunction handler = (ASMFunction)err_unwind_jit; memcpy(info, err_frame_jit_template, sizeof(err_frame_jit_template)); - handler = (void *)err_unwind_jit; +#if LJ_ABI_PAUTH +#if LJ_TARGET_ARM64 + handler = ptrauth_auth_and_resign(handler, + ptrauth_key_function_pointer, 0, + ptrauth_key_process_independent_code, info + ERR_FRAME_JIT_OFS_HANDLER); +#else +#error "missing pointer authentication support for this architecture" +#endif +#endif memcpy(info + ERR_FRAME_JIT_OFS_HANDLER, &handler, sizeof(handler)); *(uint32_t *)(info + ERR_FRAME_JIT_OFS_CODE_SIZE) = (uint32_t)(sz - sizeof(err_frame_jit_template) - (info - (uint8_t *)base)); @@ -875,6 +883,10 @@ LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...) 
const char *msg; va_list argp; va_start(argp, em); + if (LJ_HASJIT) { + TValue *base = tvref(G(L)->jit_base); + if (base) L->base = base; + } if (curr_funcisL(L)) L->top = curr_topL(L); msg = lj_strfmt_pushvf(L, err2msg(em), argp); va_end(argp); diff --git a/src/lj_jit.h b/src/lj_jit.h index 7f081730..0fae60ad 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -273,6 +273,9 @@ typedef struct GCtrace { BCIns startins; /* Original bytecode of starting instruction. */ MSize szmcode; /* Size of machine code. */ MCode *mcode; /* Start of machine code. */ +#if LJ_ABI_PAUTH + ASMFunction mcauth; /* Start of machine code, with ptr auth applied. */ +#endif MSize mcloop; /* Offset of loop start in machine code. */ uint16_t nchild; /* Number of child traces (root trace only). */ uint16_t spadjust; /* Stack pointer adjustment (offset in bytes). */ diff --git a/src/lj_obj.h b/src/lj_obj.h index e541387f..52c7bc03 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h @@ -1042,4 +1042,18 @@ LJ_DATA const char *const lj_obj_itypename[~LJ_TNUMX+1]; LJ_FUNC int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2); LJ_FUNC const void * LJ_FASTCALL lj_obj_ptr(global_State *g, cTValue *o); +#if LJ_ABI_PAUTH +#if LJ_TARGET_ARM64 +#include <ptrauth.h> +#define lj_ptr_sign(ptr, ctx) \ + ptrauth_sign_unauthenticated((ptr), ptrauth_key_function_pointer, (ctx)) +#define lj_ptr_strip(ptr) ptrauth_strip((ptr), ptrauth_key_function_pointer) +#else +#error "No support for pointer authentication for this architecture" +#endif +#else +#define lj_ptr_sign(ptr, ctx) (ptr) +#define lj_ptr_strip(ptr) (ptr) +#endif + #endif diff --git a/src/lj_parse.c b/src/lj_parse.c index fd3688df..8e0e9113 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -2518,6 +2518,7 @@ static int predict_next(LexState *ls, FuncState *fs, BCPos pc) cTValue *o; switch (bc_op(ins)) { case BC_MOV: + if (bc_d(ins) >= fs->nactvar) return 0; name = gco2str(gcref(var_get(ls, fs, bc_d(ins)).name)); break; case BC_UGET: diff --git a/src/lj_record.c 
b/src/lj_record.c index 33767405..3542e451 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -1942,8 +1942,12 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ ptrdiff_t i; if (nvararg < 0) nvararg = 0; - if (nresults == -1) nresults = nvararg; - J->maxslot = dst + (BCReg)nresults; + if (nresults != 1) { + if (nresults == -1) nresults = nvararg; + J->maxslot = dst + (BCReg)nresults; + } else if (dst >= J->maxslot) { + J->maxslot = dst + 1; + } if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) lj_trace_err(J, LJ_TRERR_STACKOV); for (i = 0; i < nresults; i++) @@ -1973,7 +1977,9 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) } for (i = nvararg; i < nresults; i++) J->base[dst+i] = TREF_NIL; - J->maxslot = dst + (BCReg)nresults; + if (nresults != 1 || dst >= J->maxslot) { + J->maxslot = dst + (BCReg)nresults; + } } else if (select_detect(J)) { /* y = select(x, ...) */ TRef tridx = J->base[dst-1]; TRef tr = TREF_NIL; diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h index d45af2e4..c9c6b80f 100644 --- a/src/lj_target_arm64.h +++ b/src/lj_target_arm64.h @@ -260,6 +260,9 @@ typedef enum A64Ins { A64I_CBZ = 0x34000000, A64I_CBNZ = 0x35000000, + A64I_BRAAZ = 0xd61f081f, + A64I_BLRAAZ = 0xd63f081f, + A64I_NOP = 0xd503201f, /* FP */ @@ -317,6 +320,9 @@ typedef enum A64Ins { A64I_FMOV_DI = 0x1e601000, } A64Ins; +#define A64I_BR_AUTH (LJ_ABI_PAUTH ? A64I_BRAAZ : A64I_BR) +#define A64I_BLR_AUTH (LJ_ABI_PAUTH ? A64I_BLRAAZ : A64I_BLR) + typedef enum A64Shift { A64SH_LSL, A64SH_LSR, A64SH_ASR, A64SH_ROR } A64Shift; diff --git a/src/lj_trace.c b/src/lj_trace.c index c2329394..e019a79f 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -153,6 +153,9 @@ static void trace_save(jit_State *J, GCtrace *T) newwhite(J2G(J), T); T->gct = ~LJ_TTRACE; T->ir = (IRIns *)p - J->cur.nk; /* The IR has already been copied above. 
*/ +#if LJ_ABI_PAUTH + T->mcauth = lj_ptr_sign((ASMFunction)T->mcode, T); +#endif p += szins; TRACE_APPENDVEC(snap, nsnap, SnapShot) TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry) @@ -428,6 +431,12 @@ static void trace_start(jit_State *J) return; } + /* Ensuring forward progress for BC_ITERN can trigger hotcount again. */ + if (!J->parent && bc_op(*J->pc) == BC_JLOOP) { /* Already compiled. */ + J->state = LJ_TRACE_IDLE; /* Silently ignored. */ + return; + } + /* Get a new trace number. */ traceno = trace_findfree(J); if (LJ_UNLIKELY(traceno == 0)) { /* No free trace? */ @@ -864,7 +873,7 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) ExitDataCP exd; int errcode, exitcode = J->exitcode; TValue exiterr; - const BCIns *pc; + const BCIns *pc, *retpc; void *cf; GCtrace *T; @@ -916,22 +925,7 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) } else { trace_hotside(J, pc); } - if (bc_op(*pc) == BC_JLOOP) { - BCIns *retpc = &traceref(J, bc_d(*pc))->startins; - int isret = bc_isret(bc_op(*retpc)); - if (isret || bc_op(*retpc) == BC_ITERN) { - if (J->state == LJ_TRACE_RECORD) { - J->patchins = *pc; - J->patchpc = (BCIns *)pc; - *J->patchpc = *retpc; - J->bcskip = 1; - } else if (isret) { - pc = retpc; - setcframe_pc(cf, pc); - } - } - } - /* Return MULTRES or 0. */ + /* Return MULTRES or 0 or -17. */ ERRNO_RESTORE switch (bc_op(*pc)) { case BC_CALLM: case BC_CALLMT: @@ -940,6 +934,18 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc)); case BC_TSETM: return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc)); + case BC_JLOOP: + retpc = &traceref(J, bc_d(*pc))->startins; + if (bc_isret(bc_op(*retpc)) || bc_op(*retpc) == BC_ITERN) { + /* Dispatch to original ins to ensure forward progress. */ + if (J->state != LJ_TRACE_RECORD) return -17; + /* Unpatch bytecode when recording. 
*/ + J->patchins = *pc; + J->patchpc = (BCIns *)pc; + *J->patchpc = *retpc; + J->bcskip = 1; + } + return 0; default: if (bc_op(*pc) >= BC_FUNCF) return (int)((BCReg)(L->top - L->base) + 1); diff --git a/src/lj_vm.h b/src/lj_vm.h index c66db004..c7095941 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h @@ -54,8 +54,8 @@ LJ_ASMF void lj_vm_profhook(void); LJ_ASMF void lj_vm_IITERN(void); /* Trace exit handling. */ -LJ_ASMF void lj_vm_exit_handler(void); -LJ_ASMF void lj_vm_exit_interp(void); +LJ_ASMF char lj_vm_exit_handler[]; +LJ_ASMF char lj_vm_exit_interp[]; /* Internal math helper functions. */ #if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP) @@ -111,6 +111,6 @@ LJ_ASMF void lj_cont_stitch(void); /* Trace stitching. */ LJ_ASMF char lj_vm_asm_begin[]; /* Bytecode offsets are relative to lj_vm_asm_begin. */ -#define makeasmfunc(ofs) ((ASMFunction)(lj_vm_asm_begin + (ofs))) +#define makeasmfunc(ofs) lj_ptr_sign((ASMFunction)(lj_vm_asm_begin + (ofs)), 0) #endif diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index 770c1602..4df4b488 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc @@ -2196,8 +2196,8 @@ static void build_subroutines(BuildCtx *ctx) |.if JIT | ldr L, SAVE_L |1: - | cmp CARG1, #0 - | blt >9 // Check for error from exit. + | cmn CARG1, #LUA_ERRERR + | bhs >9 // Check for error from exit. | lsl RC, CARG1, #3 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] | str RC, SAVE_MULTRES @@ -2213,6 +2213,8 @@ static void build_subroutines(BuildCtx *ctx) | ldr INS, [PC], #4 | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. | st_vmstate CARG4 + | cmn CARG1, #17 // Static dispatch? + | beq >5 | cmp OP, #BC_FUNCC+2 // Fast function? | bhs >4 |2: @@ -2238,6 +2240,17 @@ static void build_subroutines(BuildCtx *ctx) | ldr KBASE, [CARG3, #PC2PROTO(k)] | b <2 | + |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. 
+ | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)] + | decode_RD RC, INS + | ldr TRACE:CARG1, [CARG1, RC, lsl #2] + | ldr INS, TRACE:CARG1->startins + | decode_OP OP, INS + | decode_RA8 RA, INS + | add OP, DISPATCH, OP, lsl #2 + | decode_RD RC, INS + | ldr pc, [OP, #GG_DISP2STATIC] + | |9: // Rethrow error from the right C frame. | rsb CARG2, CARG1, #0 | mov CARG1, L diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 36a036ae..effb8d91 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -77,6 +77,23 @@ |.define CRET1, x0 |.define CRET1w, w0 | +|//----------------------------------------------------------------------- +| +|// ARM64e pointer authentication codes (PAC). +|.if PAUTH +|.macro sp_auth; pacibsp; .endmacro +|.macro br_auth, reg; braaz reg; .endmacro +|.macro blr_auth, reg; blraaz reg; .endmacro +|.macro ret_auth; retab; .endmacro +|.else +|.macro sp_auth; .endmacro +|.macro br_auth, reg; br reg; .endmacro +|.macro blr_auth, reg; blr reg; .endmacro +|.macro ret_auth; ret; .endmacro +|.endif +| +|//----------------------------------------------------------------------- +| |// Stack layout while in interpreter. Must match with lj_frame.h. | |.define CFRAME_SPACE, 208 @@ -106,6 +123,7 @@ |.endmacro | |.macro saveregs +| sp_auth | sub sp, sp, # CFRAME_SPACE | stp fp, lr, [sp, # SAVE_FP_LR_] | add fp, sp, # SAVE_FP_LR_ @@ -180,7 +198,7 @@ | decode_RA RA, INS | ldr TMP0, [TMP1, #GG_G2DISP] | decode_RD RC, INS -| br TMP0 +| br_auth TMP0 |.endmacro | |// Instruction footer. @@ -209,7 +227,7 @@ | decode_RA RA, INS | ldr TMP0, [TMP1, #GG_G2DISP] | add RA, BASE, RA, lsl #3 -| br TMP0 +| br_auth TMP0 |.endmacro | |.macro ins_call @@ -356,7 +374,7 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_leave_unw: | restoreregs - | ret + | ret_auth | |6: | bgt >7 // Less results wanted? @@ -542,7 +560,7 @@ static void build_subroutines(BuildCtx *ctx) | str RC, SAVE_CFRAME | str TMP0, L->cframe // Add our C frame to cframe chain. 
| str L, GL->cur_L - | blr CARG4 // (lua_State *L, lua_CFunction func, void *ud) + | blr_auth CARG4 // (lua_State *L, lua_CFunction func, void *ud) | mov BASE, CRET1 | mov PC, #FRAME_CP | cbnz BASE, <3 // Else continue with the call. @@ -573,7 +591,7 @@ static void build_subroutines(BuildCtx *ctx) | ldr CARG3, LFUNC:CARG3->pc | ldr KBASE, [CARG3, #PC2PROTO(k)] | // BASE = base, RA = resultptr, CARG4 = meta base - | br CARG1 + | br_auth CARG1 | |.if FFI |1: @@ -1707,7 +1725,7 @@ static void build_subroutines(BuildCtx *ctx) | cmp TMP1, TMP2 | mov CARG1, L | bhi >5 // Need to grow stack. - | blr CARG3 // (lua_State *L) + | blr_auth CARG3 // (lua_State *L) | // Either throws an error, or recovers and returns -1, 0 or nresults+1. | ldr BASE, L->base | cmp CRET1w, #0 @@ -1743,6 +1761,7 @@ static void build_subroutines(BuildCtx *ctx) | |->fff_gcstep: // Call GC step function. | // BASE = new base, RC = nargs*8 + | sp_auth | add CARG2, BASE, NARGS8:RC // Calculate L->top. | mov RA, lr | stp BASE, CARG2, L->base @@ -1754,7 +1773,7 @@ static void build_subroutines(BuildCtx *ctx) | mov lr, RA // Help return address predictor. | sub NARGS8:RC, CARG2, BASE // Calculate nargs*8. | and CFUNC:CARG3, CARG3, #LJ_GCVMASK - | ret + | ret_auth | |//----------------------------------------------------------------------- |//-- Special dispatch targets ------------------------------------------- @@ -1781,7 +1800,7 @@ static void build_subroutines(BuildCtx *ctx) | tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1 // Hook already active? |5: // Re-dispatch to static ins. | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC] - | br TMP0 + | br_auth TMP0 | |->vm_inshook: // Dispatch target for instr/line hooks. | ldrb TMP2w, GL->hookmask @@ -1807,7 +1826,7 @@ static void build_subroutines(BuildCtx *ctx) | decode_RA RA, INS | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC] | decode_RD RC, INS - | br TMP0 + | br_auth TMP0 | |->cont_hook: // Continue from hook yield. 
| ldr CARG1, [CARG4, #-40] @@ -1857,7 +1876,7 @@ static void build_subroutines(BuildCtx *ctx) | sub NARGS8:RC, TMP1, BASE | ldr INSw, [PC, #-4] | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | br CRET1 + | br_auth CRET1 | |->cont_stitch: // Trace stitching. |.if JIT @@ -1986,8 +2005,8 @@ static void build_subroutines(BuildCtx *ctx) |.if JIT | ldr L, SAVE_L |1: - | cmp CARG1w, #0 - | blt >9 // Check for error from exit. + | cmn CARG1w, #LUA_ERRERR + | bhs >9 // Check for error from exit. | lsl RC, CARG1, #3 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 @@ -2004,6 +2023,8 @@ static void build_subroutines(BuildCtx *ctx) | ldrb RBw, [PC, # OFS_OP] | ldr INSw, [PC], #4 | st_vmstate CARG4w + | cmn CARG1w, #17 // Static dispatch? + | beq >5 | cmp RBw, #BC_FUNCC+2 // Fast function? | add TMP1, GL, INS, uxtb #3 | bhs >4 @@ -2014,13 +2035,13 @@ static void build_subroutines(BuildCtx *ctx) | decode_RA RA, INS | lsr TMP0, INS, #16 | csel RC, TMP0, RC, lo - | blo >5 + | blo >3 | ldr CARG3, [BASE, FRAME_FUNC] | sub RC, RC, #8 | add RA, BASE, RA, lsl #3 // Yes: RA = BASE+framesize*8, RC = nargs*8 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - |5: - | br RB + |3: + | br_auth RB | |4: // Check frame below fast function. | ldr CARG1, [BASE, FRAME_PC] @@ -2036,6 +2057,17 @@ static void build_subroutines(BuildCtx *ctx) | ldr KBASE, [CARG3, #PC2PROTO(k)] | b <2 | + |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. + | ldr RA, [GL, #GL_J(trace)] + | decode_RD RC, INS + | ldr TRACE:RA, [RA, RC, lsl #3] + | ldr INSw, TRACE:RA->startins + | add TMP0, GL, INS, uxtb #3 + | decode_RA RA, INS + | ldr RB, [TMP0, #GG_G2DISP+GG_DISP2STATIC] + | decode_RD RC, INS + | br_auth RB + | |9: // Rethrow error from the right C frame. | neg CARG2w, CARG1w | mov CARG1, L @@ -2182,6 +2214,7 @@ static void build_subroutines(BuildCtx *ctx) | // Caveat: needs special frame unwinding, see below. 
|.if FFI | .type CCSTATE, CCallState, x19 + | sp_auth | stp x20, CCSTATE, [sp, #-32]! | stp fp, lr, [sp, #16] | add fp, sp, #16 @@ -2208,14 +2241,14 @@ static void build_subroutines(BuildCtx *ctx) | ldp x6, x7, CCSTATE->gpr[6] | ldp d6, d7, CCSTATE->fpr[6] | ldr x8, CCSTATE->retp - | blr TMP3 + | blr_auth TMP3 | sub sp, fp, #16 | stp x0, x1, CCSTATE->gpr[0] | stp d0, d1, CCSTATE->fpr[0] | stp d2, d3, CCSTATE->fpr[2] | ldp fp, lr, [sp, #16] | ldp x20, CCSTATE, [sp], #32 - | ret + | ret_auth |.endif |// Note: vm_ffi_call must be the last function in this object file! | @@ -3786,12 +3819,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov CARG2w, #0 // Traces on ARM64 don't store the trace #, so use 0. | ldr TRACE:RC, [CARG1, RC, lsl #3] | st_vmstate CARG2w + |.if PAUTH + | ldr RA, TRACE:RC->mcauth + |.else | ldr RA, TRACE:RC->mcode + |.endif | str BASE, GL->jit_base | str L, GL->tmpbuf.L | sub sp, sp, #16 // See SPS_FIXED. Avoids sp adjust in every root trace. + |.if PAUTH + | braa RA, RC + |.else | br RA |.endif + |.endif break; case BC_JMP: @@ -3901,7 +3942,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov CARG1, L | bhi ->vm_growstack_c // Need to grow stack. | st_vmstate TMP0w - | blr CARG4 // (lua_State *L [, lua_CFunction f]) + | blr_auth CARG4 // (lua_State *L [, lua_CFunction f]) | // Returns nresults. | ldp BASE, TMP1, L->base | str L, GL->cur_L diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index 34645bf1..bfdcfc1e 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc @@ -2466,7 +2466,8 @@ static void build_subroutines(BuildCtx *ctx) | addiu DISPATCH, JGL, -GG_DISP2G-32768 | sw BASE, L->base |1: - | bltz CRET1, >9 // Check for error from exit. + | sltiu TMP0, CRET1, -LUA_ERRERR // Check for error from exit. + | beqz TMP0, >9 |. lw LFUNC:RB, FRAME_FUNC(BASE) | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 
| sll MULTRES, CRET1, 3 @@ -2480,14 +2481,16 @@ static void build_subroutines(BuildCtx *ctx) | .FPU cvt.d.s TOBIT, TOBIT | // Modified copy of ins_next which handles function header dispatch, too. | lw INS, 0(PC) - | addiu PC, PC, 4 + | addiu CRET1, CRET1, 17 // Static dispatch? | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) + | decode_RD8a RD, INS + | beqz CRET1, >5 + |. addiu PC, PC, 4 | decode_OP4a TMP1, INS | decode_OP4b TMP1 - | sltiu TMP2, TMP1, BC_FUNCF*4 | addu TMP0, DISPATCH, TMP1 - | decode_RD8a RD, INS + | sltiu TMP2, TMP1, BC_FUNCF*4 | lw AT, 0(TMP0) | decode_RA8a RA, INS | beqz TMP2, >2 @@ -2515,6 +2518,22 @@ static void build_subroutines(BuildCtx *ctx) | jr AT |. addu RA, RA, BASE | + |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. + | lw TMP0, DISPATCH_J(trace)(DISPATCH) + | decode_RD4b RD + | addu TMP0, TMP0, RD + | lw TRACE:TMP2, 0(TMP0) + | lw INS, TRACE:TMP2->startins + | decode_OP4a TMP1, INS + | decode_OP4b TMP1 + | addu TMP0, DISPATCH, TMP1 + | decode_RD8a RD, INS + | lw AT, GG_DISP2STATIC(TMP0) + | decode_RA8a RA, INS + | decode_RD8b RD + | jr AT + |. decode_RA8b RA + | |9: // Rethrow error from the right C frame. | load_got lj_err_trace | sub CARG2, r0, CRET1 diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc index 651bc42e..801087b3 100644 --- a/src/vm_mips64.dasc +++ b/src/vm_mips64.dasc @@ -2571,7 +2571,8 @@ static void build_subroutines(BuildCtx *ctx) | daddiu DISPATCH, JGL, -GG_DISP2G-32768 | sd BASE, L->base |1: - | bltz CRET1, >9 // Check for error from exit. + | sltiu TMP0, CRET1, -LUA_ERRERR // Check for error from exit. + | beqz TMP0, >9 |. ld LFUNC:RB, FRAME_FUNC(BASE) | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | dsll MULTRES, CRET1, 3 @@ -2586,14 +2587,16 @@ static void build_subroutines(BuildCtx *ctx) | .FPU cvt.d.s TOBIT, TOBIT | // Modified copy of ins_next which handles function header dispatch, too. 
| lw INS, 0(PC) - | daddiu PC, PC, 4 + | addiu CRET1, CRET1, 17 // Static dispatch? | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) + | decode_RD8a RD, INS + | beqz CRET1, >5 + |. daddiu PC, PC, 4 | decode_OP8a TMP1, INS | decode_OP8b TMP1 - | sltiu TMP2, TMP1, BC_FUNCF*8 | daddu TMP0, DISPATCH, TMP1 - | decode_RD8a RD, INS + | sltiu TMP2, TMP1, BC_FUNCF*8 | ld AT, 0(TMP0) | decode_RA8a RA, INS | beqz TMP2, >2 @@ -2622,6 +2625,22 @@ static void build_subroutines(BuildCtx *ctx) | jr AT |. daddu RA, RA, BASE | + |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. + | ld TMP0, DISPATCH_J(trace)(DISPATCH) + | decode_RD8b RD + | daddu TMP0, TMP0, RD + | ld TRACE:TMP2, 0(TMP0) + | lw INS, TRACE:TMP2->startins + | decode_OP8a TMP1, INS + | decode_OP8b TMP1 + | daddu TMP0, DISPATCH, TMP1 + | decode_RD8a RD, INS + | ld AT, GG_DISP2STATIC(TMP0) + | decode_RA8a RA, INS + | decode_RD8b RD + | jr AT + |. decode_RA8b RA + | |9: // Rethrow error from the right C frame. | load_got lj_err_trace | sub CARG2, r0, CRET1 diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index 3cad37d2..73d60ae4 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc @@ -3015,8 +3015,9 @@ static void build_subroutines(BuildCtx *ctx) | addi DISPATCH, JGL, -GG_DISP2G-32768 | stp BASE, L->base |1: - | cmpwi CARG1, 0 - | blt >9 // Check for error from exit. + | li TMP2, -LUA_ERRERR + | cmplw CARG1, TMP2 + | bge >9 // Check for error from exit. | lwz LFUNC:RB, FRAME_FUNC(BASE) | slwi MULTRES, CARG1, 3 | li TMP2, 0 @@ -3041,6 +3042,8 @@ static void build_subroutines(BuildCtx *ctx) | addi PC, PC, 4 | // Assumes TISNIL == ~LJ_VMST_INTERP == -1. | stw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) + | cmpwi CARG1, -17 // Static dispatch? 
+ | beq >5 | decode_OPP TMP1, INS | decode_RA8 RA, INS | lpx TMP0, DISPATCH, TMP1 @@ -3070,6 +3073,21 @@ static void build_subroutines(BuildCtx *ctx) | add RA, RA, BASE | bctr | + |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. + | lwz TMP1, DISPATCH_J(trace)(DISPATCH) + | decode_RD4 RD, INS + | lwzx TRACE:TMP1, TMP1, RD + | lwz INS, TRACE:TMP1->startins + | decode_OPP TMP1, INS + | addi TMP1, TMP1, GG_DISP2STATIC + | lpx TMP0, DISPATCH, TMP1 + | mtctr TMP0 + | decode_RB8 RB, INS + | decode_RD8 RD, INS + | decode_RA8 RA, INS + | decode_RC8 RC, INS + | bctr + | |9: // Rethrow error from the right C frame. | neg CARG2, CARG1 | mr CARG1, L diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index 03d96557..5983eeed 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -2453,7 +2453,7 @@ static void build_subroutines(BuildCtx *ctx) | mov r12, [RA] | mov rsp, RA // Reposition stack to C frame. |.endif - | test RDd, RDd; js >9 // Check for error from exit. + | cmp RDd, -LUA_ERRERR; jae >9 // Check for error from exit. | mov L:RB, SAVE_L | mov MULTRES, RDd | mov LFUNC:KBASE, [BASE-16] @@ -2469,6 +2469,8 @@ static void build_subroutines(BuildCtx *ctx) | movzx OP, RCL | add PC, 4 | shr RCd, 16 + | cmp MULTRES, -17 // Static dispatch? + | je >5 | cmp OP, BC_FUNCF // Function header? | jb >3 | cmp OP, BC_FUNCC+2 // Fast function? @@ -2491,6 +2493,15 @@ static void build_subroutines(BuildCtx *ctx) | mov KBASE, [KBASE+PC2PROTO(k)] | jmp <2 | + |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. + | mov RA, [DISPATCH+DISPATCH_J(trace)] + | mov TRACE:RA, [RA+RD*8] + | mov RCd, TRACE:RA->startins + | movzx RAd, RCH + | movzx OP, RCL + | shr RCd, 16 + | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] + | |9: // Rethrow error from the right C frame. 
| mov CARG2d, RDd | mov CARG1, L:RB diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 18ca87b5..f7847762 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -2902,7 +2902,7 @@ static void build_subroutines(BuildCtx *ctx) | mov r13, TMPa | mov r12, TMPQ |.endif - | test RD, RD; js >9 // Check for error from exit. + | cmp RD, -LUA_ERRERR; jae >9 // Check for error from exit. | mov L:RB, SAVE_L | mov MULTRES, RD | mov LFUNC:KBASE, [BASE-8] @@ -2917,6 +2917,8 @@ static void build_subroutines(BuildCtx *ctx) | movzx OP, RCL | add PC, 4 | shr RC, 16 + | cmp MULTRES, -17 // Static dispatch? + | je >5 | cmp OP, BC_FUNCF // Function header? | jb >3 | cmp OP, BC_FUNCC+2 // Fast function? @@ -2942,6 +2944,19 @@ static void build_subroutines(BuildCtx *ctx) | mov KBASE, [KBASE+PC2PROTO(k)] | jmp <2 | + |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. + | mov RA, [DISPATCH+DISPATCH_J(trace)] + | mov TRACE:RA, [RA+RD*4] + | mov RC, TRACE:RA->startins + | movzx RA, RCH + | movzx OP, RCL + | shr RC, 16 + |.if X64 + | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] + |.else + | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] + |.endif + | |9: // Rethrow error from the right C frame. | mov FCARG2, RD | mov FCARG1, L:RB