From 19707009bfb8d1fe59a5c328034e8e8ad1b56232 Mon Sep 17 00:00:00 2001
From: Mike Pall
-For MSVC for Windows 64 bit this requires compilation of your C++ code
-with /EHa.
-Open a "Visual Studio Command Prompt" (either x86 or x64), cd to the
+Open a "Visual Studio Command Prompt" (x86, x64 or ARM64), cd to the
 directory with the source code and run these commands:
From 42ca6e120feebca85f1618da1c80cfa80b1d63ca Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 17 Sep 2023 10:09:58 +0200 Subject: [PATCH 37/95] ARM64: Set fixed interpreter registers before rethrow. Thanks to Peter Cawley. #593 --- src/vm_arm64.dasc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 34d29982..61a3ba6d 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -2029,13 +2029,13 @@ static void build_subroutines(BuildCtx *ctx) |.if JIT | ldr L, SAVE_L |1: + | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 + | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 + | movn TISNIL, #0 | cmn CARG1w, #LUA_ERRERR | bhs >9 // Check for error from exit. - | lsl RC, CARG1, #3 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | movn TISNIL, #0 + | lsl RC, CARG1, #3 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK | str RCw, SAVE_MULTRES | str BASE, L->base From 7a2b83a0c5d980bf3db0aeda33c79e7bb4b3da01 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 17 Sep 2023 10:31:00 +0200 Subject: [PATCH 38/95] IR_MIN/IR_MAX is non-commutative due to underlying FPU ops. Thanks to Peter Cawley. #1082 --- src/lj_ir.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lj_ir.h b/src/lj_ir.h index b32bd095..dbfba258 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -76,8 +76,8 @@ \ _(ABS, N , ref, ref) \ _(LDEXP, N , ref, ref) \ - _(MIN, C , ref, ref) \ - _(MAX, C , ref, ref) \ + _(MIN, N , ref, ref) \ + _(MAX, N , ref, ref) \ _(FPMATH, N , ref, lit) \ \ /* Overflow-checking arithmetic ops. */ \ From e897c5743f97a6b05c59852709092e7da4119914 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 17 Sep 2023 10:44:04 +0200 Subject: [PATCH 39/95] Windows/ARM64: Add MSVC cross-build support for x64 to ARM64. Thanks to invertego.
#1081 --- doc/install.html | 3 +++ src/msvcbuild.bat | 15 ++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/doc/install.html b/doc/install.html index 2c685c85..04bfe26d 100644 --- a/doc/install.html +++ b/doc/install.html @@ -214,6 +214,9 @@ msvcbuild Check the msvcbuild.bat file for more options. Then follow the installation instructions below. + +For an x64 to ARM64 cross-build run this first: vcvarsall.bat x64_arm64 +
Building with MinGW or Cygwin
Open a command prompt window and make sure the MinGW or Cygwin programs diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat index 2cfcf26e..cd25beee 100644 --- a/src/msvcbuild.bat +++ b/src/msvcbuild.bat @@ -27,12 +27,15 @@ @set BUILDTYPE=release @set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c +@setlocal +@call :SETHOSTVARS %LJCOMPILE% host\minilua.c @if errorlevel 1 goto :BAD %LJLINK% /out:minilua.exe minilua.obj @if errorlevel 1 goto :BAD if exist minilua.exe.manifest^ %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe +@endlocal @set DASMFLAGS=-D WIN -D JIT -D FFI -D ENDIAN_LE -D FPU -D P64 @set LJARCH=x64 @@ -46,6 +49,7 @@ if exist minilua.exe.manifest^ :NO32 @if "%VSCMD_ARG_TGT_ARCH%" neq "arm64" goto :X64 @set DASC=vm_arm64.dasc +@set DASMTARGET=-D LUAJIT_TARGET=LUAJIT_ARCH_ARM64 @set LJARCH=arm64 @goto :DA :X64 @@ -60,12 +64,15 @@ minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC% if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt ) minilua host\genversion.lua -%LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c +@setlocal +@call :SETHOSTVARS +%LJCOMPILE% /I "." /I %DASMDIR% %DASMTARGET% host\buildvm*.c @if errorlevel 1 goto :BAD %LJLINK% /out:buildvm.exe buildvm*.obj @if errorlevel 1 goto :BAD if exist buildvm.exe.manifest^ %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe +@endlocal buildvm -m peobj -o lj_vm.obj @if errorlevel 1 goto :BAD @@ -124,6 +131,12 @@ if exist luajit.exe.manifest^ @echo. @echo === Successfully built LuaJIT for Windows/%LJARCH% === +@goto :END +:SETHOSTVARS +@if "%VSCMD_ARG_HOST_ARCH%_%VSCMD_ARG_TGT_ARCH%" equ "x64_arm64" ( + call "%VSINSTALLDIR%Common7\Tools\VsDevCmd.bat" -arch=%VSCMD_ARG_HOST_ARCH% -no_logo + echo on +) @goto :END :BAD @echo. From d2f6c55b05c716e5dbb479b7e684abaee7cf6e12 Mon Sep 17 00:00:00 2001 From: Mike Pall
Date: Thu, 21 Sep 2023 01:58:43 +0200 Subject: [PATCH 40/95] Cleanup stack overflow handling. Reported by Peter Cawley. #962 --- src/lj_state.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/lj_state.c b/src/lj_state.c index d7befaff..1a3473b4 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -97,8 +97,17 @@ void lj_state_shrinkstack(lua_State *L, MSize used) void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need) { MSize n; - if (L->stacksize > LJ_STACK_MAXEX) /* Overflow while handling overflow? */ - lj_err_throw(L, LUA_ERRERR); + if (L->stacksize >= LJ_STACK_MAXEX) { + /* 4. Throw 'error in error handling' when we are _over_ the limit. */ + if (L->stacksize > LJ_STACK_MAXEX) + lj_err_throw(L, LUA_ERRERR); /* Does not invoke an error handler. */ + /* 1. We are _at_ the limit after the last growth. */ + if (!L->status) { /* 2. Throw 'stack overflow'. */ + L->status = LUA_ERRRUN; /* Prevent ending here again for pushed msg. */ + lj_err_msg(L, LJ_ERR_STKOV); /* May invoke an error handler. */ + } + /* 3. Add space (over the limit) for pushed message and error handler. */ + } n = L->stacksize + need; if (n > LJ_STACK_MAX) { n += 2*LUA_MINSTACK; @@ -108,8 +117,6 @@ void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need) n = LJ_STACK_MAX; } resizestack(L, n); - if (L->stacksize >= LJ_STACK_MAXEX) - lj_err_msg(L, LJ_ERR_STKOV); } void LJ_FASTCALL lj_state_growstack1(lua_State *L) From 92b89d005ab721a61bce6d471b052bcb236b81d7 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 21 Sep 2023 02:10:18 +0200 Subject: [PATCH 41/95] Add missing coercion when recording select(string, ...) Thanks to Peter Cawley. 
#1083 --- src/lj_record.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/lj_record.c b/src/lj_record.c index dfcc3f65..a49f942a 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -1570,8 +1570,11 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) TRef tr = TREF_NIL; ptrdiff_t idx = lj_ffrecord_select_mode(J, tridx, &J->L->base[dst-1]); if (idx < 0) goto nyivarg; - if (idx != 0 && !tref_isinteger(tridx)) + if (idx != 0 && !tref_isinteger(tridx)) { + if (tref_isstr(tridx)) + tridx = emitir(IRTG(IR_STRTO, IRT_NUM), tridx, 0); tridx = emitir(IRTGI(IR_CONV), tridx, IRCONV_INT_NUM|IRCONV_INDEX); + } if (idx != 0 && tref_isk(tridx)) { emitir(IRTGI(idx <= nvararg ? IR_GE : IR_LT), fr, lj_ir_kint(J, frofs+8*(int32_t)idx)); From b138ccfa918518a152bc830fef3d53cd0a922e36 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 21 Sep 2023 02:15:16 +0200 Subject: [PATCH 42/95] Handle all stack layouts in (delayed) TRACE vmevent. Thanks to Sergey Bronnikov and Peter Cawley. #1087 --- src/lj_trace.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/lj_trace.c b/src/lj_trace.c index a72e73a3..25e610b5 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -524,21 +524,27 @@ static int trace_abort(jit_State *J) J->cur.link = 0; J->cur.linktype = LJ_TRLINK_NONE; lj_vmevent_send(L, TRACE, - TValue *frame; + cTValue *bot = tvref(L->stack); + cTValue *frame; const BCIns *pc; - GCfunc *fn; + BCPos pos = 0; setstrV(L, L->top++, lj_str_newlit(L, "abort")); setintV(L->top++, traceno); /* Find original Lua function call to generate a better error message. */ - frame = J->L->base-1; - pc = J->pc; - while (!isluafunc(frame_func(frame))) { - pc = (frame_iscont(frame) ? 
frame_contpc(frame) : frame_pc(frame)) - 1; - frame = frame_prev(frame); + for (frame = J->L->base-1, pc = J->pc; ; frame = frame_prev(frame)) { + if (isluafunc(frame_func(frame))) { + pos = proto_bcpos(funcproto(frame_func(frame)), pc); + break; + } else if (frame_prev(frame) <= bot) { + break; + } else if (frame_iscont(frame)) { + pc = frame_contpc(frame) - 1; + } else { + pc = frame_pc(frame) - 1; + } } - fn = frame_func(frame); - setfuncV(L, L->top++, fn); - setintV(L->top++, proto_bcpos(funcproto(fn), pc)); + setfuncV(L, L->top++, frame_func(frame)); + setintV(L->top++, pos); copyTV(L, L->top++, restorestack(L, errobj)); copyTV(L, L->top++, &J->errinfo); ); From fca1f51bf8209a41f8d7cd13ff09f113ac0d87b6 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 21 Sep 2023 02:38:29 +0200 Subject: [PATCH 43/95] ARM64: Fuse negative 32 bit constants into arithmetic ops again. Thanks to Peter Cawley. #1065 --- src/lj_asm_arm64.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index c2b17737..8673f7df 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -222,7 +222,8 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow) return A64F_M(ir->r); } else if (irref_isk(ref)) { int64_t k = get_k64val(as, ref); - uint32_t m = logical ? emit_isk13(k, irt_is64(ir->t)) : emit_isk12(k); + uint32_t m = logical ? emit_isk13(k, irt_is64(ir->t)) : + emit_isk12(irt_is64(ir->t) ? k : (int32_t)k); if (m) return m; } else if (mayfuse(as, ref)) { From 91592899275cbb540ca67bbf95b41a2200e4fdbd Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 21 Sep 2023 02:48:12 +0200 Subject: [PATCH 44/95] ARM64: Fix IR_HREF code generation for constant FP keys. Reported by swarn. Fix for 435d8c63 by Peter Cawley. 
#1090 --- src/lj_asm_arm64.h | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 8673f7df..82f14405 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -787,7 +787,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) int destused = ra_used(ir); Reg dest = ra_dest(as, ir, allow); Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); - Reg key = 0, tmp = RID_TMP, type = RID_NONE, tkey; + Reg tmp = RID_TMP, type = RID_NONE, key, tkey; IRRef refkey = ir->op2; IRIns *irkey = IR(refkey); int isk = irref_isk(refkey); @@ -797,26 +797,22 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) MCLabel l_end, l_loop; rset_clear(allow, tab); - /* Allocate registers outside of the loop. */ - if (irkey->o != IR_KNUM || !(k = emit_isk12((int64_t)ir_knum(irkey)->u64))) { - key = ra_alloc1(as, refkey, irt_isnum(kt) ? RSET_FPR : allow); - rset_clear(allow, key); - } - if (!isk) { - tkey = ra_scratch(as, allow); - rset_clear(allow, tkey); - } else if (irt_isnum(kt)) { - tkey = key; /* Assumes -0.0 is already canonicalized to +0.0. */ - } else { + /* Allocate register for tkey outside of the loop. */ + if (isk) { int64_t kk; if (irt_isaddr(kt)) { kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; + } else if (irt_isnum(kt)) { + kk = (int64_t)ir_knum(irkey)->u64; + /* Assumes -0.0 is already canonicalized to +0.0. */ } else { lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); kk = ~((int64_t)~irt_toitype(kt) << 47); } - tkey = ra_allock(as, kk, allow); - rset_clear(allow, tkey); + k = emit_isk12(kk); + tkey = k ? 0 : ra_allock(as, kk, allow); + } else { + tkey = ra_scratch(as, allow); } /* Key not found in chain: jump to exit (if merged) or load niltv. */ @@ -849,10 +845,13 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) /* Construct tkey as canonicalized or tagged key. 
*/ if (!isk) { if (irt_isnum(kt)) { + key = ra_alloc1(as, refkey, RSET_FPR); emit_dnm(as, A64I_CSELx | A64F_CC(CC_EQ), tkey, RID_ZERO, tkey); + /* A64I_FMOV_R_D from key to tkey done below. */ } else { lj_assertA(irt_isaddr(kt), "bad HREF key type"); - type = ra_allock(as, irt_toitype(kt) << 15, allow); + key = ra_alloc1(as, refkey, allow); + type = ra_allock(as, irt_toitype(kt) << 15, rset_clear(allow, key)); emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 32), tkey, key, type); } } From b8919781d4717d8c3171b0002d230e03304d8174 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 21 Sep 2023 03:46:33 +0200 Subject: [PATCH 45/95] Consistently use 64 bit constants for 64 bit IR instructions. Thanks to Peter Cawley. #1084 --- src/lj_asm_x86.h | 3 ++- src/lj_ffrecord.c | 23 +++++++++++------------ src/lj_iropt.h | 6 ++++++ src/lj_record.c | 9 +++++---- 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 9f779bf5..c92de3d8 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -140,7 +140,8 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref) } } else if (irb->o == IR_ADD && irref_isk(irb->op2)) { /* Fuse base offset (vararg load). */ - as->mrm.ofs = IR(irb->op2)->i; + IRIns *irk = IR(irb->op2); + as->mrm.ofs = irk->o == IR_KINT ? irk->i : (int32_t)ir_kint64(irk)->u64; return irb->op1; } return ref; /* Otherwise use the given array base. */ diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 8ebf4165..1233e5f7 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -1130,7 +1130,7 @@ static TRef recff_sbufx_check(jit_State *J, RecordFFData *rd, ptrdiff_t arg) /* Emit BUFHDR for write to extended string buffer. 
*/ static TRef recff_sbufx_write(jit_State *J, TRef ud) { - TRef trbuf = emitir(IRT(IR_ADD, IRT_PGC), ud, lj_ir_kint(J, sizeof(GCudata))); + TRef trbuf = emitir(IRT(IR_ADD, IRT_PGC), ud, lj_ir_kintpgc(J, sizeof(GCudata))); return emitir(IRT(IR_BUFHDR, IRT_PGC), trbuf, IRBUFHDR_WRITE); } @@ -1164,20 +1164,19 @@ static void LJ_FASTCALL recff_buffer_method_reset(jit_State *J, RecordFFData *rd SBufExt *sbx = bufV(&rd->argv[0]); int iscow = (int)sbufiscow(sbx); TRef trl = recff_sbufx_get_L(J, ud); - TRef trcow = emitir(IRT(IR_BAND, IRT_IGC), trl, lj_ir_kint(J, SBUF_FLAG_COW)); - TRef zero = lj_ir_kint(J, 0); - emitir(IRTG(iscow ? IR_NE : IR_EQ, IRT_IGC), trcow, zero); + TRef trcow = emitir(IRT(IR_BAND, IRT_IGC), trl, lj_ir_kintpgc(J, SBUF_FLAG_COW)); + TRef zeropgc = lj_ir_kintpgc(J, 0); + emitir(IRTG(iscow ? IR_NE : IR_EQ, IRT_IGC), trcow, zeropgc); if (iscow) { - trl = emitir(IRT(IR_BXOR, IRT_IGC), trl, - LJ_GC64 ? lj_ir_kint64(J, SBUF_FLAG_COW) : - lj_ir_kint(J, SBUF_FLAG_COW)); - recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, zero); - recff_sbufx_set_ptr(J, ud, IRFL_SBUF_E, zero); - recff_sbufx_set_ptr(J, ud, IRFL_SBUF_B, zero); + TRef zerop = lj_ir_kintp(J, 0); + trl = emitir(IRT(IR_BXOR, IRT_IGC), trl, lj_ir_kintpgc(J, SBUF_FLAG_COW)); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, zerop); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_E, zerop); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_B, zerop); recff_sbufx_set_L(J, ud, trl); emitir(IRT(IR_FSTORE, IRT_PGC), - emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_REF), zero); - recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, zero); + emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_REF), zeropgc); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, zerop); } else { TRef trb = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_B); recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trb); diff --git a/src/lj_iropt.h b/src/lj_iropt.h index 458a5511..a71a717b 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h @@ -56,6 +56,12 @@ LJ_FUNC TRef lj_ir_ktrace(jit_State *J); #define lj_ir_kintp(J, k) 
lj_ir_kint(J, (int32_t)(k)) #endif +#if LJ_GC64 +#define lj_ir_kintpgc lj_ir_kintp +#else +#define lj_ir_kintpgc lj_ir_kint +#endif + static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n) { TValue tv; diff --git a/src/lj_record.c b/src/lj_record.c index 7a970628..d44f7737 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -1781,7 +1781,7 @@ noconstify: emitir(IRTG(IR_EQ, IRT_PGC), REF_BASE, emitir(IRT(IR_ADD, IRT_PGC), uref, - lj_ir_kint(J, (slot - 1 - LJ_FR2) * -8))); + lj_ir_kintpgc(J, (slot - 1 - LJ_FR2) * -8))); slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */ if (val == 0) { return getslot(J, slot); @@ -1794,7 +1794,7 @@ noconstify: } emitir(IRTG(IR_UGT, IRT_PGC), emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE), - lj_ir_kint(J, (J->baseslot + J->maxslot) * 8)); + lj_ir_kintpgc(J, (J->baseslot + J->maxslot) * 8)); } else { needbarrier = 1; uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv)); @@ -1972,7 +1972,8 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) emitir(IRTGI(IR_EQ), fr, lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1))); vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); - vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8*(1+LJ_FR2))); + vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, + lj_ir_kintpgc(J, frofs-8*(1+LJ_FR2))); for (i = 0; i < nload; i++) { IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]); J->base[dst+i] = lj_record_vload(J, vbase, (MSize)i, t); @@ -2023,7 +2024,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) IRType t; TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, - lj_ir_kint(J, frofs-(8<<LJ_FR2))); + lj_ir_kintpgc(J, frofs-(8<<LJ_FR2))); t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]); aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx); tr = lj_record_vload(J, aref, 0, t); From e86990f7f24a94b0897061f25a84547fe1108bed Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 21 Sep 2023 03:54:08 +0200 Subject: [PATCH 46/95] Restore cur_L for specific
Lua/C API use case. Thanks to Peter Cawley. #1066 --- src/lj_err.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/lj_err.c b/src/lj_err.c index cadc76bd..7b11e4d0 100644 --- a/src/lj_err.c +++ b/src/lj_err.c @@ -174,12 +174,15 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) case FRAME_PCALL: /* FF pcall() frame. */ case FRAME_PCALLH: /* FF pcall() frame inside hook. */ if (errcode) { + global_State *g; if (errcode == LUA_YIELD) { frame = frame_prevd(frame); break; } + g = G(L); + setgcref(g->cur_L, obj2gco(L)); if (frame_typep(frame) == FRAME_PCALL) - hook_leave(G(L)); + hook_leave(g); L->base = frame_prevd(frame) + 1; L->cframe = cf; unwindstack(L, L->base); From a5d2f70c73e406beb617afa829a7af5b8c1d842c Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 21 Sep 2023 04:40:48 +0200 Subject: [PATCH 47/95] Handle OOM error on stack resize in coroutine.resume and lua_checkstack. Thanks to Peter Cawley. #1066 --- src/lib_base.c | 5 ++++- src/lj_api.c | 7 ++++++- src/lj_state.c | 12 ++++++++++++ src/lj_state.h | 1 + 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/lib_base.c b/src/lib_base.c index dd54b9f9..4e6f8a30 100644 --- a/src/lib_base.c +++ b/src/lib_base.c @@ -616,7 +616,10 @@ static int ffh_resume(lua_State *L, lua_State *co, int wrap) setstrV(L, L->base-LJ_FR2, lj_err_str(L, em)); return FFH_RES(2); } - lj_state_growstack(co, (MSize)(L->top - L->base)); + if (lj_state_cpgrowstack(co, (MSize)(L->top - L->base)) != LUA_OK) { + cTValue *msg = --co->top; + lj_err_callermsg(L, strVdata(msg)); + } return FFH_RETRY; } diff --git a/src/lj_api.c b/src/lj_api.c index 386bcada..d4048d79 100644 --- a/src/lj_api.c +++ b/src/lj_api.c @@ -104,7 +104,12 @@ LUA_API int lua_checkstack(lua_State *L, int size) if (size > LUAI_MAXCSTACK || (L->top - L->base + size) > LUAI_MAXCSTACK) { return 0; /* Stack overflow. 
*/ } else if (size > 0) { - lj_state_checkstack(L, (MSize)size); + int avail = (int)(mref(L->maxstack, TValue) - L->top); + if (size > avail && + lj_state_cpgrowstack(L, (MSize)(size - avail)) != LUA_OK) { + L->top--; + return 0; /* Out of memory. */ + } } return 1; } diff --git a/src/lj_state.c b/src/lj_state.c index 6b3f58ff..569e3f38 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -130,6 +130,18 @@ void LJ_FASTCALL lj_state_growstack1(lua_State *L) lj_state_growstack(L, 1); } +static TValue *cpgrowstack(lua_State *co, lua_CFunction dummy, void *ud) +{ + UNUSED(dummy); + lj_state_growstack(co, *(MSize *)ud); + return NULL; +} + +int LJ_FASTCALL lj_state_cpgrowstack(lua_State *L, MSize need) +{ + return lj_vm_cpcall(L, NULL, &need, cpgrowstack); +} + /* Allocate basic stack for new state. */ static void stack_init(lua_State *L1, lua_State *L) { diff --git a/src/lj_state.h b/src/lj_state.h index db67f03b..3850e5a1 100644 --- a/src/lj_state.h +++ b/src/lj_state.h @@ -18,6 +18,7 @@ LJ_FUNC void lj_state_relimitstack(lua_State *L); LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used); LJ_FUNCA void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need); LJ_FUNC void LJ_FASTCALL lj_state_growstack1(lua_State *L); +LJ_FUNC int LJ_FASTCALL lj_state_cpgrowstack(lua_State *L, MSize need); static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need) { From aa6b15c1a8922848bd6f596ba384824ca3fe0f5f Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 21 Sep 2023 04:43:40 +0200 Subject: [PATCH 48/95] Follow-up fix for stack overflow handling cleanup. --- src/lj_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lj_state.c b/src/lj_state.c index 1a3473b4..c2f0b115 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -102,7 +102,7 @@ void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need) if (L->stacksize > LJ_STACK_MAXEX) lj_err_throw(L, LUA_ERRERR); /* Does not invoke an error handler. */ /* 1. 
We are _at_ the limit after the last growth. */ - if (!L->status) { /* 2. Throw 'stack overflow'. */ + if (L->status < LUA_ERRRUN) { /* 2. Throw 'stack overflow'. */ L->status = LUA_ERRRUN; /* Prevent ending here again for pushed msg. */ lj_err_msg(L, LJ_ERR_STKOV); /* May invoke an error handler. */ } From d1a2fef8a8f53b0055ee041f7f63d83a27444ffa Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 21 Sep 2023 05:19:55 +0200 Subject: [PATCH 49/95] LJ_FR2: Fix stack checks in vararg calls. Thanks to Peter Cawley. #1048 --- src/lj_def.h | 2 +- src/lj_dispatch.c | 2 +- src/vm_arm64.dasc | 1 + src/vm_mips64.dasc | 1 + 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/lj_def.h b/src/lj_def.h index 1461d3d7..0d6c346b 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -69,7 +69,7 @@ typedef unsigned int uintptr_t; #define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */ #define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */ -#define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */ +#define LJ_STACK_EXTRA (5+3*LJ_FR2) /* Extra stack space (metamethods). */ #define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */ diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c index 57809e62..b9748bba 100644 --- a/src/lj_dispatch.c +++ b/src/lj_dispatch.c @@ -453,7 +453,7 @@ static int call_init(lua_State *L, GCfunc *fn) int numparams = pt->numparams; int gotparams = (int)(L->top - L->base); int need = pt->framesize; - if ((pt->flags & PROTO_VARARG)) need += 1+gotparams; + if ((pt->flags & PROTO_VARARG)) need += 1+LJ_FR2+gotparams; lj_state_checkstack(L, (MSize)need); numparams -= gotparams; return numparams >= 0 ? 
numparams : 0; diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 61a3ba6d..3044a8ac 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -3916,6 +3916,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add TMP2, BASE, RC | add LFUNC:CARG3, CARG3, TMP0, lsl #47 | add RA, RA, RC + | sub CARG1, CARG1, #8 | add TMP0, RC, #16+FRAME_VARG | str LFUNC:CARG3, [TMP2], #8 // Store (tagged) copy of LFUNC. | ldr KBASE, [PC, #-4+PC2PROTO(k)] diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc index 6c215f2b..ef0d901d 100644 --- a/src/vm_mips64.dasc +++ b/src/vm_mips64.dasc @@ -5396,6 +5396,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | settp LFUNC:RB, TMP0 | daddu TMP0, RA, RC | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC. + | daddiu TMP2, TMP2, -8 | daddiu TMP3, RC, 16+FRAME_VARG | sltu AT, TMP0, TMP2 | ld KBASE, -4+PC2PROTO(k)(PC) From f72c19e482b6f918b7cf42b0436e2b117d160a29 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Fri, 22 Sep 2023 21:04:22 +0200 Subject: [PATCH 50/95] Maintain chain invariant in DCE. Thanks to Peter Cawley. #1094 --- src/lj_opt_dce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lj_opt_dce.c b/src/lj_opt_dce.c index c6c3e1bc..e6fcc552 100644 --- a/src/lj_opt_dce.c +++ b/src/lj_opt_dce.c @@ -44,12 +44,12 @@ static void dce_propagate(jit_State *J) IRIns *ir = IR(ins); if (irt_ismarked(ir->t)) { irt_clearmark(ir->t); - pchain[ir->o] = &ir->prev; } else if (!ir_sideeff(ir)) { *pchain[ir->o] = ir->prev; /* Reroute original instruction chain. */ lj_ir_nop(ir); continue; } + pchain[ir->o] = &ir->prev; if (ir->op1 >= REF_FIRST) irt_setmark(IR(ir->op1)->t); if (ir->op2 >= REF_FIRST) irt_setmark(IR(ir->op2)->t); } From becf5cc65d966a8926466dd43407c48bfea0fa13 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 25 Sep 2023 16:56:17 +0200 Subject: [PATCH 51/95] FFI: Fix ffi.abi("pauth"). Thanks to Peter Cawley. 
#1098 --- src/lib_ffi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib_ffi.c b/src/lib_ffi.c index 6dee2e74..ba783173 100644 --- a/src/lib_ffi.c +++ b/src/lib_ffi.c @@ -746,7 +746,7 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.) "\003win" #endif #if LJ_ABI_PAUTH - "\007pauth" + "\005pauth" #endif #if LJ_TARGET_UWP "\003uwp" From 007e4dce13673b01a38b19384f54fa50a79a66de Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 8 Oct 2023 21:17:43 +0200 Subject: [PATCH 52/95] ARM64: Restore fp before sp in C stack unwinders. Thanks to Peter Cawley. #1096 --- src/host/buildvm_peobj.c | 3 ++- src/vm_arm64.dasc | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/host/buildvm_peobj.c b/src/host/buildvm_peobj.c index 7ce3b05a..8f04c496 100644 --- a/src/host/buildvm_peobj.c +++ b/src/host/buildvm_peobj.c @@ -373,11 +373,12 @@ void emit_peobj(BuildCtx *ctx) /* Unwind codes for .text section with handler. */ p = uwc; + CADD_FP(192); /* +2 */ CSAVE_REGS(19, 28, 176); /* +5*2 */ CSAVE_FREGS(8, 15, 96); /* +4*2 */ CSAVE_FPLR(192); /* +1 */ CALLOC_S(208); /* +1 */ - CEND_ALIGN; /* +1 +3 -> 24 */ + CEND_ALIGN; /* +1 +1 -> 24 */ u32 = ((24u >> 2) << 27) | (1u << 20) | (fcofs >> 2); owrite(ctx, &u32, 4); diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 3044a8ac..26973686 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -430,6 +430,7 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_unwind_c: // Unwind C stack, return from vm_pcall. | // (void *cframe, int errcode) + | add fp, CARG1, # SAVE_FP_LR_ | mov sp, CARG1 | mov CRET1, CARG2 | ldr L, SAVE_L @@ -441,7 +442,8 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_unwind_ff: // Unwind C stack, return from ff pcall. 
| // (void *cframe) - | and sp, CARG1, #CFRAME_RAWMASK + | add fp, CARG1, # SAVE_FP_LR_ + | mov sp, CARG1 | ldr L, SAVE_L | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 From 1e93951b258cdf885779992434201c6114445665 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 8 Oct 2023 21:20:10 +0200 Subject: [PATCH 53/95] ARM64: Fix register hint for FFI calls with FP results. Thanks to Peter Cawley. #1096 --- src/lj_asm_arm64.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 82f14405..9f165fa8 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -1985,7 +1985,7 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) as->evenspill = nslots; } #endif - return REGSP_HINT(RID_RET); + return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET); } static void asm_setup_target(ASMState *as) From 9cc8bbb7ae3675382d016e33b6d8b022101077b8 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 8 Oct 2023 21:22:50 +0200 Subject: [PATCH 54/95] ARM: Fix register hint for FFI calls with FP results. --- src/lj_asm_arm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index a003d5ca..ac3d1b58 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -2255,7 +2255,7 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) } if (nslots > as->evenspill) /* Leave room for args in stack slots. */ as->evenspill = nslots; - return REGSP_HINT(RID_RET); + return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET); } static void asm_setup_target(ASMState *as) From c5b075eb313e7ed4f3184382f6e70bc48b15ec72 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 8 Oct 2023 21:39:40 +0200 Subject: [PATCH 55/95] ARM64: Unify constant register handling in interpreter. Plus minor optimizations. Simplifications for out-of-tree ARM64EC. Thanks to Peter Cawley. 
#1096 --- src/vm_arm64.dasc | 150 ++++++++++++++++++++++------------------------ 1 file changed, 73 insertions(+), 77 deletions(-) diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 26973686..2aaa64cb 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -291,8 +291,17 @@ | blo target |.endmacro | +|.macro init_constants +| movn TISNIL, #0 +| movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 +| movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 +|.endmacro +| |.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro |.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro +|.macro mov_nil, reg; mov reg, TISNIL; .endmacro +|.macro cmp_nil, reg; cmp reg, TISNIL; .endmacro +|.macro add_TISNUM, dst, src; add dst, src, TISNUM; .endmacro | #define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field)) | @@ -445,9 +454,7 @@ static void build_subroutines(BuildCtx *ctx) | add fp, CARG1, # SAVE_FP_LR_ | mov sp, CARG1 | ldr L, SAVE_L - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | movn TISNIL, #0 + | init_constants | ldr GL, L->glref // Setup pointer to global state. |->vm_unwind_ff_eh: // Landing pad for external unwinder. | mov RC, #16 // 2 results: false + error message. @@ -512,11 +519,9 @@ static void build_subroutines(BuildCtx *ctx) | str L, GL->cur_L | mov RA, BASE | ldp BASE, CARG1, L->base - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 + | init_constants | ldr PC, [BASE, FRAME_PC] | strb wzr, L->status - | movn TISNIL, #0 | sub RC, CARG1, BASE | ands CARG1, PC, #FRAME_TYPE | add RC, RC, #8 @@ -552,10 +557,8 @@ static void build_subroutines(BuildCtx *ctx) |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). | str L, GL->cur_L | ldp RB, CARG1, L->base // RB = old base (for vmeta_call). 
- | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 | add PC, PC, BASE - | movn TISNIL, #0 + | init_constants | sub PC, PC, RB // PC = frame delta + frame type | sub NARGS8:RC, CARG1, BASE | st_vmstate ST_INTERP @@ -664,7 +667,7 @@ static void build_subroutines(BuildCtx *ctx) | b >1 | |->vmeta_tgetb: // RB = table, RC = index - | add RC, RC, TISNUM + | add_TISNUM RC, RC | add CARG2, BASE, RB, lsl #3 | add CARG3, sp, TMPDofs | str RC, TMPD @@ -699,7 +702,7 @@ static void build_subroutines(BuildCtx *ctx) | sxtw CARG2, TMP1w | bl extern lj_tab_getinth // (GCtab *t, int32_t key) | // Returns cTValue * or NULL. - | mov TMP0, TISNIL + | mov_nil TMP0 | cbz CRET1, ->BC_TGETR_Z | ldr TMP0, [CRET1] | b ->BC_TGETR_Z @@ -722,7 +725,7 @@ static void build_subroutines(BuildCtx *ctx) | b >1 | |->vmeta_tsetb: // RB = table, RC = index - | add RC, RC, TISNUM + | add_TISNUM RC, RC | add CARG2, BASE, RB, lsl #3 | add CARG3, sp, TMPDofs | str RC, TMPD @@ -1036,7 +1039,7 @@ static void build_subroutines(BuildCtx *ctx) |1: // Field metatable must be at same offset for GCtab and GCudata! 
| ldr TAB:RB, TAB:CARG1->metatable |2: - | mov CARG1, TISNIL + | mov_nil CARG1 | ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable] | cbz TAB:RB, ->fff_restv | ldr TMP1w, TAB:RB->hmask @@ -1058,7 +1061,7 @@ static void build_subroutines(BuildCtx *ctx) | movk CARG1, #(LJ_TTAB>>1)&0xffff, lsl #48 | b ->fff_restv |5: - | cmp TMP0, TISNIL + | cmp_nil TMP0 | bne ->fff_restv | b <4 | @@ -1158,8 +1161,8 @@ static void build_subroutines(BuildCtx *ctx) | cbnz TAB:CARG2, ->fff_fallback #endif | mov RC, #(3+1)*8 - | stp CARG1, TISNIL, [BASE, #-8] - | str CFUNC:CARG4, [BASE, #-16] + | stp CFUNC:CARG4, CARG1, [BASE, #-16] + | str TISNIL, [BASE] | b ->fff_res | |.ffunc_2 ipairs_aux @@ -1171,14 +1174,14 @@ static void build_subroutines(BuildCtx *ctx) | add CARG2w, CARG2w, #1 | cmp CARG2w, TMP1w | ldr PC, [BASE, FRAME_PC] - | add TMP2, CARG2, TISNUM + | add_TISNUM TMP2, CARG2 | mov RC, #(0+1)*8 | str TMP2, [BASE, #-16] | bhs >2 // Not in array part? | ldr TMP0, [CARG3, CARG2, lsl #3] |1: | mov TMP1, #(2+1)*8 - | cmp TMP0, TISNIL + | cmp_nil TMP0 | str TMP0, [BASE, #-8] | csel RC, RC, TMP1, eq | b ->fff_res @@ -1201,8 +1204,8 @@ static void build_subroutines(BuildCtx *ctx) | cbnz TAB:CARG2, ->fff_fallback #endif | mov RC, #(3+1)*8 - | stp CARG1, TISNUM, [BASE, #-8] - | str CFUNC:CARG4, [BASE, #-16] + | stp CFUNC:CARG4, CARG1, [BASE, #-16] + | str TISNUM, [BASE] | b ->fff_res | |//-- Base library: catch errors ---------------------------------------- @@ -1392,7 +1395,7 @@ static void build_subroutines(BuildCtx *ctx) | eor CARG2w, CARG1w, CARG1w, asr #31 | movz CARG3, #0x41e0, lsl #48 // 2^31. | subs CARG1w, CARG2w, CARG1w, asr #31 - | add CARG1, CARG1, TISNUM + | add_TISNUM CARG1, CARG1 | csel CARG1, CARG1, CARG3, pl | // Fallthrough. 
| @@ -1483,7 +1486,7 @@ static void build_subroutines(BuildCtx *ctx) | ldr PC, [BASE, FRAME_PC] | str d0, [BASE, #-16] | mov RC, #(2+1)*8 - | add CARG2, CARG2, TISNUM + | add_TISNUM CARG2, CARG2 | str CARG2, [BASE, #-8] | b ->fff_res | @@ -1549,7 +1552,7 @@ static void build_subroutines(BuildCtx *ctx) | bne ->fff_fallback | ldrb TMP0w, STR:CARG1[1] // Access is always ok (NUL at end). | ldr CARG3w, STR:CARG1->len - | add TMP0, TMP0, TISNUM + | add_TISNUM TMP0, TMP0 | str TMP0, [BASE, #-16] | mov RC, #(0+1)*8 | cbz CARG3, ->fff_res @@ -1695,17 +1698,17 @@ static void build_subroutines(BuildCtx *ctx) |.ffunc_bit tobit | mov TMP0w, CARG1w |9: // Label reused by .ffunc_bit_op users. - | add CARG1, TMP0, TISNUM + | add_TISNUM CARG1, TMP0 | b ->fff_restv | |.ffunc_bit bswap | rev TMP0w, CARG1w - | add CARG1, TMP0, TISNUM + | add_TISNUM CARG1, TMP0 | b ->fff_restv | |.ffunc_bit bnot | mvn TMP0w, CARG1w - | add CARG1, TMP0, TISNUM + | add_TISNUM CARG1, TMP0 | b ->fff_restv | |.macro .ffunc_bit_sh, name, ins, shmod @@ -1726,7 +1729,7 @@ static void build_subroutines(BuildCtx *ctx) | checkint CARG1, ->vm_tobit_fb |2: | ins TMP0w, CARG1w, TMP1w - | add CARG1, TMP0, TISNUM + | add_TISNUM CARG1, TMP0 | b ->fff_restv |.endmacro | @@ -1915,8 +1918,7 @@ static void build_subroutines(BuildCtx *ctx) | and CARG3, CARG3, #LJ_GCVMASK | beq >2 |1: // Move results down. - | ldr CARG1, [RA] - | add RA, RA, #8 + | ldr CARG1, [RA], #8 | subs RB, RB, #8 | str CARG1, [BASE, RC, lsl #3] | add RC, RC, #1 @@ -2031,9 +2033,7 @@ static void build_subroutines(BuildCtx *ctx) |.if JIT | ldr L, SAVE_L |1: - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | movn TISNIL, #0 + | init_constants | cmn CARG1w, #LUA_ERRERR | bhs >9 // Check for error from exit. | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] @@ -2212,9 +2212,7 @@ static void build_subroutines(BuildCtx *ctx) | bl extern lj_ccallback_enter // (CTState *cts, void *cf) | // Returns lua_State *. 
| ldp BASE, RC, L:CRET1->base - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | movn TISNIL, #0 + | init_constants | mov L, CRET1 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] | sub RC, RC, BASE @@ -2593,7 +2591,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bne >5 | negs TMP0w, TMP0w | movz CARG3, #0x41e0, lsl #48 // 2^31. - | add TMP0, TMP0, TISNUM + | add_TISNUM TMP0, TMP0 | csel TMP0, TMP0, CARG3, vc |5: | str TMP0, [BASE, RA, lsl #3] @@ -2608,7 +2606,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bne >2 | ldr CARG1w, STR:CARG1->len |1: - | add CARG1, CARG1, TISNUM + | add_TISNUM CARG1, CARG1 | str CARG1, [BASE, RA, lsl #3] | ins_next | @@ -2716,7 +2714,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | intins CARG1w, CARG1w, CARG2w | ins_arithfallback bvs |.endif - | add CARG1, CARG1, TISNUM + | add_TISNUM CARG1, CARG1 | str CARG1, [BASE, RA, lsl #3] |4: | ins_next @@ -2809,7 +2807,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_KSHORT: | // RA = dst, RC = int16_literal | sxth RCw, RCw - | add TMP0, RC, TISNUM + | add_TISNUM TMP0, RC | str TMP0, [BASE, RA, lsl #3] | ins_next break; @@ -3032,7 +3030,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | cmp TMP1w, CARG1w // In array part? | bhs ->vmeta_tgetv | ldr TMP0, [CARG3] - | cmp TMP0, TISNIL + | cmp_nil TMP0 | beq >5 |1: | str TMP0, [BASE, RA, lsl #3] @@ -3075,7 +3073,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ldr NODE:CARG3, NODE:CARG3->next | cmp CARG1, CARG4 | bne >4 - | cmp TMP0, TISNIL + | cmp_nil TMP0 | beq >5 |3: | str TMP0, [BASE, RA, lsl #3] @@ -3084,7 +3082,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |4: // Follow hash chain. | cbnz NODE:CARG3, <1 | // End of hash chain: key not found, nil result. - | mov TMP0, TISNIL + | mov_nil TMP0 | |5: // Check for __index if table value is nil. 
| ldr TAB:CARG1, TAB:CARG2->metatable @@ -3105,7 +3103,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | cmp RCw, CARG1w // In array part? | bhs ->vmeta_tgetb | ldr TMP0, [CARG3] - | cmp TMP0, TISNIL + | cmp_nil TMP0 | beq >5 |1: | str TMP0, [BASE, RA, lsl #3] @@ -3152,7 +3150,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ldr TMP1, [CARG3] | ldr TMP0, [BASE, RA, lsl #3] | ldrb TMP2w, TAB:CARG2->marked - | cmp TMP1, TISNIL // Previous value is nil? + | cmp_nil TMP1 // Previous value is nil? | beq >5 |1: | str TMP0, [CARG3] @@ -3204,7 +3202,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | cmp CARG1, CARG4 | bne >5 | ldr TMP0, [BASE, RA, lsl #3] - | cmp TMP1, TISNIL // Previous value is nil? + | cmp_nil TMP1 // Previous value is nil? | beq >4 |2: | str TMP0, NODE:CARG3->val @@ -3263,7 +3261,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ldr TMP1, [CARG3] | ldr TMP0, [BASE, RA, lsl #3] | ldrb TMP2w, TAB:CARG2->marked - | cmp TMP1, TISNIL // Previous value is nil? + | cmp_nil TMP1 // Previous value is nil? | beq >5 |1: | str TMP0, [CARG3] @@ -3362,9 +3360,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_CALL_Z: | mov RB, BASE // Save old BASE for vmeta_call. | add BASE, BASE, RA, lsl #3 - | ldr CARG3, [BASE] + | ldr CARG3, [BASE], #16 | sub NARGS8:RC, NARGS8:RC, #8 - | add BASE, BASE, #16 | checkfunc CARG3, ->vmeta_call | ins_call break; @@ -3380,9 +3377,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RA = base, (RB = 0,) RC = (nargs+1)*8 |->BC_CALLT1_Z: | add RA, BASE, RA, lsl #3 - | ldr TMP1, [RA] + | ldr TMP1, [RA], #16 | sub NARGS8:RC, NARGS8:RC, #8 - | add RA, RA, #16 | checktp CARG3, TMP1, LJ_TFUNC, ->vmeta_callt | ldr PC, [BASE, FRAME_PC] |->BC_CALLT2_Z: @@ -3462,10 +3458,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add CARG3, CARG2, CARG1, lsl #3 | bhs >5 // Index points after array part? 
| ldr TMP0, [CARG3] - | cmp TMP0, TISNIL + | cmp_nil TMP0 | cinc CARG1, CARG1, eq // Skip holes in array part. | beq <1 - | add CARG1, CARG1, TISNUM + | add_TISNUM CARG1, CARG1 | stp CARG1, TMP0, [RA] | add CARG1, CARG1, #1 |3: @@ -3483,7 +3479,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8 | bhi <4 | ldp TMP0, CARG1, NODE:CARG3->val - | cmp TMP0, TISNIL + | cmp_nil TMP0 | add RC, RC, #1 | beq <6 // Skip holes in hash part. | stp CARG1, TMP0, [RA] @@ -3501,8 +3497,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | checkfunc CFUNC:CARG1, >5 | asr TMP0, TAB:CARG3, #47 | ldrb TMP1w, CFUNC:CARG1->ffid - | cmn TMP0, #-LJ_TTAB - | ccmp CARG4, TISNIL, #0, eq + | cmp_nil CARG4 + | ccmn TMP0, #-LJ_TTAB, #0, eq | ccmp TMP1w, #FF_next_N, #0, eq | bne >5 | mov TMP0w, #0xfffe7fff // LJ_KEYINDEX @@ -3542,51 +3538,51 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | and RC, RC, #255 | // RA = base, RB = (nresults+1), RC = numparams | ldr TMP1, [BASE, FRAME_PC] - | add RC, BASE, RC, lsl #3 - | add RA, BASE, RA, lsl #3 - | add RC, RC, #FRAME_VARG - | add TMP2, RA, RB, lsl #3 - | sub RC, RC, TMP1 // RC = vbase - | // Note: RC may now be even _above_ BASE if nargs was < numparams. + | add TMP0, BASE, RC, lsl #3 + | add RC, BASE, RA, lsl #3 // RC = destination + | add TMP0, TMP0, #FRAME_VARG + | add TMP2, RC, RB, lsl #3 + | sub RA, TMP0, TMP1 // RA = vbase + | // Note: RA may now be even _above_ BASE if nargs was < numparams. | sub TMP3, BASE, #16 // TMP3 = vtop | cbz RB, >5 | sub TMP2, TMP2, #16 |1: // Copy vararg slots to destination slots. - | cmp RC, TMP3 - | ldr TMP0, [RC], #8 - | csel TMP0, TMP0, TISNIL, lo - | cmp RA, TMP2 - | str TMP0, [RA], #8 + | cmp RA, TMP3 + | ldr TMP0, [RA], #8 + | csinv TMP0, TMP0, xzr, lo // TISNIL = ~xzr + | cmp RC, TMP2 + | str TMP0, [RC], #8 | blo <1 |2: | ins_next | |5: // Copy all varargs. 
| ldr TMP0, L->maxstack - | subs TMP2, TMP3, RC + | subs TMP2, TMP3, RA | csel RB, xzr, TMP2, le // MULTRES = (max(vtop-vbase,0)+1)*8 | add RB, RB, #8 - | add TMP1, RA, TMP2 + | add TMP1, RC, TMP2 | str RBw, SAVE_MULTRES | ble <2 // Nothing to copy. | cmp TMP1, TMP0 | bhi >7 |6: - | ldr TMP0, [RC], #8 - | str TMP0, [RA], #8 - | cmp RC, TMP3 + | ldr TMP0, [RA], #8 + | str TMP0, [RC], #8 + | cmp RA, TMP3 | blo <6 | b <2 | |7: // Grow stack for varargs. | lsr CARG2, TMP2, #3 - | stp BASE, RA, L->base + | stp BASE, RC, L->base | mov CARG1, L - | sub RC, RC, BASE // Need delta, because BASE may change. + | sub RA, RA, BASE // Need delta, because BASE may change. | str PC, SAVE_PC | bl extern lj_state_growstack // (lua_State *L, int n) - | ldp BASE, RA, L->base - | add RC, BASE, RC + | ldp BASE, RC, L->base + | add RA, BASE, RA | sub TMP3, BASE, #16 | b <6 break; @@ -3730,7 +3726,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) } else { | adds CARG1w, CARG1w, CARG3w | bvs >2 - | add TMP0, CARG1, TISNUM + | add_TISNUM TMP0, CARG1 | tbnz CARG3w, #31, >4 | cmp CARG1w, CARG2w } @@ -3809,7 +3805,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RA = base, RC = target | ldr CARG1, [BASE, RA, lsl #3] | add TMP1, BASE, RA, lsl #3 - | cmp CARG1, TISNIL + | cmp_nil CARG1 | beq >1 // Stop if iterator returned nil. if (op == BC_JITERL) { | str CARG1, [TMP1, #-8] From 14866a6828939d86e716939cfd2921ac5aaeca8e Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 8 Oct 2023 21:57:04 +0200 Subject: [PATCH 56/95] ARM64: Fix disassembly of U12 loads. Thanks to Peter Cawley. 
#1100 --- src/jit/dis_arm64.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua index 3d199bf2..a7a29494 100644 --- a/src/jit/dis_arm64.lua +++ b/src/jit/dis_arm64.lua @@ -948,7 +948,7 @@ local function disass_ins(ctx) elseif p == "U" then local rn = map_regs.x[band(rshift(op, 5), 31)] local sz = band(rshift(op, 30), 3) - local imm12 = lshift(arshift(lshift(op, 10), 20), sz) + local imm12 = lshift(rshift(lshift(op, 10), 20), sz) if imm12 ~= 0 then x = "["..rn..", #"..imm12.."]" else From d2a5487fd79b0ce9cd303f84eae13ce12d4db4b7 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 8 Oct 2023 22:10:02 +0200 Subject: [PATCH 57/95] ARM64: Use ADR and ADRP to form constants. Thanks to Peter Cawley. #1100 --- src/lj_emit_arm64.h | 36 +++++++++++++++++++++++++++++------- src/lj_target_arm64.h | 2 ++ 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h index 3c510492..51d0c351 100644 --- a/src/lj_emit_arm64.h +++ b/src/lj_emit_arm64.h @@ -193,6 +193,32 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int is64) return 0; /* Failed. */ } +#define glofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g)) +#define mcpofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1))) +#define checkmcpofs(as, k) \ + (A64F_S_OK(mcpofs(as, k)>>2, 19)) + +/* Try to form a const as ADR or ADRP or ADRP + ADD. */ +static int emit_kadrp(ASMState *as, Reg rd, uint64_t k) +{ + A64Ins ai = A64I_ADR; + int64_t ofs = mcpofs(as, k); + if (!A64F_S_OK((uint64_t)ofs, 21)) { + uint64_t kpage = k & ~0xfffull; + MCode *adrp = as->mcp - 1 - (k != kpage); + ofs = (int64_t)(kpage - ((uint64_t)adrp & ~0xfffull)) >> 12; + if (!A64F_S_OK(ofs, 21)) + return 0; /* Failed. 
*/ + if (k != kpage) + emit_dn(as, (A64I_ADDx^A64I_K12)|A64F_U12(k - kpage), rd, rd); + ai = A64I_ADRP; + } + emit_d(as, ai|(((uint32_t)ofs&3)<<29)|A64F_S19(ofs>>2), rd); + return 1; +} + static void emit_loadk(ASMState *as, Reg rd, uint64_t u64) { int zeros = 0, ones = 0, neg, lshift = 0; @@ -213,6 +239,9 @@ static void emit_loadk(ASMState *as, Reg rd, uint64_t u64) if (emit_kdelta(as, rd, u64, is64)) { return; } + if (emit_kadrp(as, rd, u64)) { /* Either 1 or 2 ins. */ + return; + } } if (neg) { u64 = ~u64; @@ -240,13 +269,6 @@ static void emit_loadk(ASMState *as, Reg rd, uint64_t u64) /* Load a 64 bit constant into a GPR. */ #define emit_loadu64(as, rd, i) emit_loadk(as, rd, i) -#define glofs(as, k) \ - ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g)) -#define mcpofs(as, k) \ - ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1))) -#define checkmcpofs(as, k) \ - (A64F_S_OK(mcpofs(as, k)>>2, 19)) - static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); /* Get/set from constant pointer. */ diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h index 65a14307..c34f1e59 100644 --- a/src/lj_target_arm64.h +++ b/src/lj_target_arm64.h @@ -234,6 +234,8 @@ typedef enum A64Ins { A64I_MOVZx = 0xd2800000, A64I_MOVNw = 0x12800000, A64I_MOVNx = 0x92800000, + A64I_ADR = 0x10000000, + A64I_ADRP = 0x90000000, A64I_LDRB = 0x39400000, A64I_LDRH = 0x79400000, From 656ecbcf8f669feb94e0d0ec4b4f59190bcd2e48 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 8 Oct 2023 22:12:01 +0200 Subject: [PATCH 58/95] DynASM/ARM64: Support ldp/stp of q registers. Thanks to Peter Cawley. 
#1096 --- dynasm/dasm_arm64.lua | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dynasm/dasm_arm64.lua b/dynasm/dasm_arm64.lua index e69f8ef3..05ea3e22 100644 --- a/dynasm/dasm_arm64.lua +++ b/dynasm/dasm_arm64.lua @@ -549,7 +549,7 @@ end local function parse_load_pair(params, nparams, n, op) if params[n+2] then werror("too many operands") end local pn, p2 = params[n], params[n+1] - local scale = shr(op, 30) == 0 and 2 or 3 + local scale = 2 + shr(op, 31 - band(shr(op, 26), 1)) local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") if not p1 then if not p2 then @@ -806,8 +806,8 @@ map_op = { ["ldrsw_*"] = "98000000DxB|b8800000DxL", -- NOTE: ldur etc. are handled by ldr et al. - ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP", - ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP", + ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP|ac000000DAqP", + ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP|ac400000DAqP", ["ldpsw_*"] = "68400000DAxP", -- Branches. @@ -942,7 +942,7 @@ local function parse_template(params, template, nparams, pos) werror("bad register type") end parse_reg_type = false - elseif p == "x" or p == "w" or p == "d" or p == "s" then + elseif p == "x" or p == "w" or p == "d" or p == "s" or p == "q" then if parse_reg_type ~= p then werror("register size mismatch") end From db944b2b56c86fcf133745976763604d96110285 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 21 Oct 2023 13:11:50 +0200 Subject: [PATCH 59/95] FFI: Fix dangling reference to CType in carith_checkarg(). Reported by Sergey Kaplun. 
#1108 --- src/lj_carith.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lj_carith.c b/src/lj_carith.c index 96384e87..bad5fe66 100644 --- a/src/lj_carith.c +++ b/src/lj_carith.c @@ -42,9 +42,13 @@ static int carith_checkarg(lua_State *L, CTState *cts, CDArith *ca) p = (uint8_t *)cdata_getptr(p, ct->size); if (ctype_isref(ct->info)) ct = ctype_rawchild(cts, ct); } else if (ctype_isfunc(ct->info)) { + CTypeID id0 = i ? ctype_typeid(cts, ca->ct[0]) : 0; p = (uint8_t *)*(void **)p; ct = ctype_get(cts, lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|id), CTSIZE_PTR)); + if (i) { /* cts->tab may have been reallocated. */ + ca->ct[0] = ctype_get(cts, id0); + } } if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct); ca->ct[i] = ct; From 4eb47df605883e983dadb78f303b22dd0232dd03 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 21 Oct 2023 13:18:51 +0200 Subject: [PATCH 60/95] FFI/Windows: Fix type declaration for int64_t and uint64_t. Thanks to Peter Cawley. #1106 --- src/lj_ctype.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/lj_ctype.h b/src/lj_ctype.h index 45e7234e..cde1cf01 100644 --- a/src/lj_ctype.h +++ b/src/lj_ctype.h @@ -276,6 +276,8 @@ typedef struct CTState { #define CTTYDEFP(_) #endif +#define CTF_LONG_IF8 (CTF_LONG * (sizeof(long) == 8)) + /* Common types. 
*/ #define CTTYDEF(_) \ _(NONE, 0, CT_ATTRIB, CTATTRIB(CTA_BAD)) \ @@ -289,8 +291,8 @@ typedef struct CTState { _(UINT16, 2, CT_NUM, CTF_UNSIGNED|CTALIGN(1)) \ _(INT32, 4, CT_NUM, CTALIGN(2)) \ _(UINT32, 4, CT_NUM, CTF_UNSIGNED|CTALIGN(2)) \ - _(INT64, 8, CT_NUM, CTF_LONG|CTALIGN(3)) \ - _(UINT64, 8, CT_NUM, CTF_UNSIGNED|CTF_LONG|CTALIGN(3)) \ + _(INT64, 8, CT_NUM, CTF_LONG_IF8|CTALIGN(3)) \ + _(UINT64, 8, CT_NUM, CTF_UNSIGNED|CTF_LONG_IF8|CTALIGN(3)) \ _(FLOAT, 4, CT_NUM, CTF_FP|CTALIGN(2)) \ _(DOUBLE, 8, CT_NUM, CTF_FP|CTALIGN(3)) \ _(COMPLEX_FLOAT, 8, CT_ARRAY, CTF_COMPLEX|CTALIGN(2)|CTID_FLOAT) \ From e826d0c101d750fac8334d71e221c50d8dbe236c Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 21 Oct 2023 13:31:45 +0200 Subject: [PATCH 61/95] Add 'cc' file type for saving bytecode. Contributed by Sergey Bronnikov. #1105 --- doc/running.html | 3 ++- src/jit/bcsave.lua | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/running.html b/doc/running.html index 3afc1b56..9dd2b411 100644 --- a/doc/running.html +++ b/doc/running.html @@ -120,7 +120,8 @@ file name:
- c — C source file, exported bytecode data.
-- h — C header file, static bytecode data.
+- cc — C++ source file, exported bytecode data.
+- h — C/C++ header file, static bytecode data.
- obj or o — Object file, exported bytecode data (OS- and architecture-specific).
- raw or any other extension — Raw bytecode file (portable). diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua index 74699f3d..390d297c 100644 --- a/src/jit/bcsave.lua +++ b/src/jit/bcsave.lua @@ -38,7 +38,7 @@ Save LuaJIT bytecode: luajit -b[options] input output -- Stop handling options. - Use stdin as input and/or stdout as output. -File types: c h obj o raw (default) +File types: c cc h obj o raw (default) ]] os.exit(1) end @@ -81,7 +81,7 @@ end ------------------------------------------------------------------------------ local map_type = { - raw = "raw", c = "c", h = "h", o = "obj", obj = "obj", + raw = "raw", c = "c", cc = "c", h = "h", o = "obj", obj = "obj", } local map_arch = { From f2e955dae8411ccdce693806f15b1f221a49015c Mon Sep 17 00:00:00 2001 From: Mike Pall
Date: Sun, 5 Nov 2023 11:27:35 +0100 Subject: [PATCH 62/95] Windows/x86: _BitScan*64 are only available on 64 bit archs. Reported by memcorrupt. #1109 --- src/lj_def.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/lj_def.h b/src/lj_def.h index 0d6c346b..2a1d7b56 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -259,12 +259,8 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x) #else unsigned char _BitScanForward(unsigned long *, unsigned long); unsigned char _BitScanReverse(unsigned long *, unsigned long); -unsigned char _BitScanForward64(unsigned long *, uint64_t); -unsigned char _BitScanReverse64(unsigned long *, uint64_t); #pragma intrinsic(_BitScanForward) #pragma intrinsic(_BitScanReverse) -#pragma intrinsic(_BitScanForward64) -#pragma intrinsic(_BitScanReverse64) static LJ_AINLINE uint32_t lj_ffs(uint32_t x) { @@ -276,6 +272,12 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x) unsigned long r; _BitScanReverse(&r, x); return (uint32_t)r; } +#if defined(_M_X64) || defined(_M_ARM64) +unsigned char _BitScanForward64(unsigned long *, uint64_t); +unsigned char _BitScanReverse64(unsigned long *, uint64_t); +#pragma intrinsic(_BitScanForward64) +#pragma intrinsic(_BitScanReverse64) + static LJ_AINLINE uint32_t lj_ffs64(uint64_t x) { unsigned long r; _BitScanForward64(&r, x); return (uint32_t)r; @@ -286,6 +288,7 @@ static LJ_AINLINE uint32_t lj_fls64(uint64_t x) unsigned long r; _BitScanReverse64(&r, x); return (uint32_t)r; } #endif +#endif unsigned long _byteswap_ulong(unsigned long); uint64_t _byteswap_uint64(uint64_t); From d133d67c881f363f0b5584ebd21a965eb3435aa1 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 5 Nov 2023 11:31:08 +0100 Subject: [PATCH 63/95] x64: Properly fix __call metamethod return dispatch. Reported by Sergey Kaplun. 
#1110 --- src/vm_x86.dasc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 5b3356dc..56712f90 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -1243,7 +1243,7 @@ static void build_subroutines(BuildCtx *ctx) | mov LFUNC:RB, [RA-8] | add NARGS:RD, 1 | // This is fragile. L->base must not move, KBASE must always be defined. - |.if x64 + |.if X64 | cmp KBASEa, rdx // Continue with CALLT if flag set. |.else | cmp KBASE, BASE // Continue with CALLT if flag set. From 07b3cd3cf9b57a3801a1ebc48144767e31671f21 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 5 Nov 2023 16:34:46 +0100 Subject: [PATCH 64/95] Check for upvalue state transition in IR_UREFO. Thanks to Peter Cawley. #1085 --- src/lj_asm_arm.h | 32 +++++++++++++++++++------------ src/lj_asm_arm64.h | 20 ++++++++++++++------ src/lj_asm_mips.h | 27 ++++++++++++++++---------- src/lj_asm_ppc.h | 29 +++++++++++++++++----------- src/lj_asm_x86.h | 27 ++++++++++++++++---------- src/lj_opt_fold.c | 47 +++++++++++++++++++++++++++++++++++++--------- src/lj_opt_mem.c | 15 ++++++++++----- src/lj_record.c | 13 +++++++++++-- src/lj_state.c | 7 +++++-- 9 files changed, 150 insertions(+), 67 deletions(-) diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index ac3d1b58..348cd79f 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -969,24 +969,32 @@ static void asm_hrefk(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op1)) { + int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); + if (irref_isk(ir->op1) && !guarded) { GCfunc *fn = ir_kfunc(IR(ir->op1)); MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; emit_lsptr(as, ARMI_LDR, dest, v); } else { - Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->o == IR_UREFC) { - asm_guardcc(as, CC_NE); + if (guarded) { + asm_guardcc(as, ir->o == IR_UREFC ? 
CC_NE : CC_EQ); emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP); - emit_opk(as, ARMI_ADD, dest, uv, - (int32_t)offsetof(GCupval, tv), RSET_GPR); - emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); - } else { - emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v)); } - emit_lso(as, ARMI_LDR, uv, func, - (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); + if (ir->o == IR_UREFC) + emit_opk(as, ARMI_ADD, dest, dest, + (int32_t)offsetof(GCupval, tv), RSET_GPR); + else + emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(GCupval, v)); + if (guarded) + emit_lso(as, ARMI_LDRB, RID_TMP, dest, + (int32_t)offsetof(GCupval, closed)); + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]); + emit_loadi(as, dest, k); + } else { + emit_lso(as, ARMI_LDR, dest, ra_alloc1(as, ir->op1, RSET_GPR), + (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); + } } } diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 9f165fa8..5b40f4cc 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -931,22 +931,30 @@ static void asm_hrefk(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op1)) { + int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); + if (irref_isk(ir->op1) && !guarded) { GCfunc *fn = ir_kfunc(IR(ir->op1)); MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; emit_lsptr(as, A64I_LDRx, dest, v); } else { - if (ir->o == IR_UREFC) { - asm_guardcnb(as, A64I_CBZ, RID_TMP); + if (guarded) + asm_guardcnb(as, ir->o == IR_UREFC ? 
A64I_CBZ : A64I_CBNZ, RID_TMP); + if (ir->o == IR_UREFC) emit_opk(as, A64I_ADDx, dest, dest, (int32_t)offsetof(GCupval, tv), RSET_GPR); + else + emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v)); + if (guarded) emit_lso(as, A64I_LDRB, RID_TMP, dest, (int32_t)offsetof(GCupval, closed)); + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + uint64_t k = gcrefu(fn->l.uvptr[(ir->op2 >> 8)]); + emit_loadu64(as, dest, k); } else { - emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v)); + emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR), + (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8)); } - emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR), - (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8)); } } diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index b02da663..d4e40c91 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -1207,22 +1207,29 @@ nolo: static void asm_uref(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op1)) { + int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); + if (irref_isk(ir->op1) && !guarded) { GCfunc *fn = ir_kfunc(IR(ir->op1)); MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR); } else { - Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->o == IR_UREFC) { - asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv)); - emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); + if (guarded) + asm_guard(as, ir->o == IR_UREFC ? 
MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO); + if (ir->o == IR_UREFC) + emit_tsi(as, MIPSI_AADDIU, dest, dest, (int32_t)offsetof(GCupval, tv)); + else + emit_tsi(as, MIPSI_AL, dest, dest, (int32_t)offsetof(GCupval, v)); + if (guarded) + emit_tsi(as, MIPSI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed)); + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]); + emit_loada(as, dest, o); } else { - emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v)); + emit_tsi(as, MIPSI_AL, dest, ra_alloc1(as, ir->op1, RSET_GPR), + (int32_t)offsetof(GCfuncL, uvptr) + + (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); } - emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) + - (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); } } diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index 6555312d..8e9a92a4 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -840,23 +840,30 @@ static void asm_hrefk(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op1)) { + int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); + if (irref_isk(ir->op1) && !guarded) { GCfunc *fn = ir_kfunc(IR(ir->op1)); MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR); } else { - Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->o == IR_UREFC) { - asm_guardcc(as, CC_NE); + if (guarded) { + asm_guardcc(as, ir->o == IR_UREFC ? 
CC_NE : CC_EQ); emit_ai(as, PPCI_CMPWI, RID_TMP, 1); - emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv)); - emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); - } else { - emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v)); } - emit_tai(as, PPCI_LWZ, uv, func, - (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); + if (ir->o == IR_UREFC) + emit_tai(as, PPCI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv)); + else + emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(GCupval, v)); + if (guarded) + emit_tai(as, PPCI_LBZ, RID_TMP, dest, (int32_t)offsetof(GCupval, closed)); + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]); + emit_loadi(as, dest, k); + } else { + emit_tai(as, PPCI_LWZ, dest, ra_alloc1(as, ir->op1, RSET_GPR), + (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); + } } } diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index c92de3d8..0e0b28a4 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -1373,24 +1373,31 @@ static void asm_hrefk(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op1)) { + int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); + if (irref_isk(ir->op1) && !guarded) { GCfunc *fn = ir_kfunc(IR(ir->op1)); MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; emit_rma(as, XO_MOV, dest|REX_GC64, v); } else { Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->o == IR_UREFC) { + if (ir->o == IR_UREFC) emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv)); - asm_guardcc(as, CC_NE); - emit_i8(as, 1); - emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed)); - } else { + else emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v)); + if (guarded) { + asm_guardcc(as, ir->o == IR_UREFC ? 
CC_E : CC_NE); + emit_i8(as, 0); + emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed)); + } + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]); + emit_loada(as, uv, o); + } else { + emit_rmro(as, XO_MOV, uv|REX_GC64, ra_alloc1(as, ir->op1, RSET_GPR), + (int32_t)offsetof(GCfuncL, uvptr) + + (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); } - emit_rmro(as, XO_MOV, uv|REX_GC64, func, - (int32_t)offsetof(GCfuncL, uvptr) + - (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); } } diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 743dfb07..ce78505b 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -2134,8 +2134,26 @@ LJFOLDX(lj_opt_fwd_uload) LJFOLD(ALEN any any) LJFOLDX(lj_opt_fwd_alen) +/* Try to merge UREFO/UREFC into referenced instruction. */ +static TRef merge_uref(jit_State *J, IRRef ref, IRIns* ir) +{ + if (ir->o == IR_UREFO && irt_isguard(ir->t)) { + /* Might be pointing to some other coroutine's stack. + ** And GC might shrink said stack, thereby repointing the upvalue. + ** GC might even collect said coroutine, thereby closing the upvalue. + */ + if (gcstep_barrier(J, ref)) + return EMITFOLD; /* So cannot merge. */ + /* Current fins wants a check, but ir doesn't have one. */ + if ((irt_t(fins->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC) && + irt_type(ir->t) == IRT_IGC) + ir->t.irt += IRT_PGC-IRT_IGC; /* So install a check. */ + } + return ref; /* Not a TRef, but the caller doesn't care. */ +} + /* Upvalue refs are really loads, but there are no corresponding stores. -** So CSE is ok for them, except for UREFO across a GC step (see below). +** So CSE is ok for them, except for guarded UREFO across a GC step. ** If the referenced function is const, its upvalue addresses are const, too. ** This can be used to improve CSE by looking for the same address, ** even if the upvalues originate from a different function. 
@@ -2153,9 +2171,7 @@ LJFOLDF(cse_uref) if (irref_isk(ir->op1)) { GCfunc *fn2 = ir_kfunc(IR(ir->op1)); if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) { - if (fins->o == IR_UREFO && gcstep_barrier(J, ref)) - break; - return ref; + return merge_uref(J, ref, ir); } } ref = ir->prev; @@ -2164,6 +2180,24 @@ LJFOLDF(cse_uref) return EMITFOLD; } +/* Custom CSE for UREFO. */ +LJFOLD(UREFO any any) +LJFOLDF(cse_urefo) +{ + if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { + IRRef ref = J->chain[IR_UREFO]; + IRRef lim = fins->op1; + IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16); + while (ref > lim) { + IRIns *ir = IR(ref); + if (ir->op12 == op12) + return merge_uref(J, ref, ir); + ref = ir->prev; + } + } + return EMITFOLD; +} + LJFOLD(HREFK any any) LJFOLDX(lj_opt_fwd_hrefk) @@ -2384,14 +2418,9 @@ LJFOLDF(fold_base) /* Write barriers are amenable to CSE, but not across any incremental ** GC steps. -** -** The same logic applies to open upvalue references, because a stack -** may be resized during a GC step (not the current stack, but maybe that -** of a coroutine). */ LJFOLD(TBAR any) LJFOLD(OBAR any any) -LJFOLD(UREFO any any) LJFOLDF(barrier_tab) { TRef tr = lj_opt_cse(J); diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index 351d958c..631ac9e4 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c @@ -464,18 +464,23 @@ doemit: */ static AliasRet aa_uref(IRIns *refa, IRIns *refb) { - if (refa->o != refb->o) - return ALIAS_NO; /* Different UREFx type. */ if (refa->op1 == refb->op1) { /* Same function. */ if (refa->op2 == refb->op2) return ALIAS_MUST; /* Same function, same upvalue idx. */ else return ALIAS_NO; /* Same function, different upvalue idx. */ } else { /* Different functions, check disambiguation hash values. */ - if (((refa->op2 ^ refb->op2) & 0xff)) + if (((refa->op2 ^ refb->op2) & 0xff)) { return ALIAS_NO; /* Upvalues with different hash values cannot alias. */ - else - return ALIAS_MAY; /* No conclusion can be drawn for same hash value. 
*/ + } else if (refa->o != refb->o) { + /* Different UREFx type, but need to confirm the UREFO really is open. */ + if (irt_type(refa->t) == IRT_IGC) refa->t.irt += IRT_PGC-IRT_IGC; + else if (irt_type(refb->t) == IRT_IGC) refb->t.irt += IRT_PGC-IRT_IGC; + return ALIAS_NO; + } else { + /* No conclusion can be drawn for same hash value and same UREFx type. */ + return ALIAS_MAY; + } } } diff --git a/src/lj_record.c b/src/lj_record.c index d44f7737..1dd310d4 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -1772,12 +1772,12 @@ noconstify: /* Note: this effectively limits LJ_MAX_UPVAL to 127. */ uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff); if (!uvp->closed) { - uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PGC), fn, uv)); /* In current stack? */ if (uvval(uvp) >= tvref(J->L->stack) && uvval(uvp) < tvref(J->L->maxstack)) { int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot)); if (slot >= 0) { /* Aliases an SSA slot? */ + uref = tref_ref(emitir(IRT(IR_UREFO, IRT_PGC), fn, uv)); emitir(IRTG(IR_EQ, IRT_PGC), REF_BASE, emitir(IRT(IR_ADD, IRT_PGC), uref, @@ -1792,12 +1792,21 @@ noconstify: } } } + /* IR_UREFO+IRT_IGC is not checked for open-ness at runtime. + ** Always marked as a guard, since it might get promoted to IRT_PGC later. + */ + uref = emitir(IRTG(IR_UREFO, tref_isgcv(val) ? IRT_PGC : IRT_IGC), fn, uv); + uref = tref_ref(uref); emitir(IRTG(IR_UGT, IRT_PGC), emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE), lj_ir_kintpgc(J, (J->baseslot + J->maxslot) * 8)); } else { + /* If fn is constant, then so is the GCupval*, and the upvalue cannot + ** transition back to open, so no guard is required in this case. + */ + IRType t = (tref_isk(fn) ? 
0 : IRT_GUARD) | IRT_PGC; + uref = tref_ref(emitir(IRT(IR_UREFC, t), fn, uv)); needbarrier = 1; - uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv)); } if (val == 0) { /* Upvalue load */ IRType t = itype2irt(uvval(uvp)); diff --git a/src/lj_state.c b/src/lj_state.c index 6efe189d..7e4961bd 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -346,8 +346,11 @@ void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L) lj_assertG(L != mainthread(g), "free of main thread"); if (obj2gco(L) == gcref(g->cur_L)) setgcrefnull(g->cur_L); - lj_func_closeuv(L, tvref(L->stack)); - lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues"); + if (gcref(L->openupval) != NULL) { + lj_func_closeuv(L, tvref(L->stack)); + lj_trace_abort(g); /* For aa_uref soundness. */ + lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues"); + } lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); lj_mem_freet(g, L); } From ce2cd617398412984c52ca90f833b30ae3dbd08b Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 6 Nov 2023 23:14:22 +0100 Subject: [PATCH 65/95] ARM64: Fix disassembly of ldp/stp offsets. Thanks to Peter Cawley. #1113 --- src/jit/dis_arm64.lua | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua index a7a29494..84677666 100644 --- a/src/jit/dis_arm64.lua +++ b/src/jit/dis_arm64.lua @@ -985,8 +985,7 @@ local function disass_ins(ctx) x = x.."]" end elseif p == "P" then - local opcv, sh = rshift(op, 26), 2 - if opcv >= 0x2a then sh = 4 elseif opcv >= 0x1b then sh = 3 end + local sh = 2 + rshift(op, 31 - band(rshift(op, 26), 1)) local imm7 = lshift(arshift(lshift(op, 10), 25), sh) local rn = map_regs.x[band(rshift(op, 5), 31)] local ind = band(rshift(op, 23), 3) From 433d7e8d8d182f44e88b5cfdc4b2d3026469dfb7 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 7 Nov 2023 22:25:42 +0100 Subject: [PATCH 66/95] FFI: Fix pragma push stack limit check and throw on overflow. Reported by Sergey Kaplun. 
#1114 --- src/lj_cparse.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/lj_cparse.c b/src/lj_cparse.c index f807c5ae..2ef7dbe1 100644 --- a/src/lj_cparse.c +++ b/src/lj_cparse.c @@ -1747,9 +1747,11 @@ static void cp_pragma(CPState *cp, BCLine pragmaline) cp_check(cp, '('); if (cp->tok == CTOK_IDENT) { if (cp->str->hash == H_(738e923c,a1b65954)) { /* push */ - if (cp->curpack < CPARSE_MAX_PACKSTACK) { + if (cp->curpack < CPARSE_MAX_PACKSTACK-1) { cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack]; cp->curpack++; + } else { + cp_errmsg(cp, cp->tok, LJ_ERR_XLEVELS); } } else if (cp->str->hash == H_(6c71cf27,6c71cf27)) { /* pop */ if (cp->curpack > 0) cp->curpack--; From 65c849390702b1150d52e64db86cbc6b3c98413e Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 9 Nov 2023 11:02:36 +0100 Subject: [PATCH 67/95] Invalidate SCEV entry when returning to lower frame. Thanks to Zhongwei Yao. #1115 --- src/lj_record.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lj_record.c b/src/lj_record.c index a49f942a..0122105b 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -755,6 +755,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) emitir(IRTG(IR_RETF, IRT_P32), trpt, trpc); J->retdepth++; J->needsnap = 1; + J->scev.idx = REF_NIL; lua_assert(J->baseslot == 1); /* Shift result slots up and clear the slots of the new frame below. */ memmove(J->base + cbase, J->base-1, sizeof(TRef)*nresults); From a4c1640432a9d8a60624cdc8065b15078c228e36 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 12 Nov 2023 14:42:24 +0100 Subject: [PATCH 68/95] Add stack check to pcall/xpcall. Analyzed by Peter Cawley. 
#1048 --- src/vm_arm.dasc | 7 +++++++ src/vm_mips.dasc | 10 +++++++++- src/vm_ppc.dasc | 8 ++++++++ src/vm_ppcspe.dasc | 8 ++++++++ src/vm_x86.dasc | 6 ++++++ 5 files changed, 38 insertions(+), 1 deletion(-) diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index 7dae1a53..872de45a 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc @@ -1155,8 +1155,11 @@ static void build_subroutines(BuildCtx *ctx) |//-- Base library: catch errors ---------------------------------------- | |.ffunc pcall + | ldr RB, L->maxstack + | add INS, BASE, NARGS8:RC | ldrb RA, [DISPATCH, #DISPATCH_GL(hookmask)] | cmp NARGS8:RC, #8 + | cmphs RB, INS | blo ->fff_fallback | tst RA, #HOOK_ACTIVE // Remember active hook before pcall. | mov RB, BASE @@ -1167,7 +1170,11 @@ static void build_subroutines(BuildCtx *ctx) | b ->vm_call_dispatch | |.ffunc_2 xpcall + | ldr RB, L->maxstack + | add INS, BASE, NARGS8:RC | ldrb RA, [DISPATCH, #DISPATCH_GL(hookmask)] + | cmp RB, INS + | blo ->fff_fallback | checkfunc CARG4, ->fff_fallback // Traceback must be a function. | mov RB, BASE | strd CARG12, [BASE, #8] // Swap function and traceback. diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index f6f801f2..c4c0a416 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc @@ -1244,9 +1244,13 @@ static void build_subroutines(BuildCtx *ctx) |//-- Base library: catch errors ---------------------------------------- | |.ffunc pcall + | lw TMP1, L->maxstack + | addu TMP2, BASE, NARGS8:RC | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) | beqz NARGS8:RC, ->fff_fallback - | move TMP2, BASE + |. sltu AT, TMP1, TMP2 + | bnez AT, ->fff_fallback + |. move TMP2, BASE | addiu BASE, BASE, 8 | // Remember active hook before pcall. | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT @@ -1256,8 +1260,12 @@ static void build_subroutines(BuildCtx *ctx) |. 
addiu NARGS8:RC, NARGS8:RC, -8 | |.ffunc xpcall + | lw TMP1, L->maxstack + | addu TMP2, BASE, NARGS8:RC | sltiu AT, NARGS8:RC, 16 | lw CARG4, 8+HI(BASE) + | sltu TMP1, TMP1, TMP2 + | or AT, AT, TMP1 | bnez AT, ->fff_fallback |. ldc1 FARG2, 8(BASE) | ldc1 FARG1, 0(BASE) diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index 61ebbb04..d6792f2c 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc @@ -1537,8 +1537,12 @@ static void build_subroutines(BuildCtx *ctx) |//-- Base library: catch errors ---------------------------------------- | |.ffunc pcall + | lwz TMP1, L->maxstack + | add TMP2, BASE, NARGS8:RC | cmplwi NARGS8:RC, 8 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) + | cmplw cr1, TMP1, TMP2 + | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | blt ->fff_fallback | mr TMP2, BASE | la BASE, 8(BASE) @@ -1549,9 +1553,13 @@ static void build_subroutines(BuildCtx *ctx) | b ->vm_call_dispatch | |.ffunc xpcall + | lwz TMP1, L->maxstack + | add TMP2, BASE, NARGS8:RC | cmplwi NARGS8:RC, 16 | lwz CARG4, 8(BASE) + | cmplw cr1, TMP1, TMP2 | lfd FARG2, 8(BASE) + | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | lfd FARG1, 0(BASE) | blt ->fff_fallback | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) diff --git a/src/vm_ppcspe.dasc b/src/vm_ppcspe.dasc index c4a44191..ea33c08b 100644 --- a/src/vm_ppcspe.dasc +++ b/src/vm_ppcspe.dasc @@ -1184,8 +1184,12 @@ static void build_subroutines(BuildCtx *ctx) |//-- Base library: catch errors ---------------------------------------- | |.ffunc pcall + | lwz TMP1, L->maxstack + | add TMP2, BASE, NARGS8:RC | cmplwi NARGS8:RC, 8 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) + | cmplw cr1, TMP1, TMP2 + | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | blt ->fff_fallback | mr TMP2, BASE | la BASE, 8(BASE) @@ -1196,8 +1200,12 @@ static void build_subroutines(BuildCtx *ctx) | b ->vm_call_dispatch | |.ffunc_2 xpcall + | lwz TMP1, L->maxstack + | add TMP2, BASE, NARGS8:RC | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) | mr TMP2, BASE + | cmplw TMP1, TMP2 + | blt ->fff_fallback | checkfunc CARG2 // 
Traceback must be a function. | checkfail ->fff_fallback | la BASE, 16(BASE) diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 56712f90..811d5e75 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -1720,6 +1720,9 @@ static void build_subroutines(BuildCtx *ctx) |//-- Base library: catch errors ---------------------------------------- | |.ffunc_1 pcall + | mov L:RB, SAVE_L + | lea RA, [BASE+NARGS:RD*8] + | cmp RA, L:RB->maxstack; ja ->fff_fallback | lea RA, [BASE+8] | sub NARGS:RD, 1 | mov PC, 8+FRAME_PCALL @@ -1731,6 +1734,9 @@ static void build_subroutines(BuildCtx *ctx) | jmp ->vm_call_dispatch | |.ffunc_2 xpcall + | mov L:RB, SAVE_L + | lea RA, [BASE+NARGS:RD*8] + | cmp RA, L:RB->maxstack; ja ->fff_fallback | cmp dword [BASE+12], LJ_TFUNC; jne ->fff_fallback | mov RB, [BASE+4] // Swap function and traceback. | mov [BASE+12], RB From d854d00ce94b274359e5181bed13e977420daf5c Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 12 Nov 2023 15:18:44 +0100 Subject: [PATCH 69/95] x86/x64: Add more red zone checks to assembler backend. Thanks to Peter Cawley. #1116 --- src/lj_asm_x86.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index e01def59..6b114802 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -674,6 +674,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left) emit_rr(as, XO_CVTSI2SD, tmp, dest); if (!(as->flags & JIT_F_SPLIT_XMM)) emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */ + checkmclim(as); emit_rr(as, XO_CVTTSD2SI, dest, left); /* Can't fuse since left is needed twice. */ } @@ -713,6 +714,7 @@ static void asm_conv(ASMState *as, IRIns *ir) emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */ emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */ emit_loadn(as, bias, k); + checkmclim(as); emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR)); return; } else { /* Integer to FP conversion. 
*/ @@ -1025,6 +1027,7 @@ static void asm_href(ASMState *as, IRIns *ir) emit_jcc(as, CC_E, nilexit); else emit_sjcc(as, CC_E, l_end); + checkmclim(as); if (irt_isnum(kt)) { if (isk) { /* Assumes -0.0 is already canonicalized to +0.0. */ @@ -1065,7 +1068,6 @@ static void asm_href(ASMState *as, IRIns *ir) emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); } emit_sfixup(as, l_loop); - checkmclim(as); /* Load main position relative to tab->node into dest. */ khash = isk ? ir_khash(irkey) : 1; @@ -1091,6 +1093,7 @@ static void asm_href(ASMState *as, IRIns *ir) emit_rr(as, XO_ARITH(XOg_SUB), dest, tmp); emit_shifti(as, XOg_ROL, tmp, HASH_ROT3); emit_rr(as, XO_ARITH(XOg_XOR), dest, tmp); + checkmclim(as); emit_shifti(as, XOg_ROL, dest, HASH_ROT2); emit_rr(as, XO_ARITH(XOg_SUB), tmp, dest); emit_shifti(as, XOg_ROL, dest, HASH_ROT1); @@ -1375,6 +1378,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) if (irt_islightud(ir->t)) { Reg dest = asm_load_lightud64(as, ir, 1); if (ra_hasreg(dest)) { + checkmclim(as); asm_fuseahuref(as, ir->op1, RSET_GPR); emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM); } @@ -1394,6 +1398,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE); if (LJ_64 && irt_type(ir->t) >= IRT_NUM) { lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); + checkmclim(as); emit_u32(as, LJ_TISNUM); emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); } else { From 45c88b7963de2969a9a656c03ba06ad995d7fd5f Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 12 Nov 2023 15:41:52 +0100 Subject: [PATCH 70/95] x86/x64: Don't fuse loads across table.clear. Reported by Peter Cawley. 
#1117 --- src/lj_asm_x86.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index d98fb827..a105b439 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -473,6 +473,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) } } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) && + noconflict(as, ref, IR_CALLS, 0) && /* Don't cross table.clear. */ !(LJ_GC64 && irt_isaddr(ir->t))) { asm_fuseahuref(as, ir->op1, xallow); return RID_MRM; From 113a168b792cd367822ec04cdc2ef32facd28efa Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 12 Nov 2023 16:11:11 +0100 Subject: [PATCH 71/95] Improve last commit. --- src/lj_asm_x86.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index a105b439..955a54a4 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -473,7 +473,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) } } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) && - noconflict(as, ref, IR_CALLS, 0) && /* Don't cross table.clear. */ + noconflict(as, ref, IR_CALLS, 1) && /* Don't cross table.clear. */ !(LJ_GC64 && irt_isaddr(ir->t))) { asm_fuseahuref(as, ir->op1, xallow); return RID_MRM; From 644723649ea04cb23b72c814b88b72a29e4afed4 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 14 Nov 2023 22:50:21 +0100 Subject: [PATCH 72/95] x86/x64: Don't fuse loads across IR_NEWREF. Reported by Peter Cawley. #1117 --- src/lj_asm_x86.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 6b114802..ddbe9c55 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -93,7 +93,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) /* Check if there's no conflicting instruction between curins and ref. 
** Also avoid fusing loads if there are multiple references. */ -static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload) +static int noconflict(ASMState *as, IRRef ref, IROp conflict, int check) { IRIns *ir = as->ir; IRRef i = as->curins; @@ -102,7 +102,9 @@ static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload) while (--i > ref) { if (ir[i].o == conflict) return 0; /* Conflict found. */ - else if (!noload && (ir[i].op1 == ref || ir[i].op2 == ref)) + else if ((check & 1) && ir[i].o == IR_NEWREF) + return 0; + else if ((check & 2) && (ir[i].op1 == ref || ir[i].op2 == ref)) return 0; } return 1; /* Ok, no conflict. */ @@ -118,7 +120,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref) lua_assert(irb->op2 == IRFL_TAB_ARRAY); /* We can avoid the FLOAD of t->array for colocated arrays. */ if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE && - !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) { + !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 0)) { as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */ return irb->op1; /* Table obj. */ } @@ -337,7 +339,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; if (ir->o == IR_SLOAD) { if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && - noconflict(as, ref, IR_RETF, 0)) { + noconflict(as, ref, IR_RETF, 2)) { as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0); as->mrm.idx = RID_NONE; @@ -346,12 +348,12 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) } else if (ir->o == IR_FLOAD) { /* Generic fusion is only ok for 32 bit operand (but see asm_comp). 
*/ if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) && - noconflict(as, ref, IR_FSTORE, 0)) { + noconflict(as, ref, IR_FSTORE, 2)) { asm_fusefref(as, ir, xallow); return RID_MRM; } } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { - if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0)) { + if (noconflict(as, ref, ir->o + IRDELTA_L2S, 2+(ir->o != IR_ULOAD))) { asm_fuseahuref(as, ir->op1, xallow); return RID_MRM; } @@ -360,7 +362,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) ** Fusing unaligned memory operands is ok on x86 (except for SIMD types). */ if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) && - noconflict(as, ref, IR_XSTORE, 0)) { + noconflict(as, ref, IR_XSTORE, 2)) { asm_fusexref(as, ir->op1, xallow); return RID_MRM; } From 43d0a19158ceabaa51b0462c1ebc97612b420a2e Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 15 Nov 2023 01:41:31 +0100 Subject: [PATCH 73/95] Fix last commit. --- src/lj_asm_x86.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 9fa411a0..aee33716 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -118,7 +118,7 @@ static int noconflict(ASMState *as, IRRef ref, IROp conflict, int check) while (--i > ref) { if (ir[i].o == conflict) return 0; /* Conflict found. */ - else if ((check & 1) && ir[i].o == IR_NEWREF) + else if ((check & 1) && (ir[i].o == IR_NEWREF || ir[i].o == IR_CALLS)) return 0; else if ((check & 2) && (ir[i].op1 == ref || ir[i].op2 == ref)) return 0; From 1761fd2ef79ffe1778011c7e9cb03ed361b48c5e Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 10 Dec 2023 14:29:45 +0100 Subject: [PATCH 74/95] Emit sunk IR_NEWREF only once per key on snapshot replay. Thanks to Sergey Kaplun and Peter Cawley. 
#1128 --- src/lj_snap.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/lj_snap.c b/src/lj_snap.c index a6cd93d4..5a5c481b 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -575,9 +575,21 @@ void lj_snap_replay(jit_State *J, GCtrace *T) if (irr->o == IR_HREFK || irr->o == IR_AREF) { IRIns *irf = &T->ir[irr->op1]; tmp = emitir(irf->ot, tmp, irf->op2); + } else if (irr->o == IR_NEWREF) { + IRRef allocref = tref_ref(tr); + IRRef keyref = tref_ref(key); + IRRef newref_ref = J->chain[IR_NEWREF]; + IRIns *newref = &J->cur.ir[newref_ref]; + lua_assert(irref_isk(keyref)); + if (newref_ref > allocref && newref->op2 == keyref) { + lua_assert(newref->op1 == allocref); + tmp = newref_ref; + goto skip_newref; + } } } tmp = emitir(irr->ot, tmp, key); + skip_newref: val = snap_pref(J, T, map, nent, seen, irs->op2); if (val == 0) { IRIns *irc = &T->ir[irs->op2]; From d1236a4caa999b29e774ef5103df3b424d821d9b Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 10 Dec 2023 14:41:56 +0100 Subject: [PATCH 75/95] Optimize table.new() with constant args to (sinkable) IR_TNEW. Thanks to Peter Cawley. #1128 --- src/lj_ffrecord.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 1233e5f7..151c4c8c 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -1444,6 +1444,15 @@ static void LJ_FASTCALL recff_table_new(jit_State *J, RecordFFData *rd) { TRef tra = lj_opt_narrow_toint(J, J->base[0]); TRef trh = lj_opt_narrow_toint(J, J->base[1]); + if (tref_isk(tra) && tref_isk(trh)) { + int32_t a = IR(tref_ref(tra))->i; + if (a < 0x7fff) { + uint32_t hbits = hsize2hbits(IR(tref_ref(trh))->i); + a = a > 0 ? 
a+1 : 0; + J->base[0] = emitir(IRTG(IR_TNEW, IRT_TAB), (uint32_t)a, hbits); + return; + } + } J->base[0] = lj_ir_call(J, IRCALL_lj_tab_new_ah, tra, trh); UNUSED(rd); } From dcf3627d79091e8c5535b15fc0ef40281ec9b9f7 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 10 Dec 2023 14:48:34 +0100 Subject: [PATCH 76/95] Fix .debug_abbrev section in GDB JIT API. Thanks to Dmitry Stogov. #1129 --- src/lj_gdbjit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c index 01f51ba7..c0d7a164 100644 --- a/src/lj_gdbjit.c +++ b/src/lj_gdbjit.c @@ -633,7 +633,7 @@ static void LJ_FASTCALL gdbjit_debugabbrev(GDBJITctx *ctx) DUV(DW_AT_low_pc); DUV(DW_FORM_addr); DUV(DW_AT_high_pc); DUV(DW_FORM_addr); DUV(DW_AT_stmt_list); DUV(DW_FORM_data4); - DB(0); DB(0); + DB(0); DB(0); DB(0); ctx->p = p; } From 856423f5dabb5bbf86d36816a247663e90c69d35 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 10 Dec 2023 15:00:52 +0100 Subject: [PATCH 77/95] Fix runtime library flags for MSVC debug builds. Reported by igor725. #1127 --- src/msvcbuild.bat | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat index 1f60b8f4..0ac65409 100644 --- a/src/msvcbuild.bat +++ b/src/msvcbuild.bat @@ -15,6 +15,7 @@ @rem Add more debug flags here, e.g. 
DEBUGCFLAGS=/DLUA_USE_APICHECK @set DEBUGCFLAGS= @set LJCOMPILE=cl /nologo /c /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE /D_CRT_STDIO_INLINE=__declspec(dllexport)__inline +@set LJDYNBUILD=/MD /DLUA_BUILD_AS_DLL @set LJLINK=link /nologo @set LJMT=mt /nologo @set LJLIB=lib /nologo /nodefaultlib @@ -71,11 +72,12 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c @shift @set BUILDTYPE=debug @set LJCOMPILE=%LJCOMPILE% /Zi %DEBUGCFLAGS% +@set LJDYNBUILD=/MDd /DLUA_BUILD_AS_DLL :NODEBUG @set LJLINK=%LJLINK% /%BUILDTYPE% @if "%1"=="amalg" goto :AMALGDLL @if "%1"=="static" goto :STATIC -%LJCOMPILE% /MD /DLUA_BUILD_AS_DLL lj_*.c lib_*.c +%LJCOMPILE% %LJDYNBUILD% lj_*.c lib_*.c @if errorlevel 1 goto :BAD %LJLINK% /DLL /out:%LJDLLNAME% lj_*.obj lib_*.obj @if errorlevel 1 goto :BAD @@ -87,7 +89,7 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c @if errorlevel 1 goto :BAD @goto :MTDLL :AMALGDLL -%LJCOMPILE% /MD /DLUA_BUILD_AS_DLL ljamalg.c +%LJCOMPILE% %LJDYNBUILD% ljamalg.c @if errorlevel 1 goto :BAD %LJLINK% /DLL /out:%LJDLLNAME% ljamalg.obj lj_vm.obj @if errorlevel 1 goto :BAD From e02cb19b570d79133a7581e0163e86b69cc792be Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 10 Dec 2023 15:33:47 +0100 Subject: [PATCH 78/95] Fix anchoring for string buffer set() method. Thanks to Peter Cawley. #1125 --- src/lj_ffrecord.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 151c4c8c..c70793a4 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -1204,6 +1204,15 @@ static void LJ_FASTCALL recff_buffer_method_set(jit_State *J, RecordFFData *rd) if (tref_isstr(tr)) { TRef trp = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0)); TRef len = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN); + IRIns *irp = IR(tref_ref(trp)); + /* Anchor (potentially different) obj into which trp points after fold. 
*/ + if (irp->o == IR_STRREF) { + tr = irp->op1; + } else if (irp->o == IR_KKPTR && !tref_isk(tr)) { + GCstr *str = strV(&rd->argv[1]); /* Constify the argument. */ + tr = lj_ir_kstr(J, str); + trp = lj_ir_kkptr(J, (char *)strdata(str)); + } lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr); #if LJ_HASFFI } else if (tref_iscdata(tr)) { From 1b38c736550004fba1b9712c1a5788b3eefa49be Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 10 Dec 2023 15:45:10 +0100 Subject: [PATCH 79/95] Document workaround for multilib vs. cross-compiler conflict. Reported by igorpupkinable. #1126 --- doc/install.html | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/install.html b/doc/install.html index 21866315..7f2e40e4 100644 --- a/doc/install.html +++ b/doc/install.html @@ -240,7 +240,10 @@ for any supported target, as long as both architectures have the same pointer size. If you want to cross-compile to any 32 bit target on an x64 OS, you need to install the multilib development package (e.g. libc6-dev-i386 on Debian/Ubuntu) and build a 32 bit host part -(HOST_CC="gcc -m32"). +(HOST_CC="gcc -m32"). On some distro versions, multilib conflicts +with cross-compilers. The workaround is to install the x86 cross-compiler +package gcc-i686-linux-gnu and use it to build the host part +(HOST_CC=i686-linux-gnu-gcc). You need to specify TARGET_SYS whenever the host OS and the From 10cc759f259e1f3b6572ce663858c8ce4d34a483 Mon Sep 17 00:00:00 2001 From: Mike Pall
-Date: Sun, 10 Dec 2023 16:10:48 +0100 Subject: [PATCH 80/95] ARM: Fix stack restore for FP slots. Thanks to Peter Cawley. #1131 --- src/lj_asm_arm.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index f53f708b..8869af32 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -1991,11 +1991,12 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) SnapEntry *map = &as->T->snapmap[snap->mapofs]; SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; MSize n, nent = snap->nent; + int32_t bias = 0; /* Store the value of all modified slots to the Lua stack. */ for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; BCReg s = snap_slot(sn); - int32_t ofs = 8*((int32_t)s-1); + int32_t ofs = 8*((int32_t)s-1) - bias; IRRef ref = snap_ref(sn); IRIns *ir = IR(ref); if ((sn & SNAP_NORESTORE)) @@ -2013,6 +2014,12 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs+4); #else Reg src = ra_alloc1(as, ref, RSET_FPR); + if (LJ_UNLIKELY(ofs < -1020 || ofs > 1020)) { + int32_t adj = ofs & 0xffffff00; /* K12-friendly. */ + bias += adj; + ofs -= adj; + emit_addptr(as, RID_BASE, -adj); + } emit_vlso(as, ARMI_VSTR_D, src, RID_BASE, ofs); #endif } else { @@ -2038,6 +2045,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) } checkmclim(as); } + emit_addptr(as, RID_BASE, bias); lua_assert(map + nent == flinks); } From ff204d0350575cf710f6f4af982db146cb454e1a Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 10 Dec 2023 19:42:22 +0100 Subject: [PATCH 81/95] Fix anchoring for string buffer set() method (again). Thanks to Peter Cawley. 
#1125 --- src/lj_ffrecord.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index c70793a4..30dc6bfc 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -1205,14 +1205,11 @@ static void LJ_FASTCALL recff_buffer_method_set(jit_State *J, RecordFFData *rd) TRef trp = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0)); TRef len = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN); IRIns *irp = IR(tref_ref(trp)); - /* Anchor (potentially different) obj into which trp points after fold. */ - if (irp->o == IR_STRREF) { + /* trp must point into the anchored obj, even after folding. */ + if (irp->o == IR_STRREF) tr = irp->op1; - } else if (irp->o == IR_KKPTR && !tref_isk(tr)) { - GCstr *str = strV(&rd->argv[1]); /* Constify the argument. */ - tr = lj_ir_kstr(J, str); - trp = lj_ir_kkptr(J, (char *)strdata(str)); - } + else if (!tref_isk(tr)) + trp = emitir(IRT(IR_ADD, IRT_PGC), tr, lj_ir_kintpgc(J, sizeof(GCstr))); lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr); #if LJ_HASFFI } else if (tref_iscdata(tr)) { From 9bdfd34dccb913777be0efcc6869b6eeb5b9b43b Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 11 Dec 2023 13:01:36 +0100 Subject: [PATCH 82/95] Only emit proper parent references in snapshot replay. Thanks to Peter Cawley. 
#1132 --- src/lj_snap.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/lj_snap.c b/src/lj_snap.c index 5a5c481b..b387dd76 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -510,12 +510,14 @@ void lj_snap_replay(jit_State *J, GCtrace *T) IRRef refp = snap_ref(sn); IRIns *ir = &T->ir[refp]; if (regsp_reg(ir->r) == RID_SUNK) { + uint8_t m; if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue; pass23 = 1; lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW || ir->o == IR_CNEWI); - if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1); - if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2); + m = lj_ir_mode[ir->o]; + if (irm_op1(m) == IRMref) snap_pref(J, T, map, nent, seen, ir->op1); + if (irm_op2(m) == IRMref) snap_pref(J, T, map, nent, seen, ir->op2); if (LJ_HASFFI && ir->o == IR_CNEWI) { if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) snap_pref(J, T, map, nent, seen, (ir+1)->op2); @@ -542,14 +544,16 @@ void lj_snap_replay(jit_State *J, GCtrace *T) IRIns *ir = &T->ir[refp]; if (regsp_reg(ir->r) == RID_SUNK) { TRef op1, op2; + uint8_t m; if (J->slot[snap_slot(sn)] != snap_slot(sn)) { /* De-dup allocs. */ J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]]; continue; } op1 = ir->op1; - if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1); + m = lj_ir_mode[ir->o]; + if (irm_op1(m) == IRMref) op1 = snap_pref(J, T, map, nent, seen, op1); op2 = ir->op2; - if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2); + if (irm_op2(m) == IRMref) op2 = snap_pref(J, T, map, nent, seen, op2); if (LJ_HASFFI && ir->o == IR_CNEWI) { if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) { lj_needsplit(J); /* Emit joining HIOP. */ From c42c62e71a45a677b8b1cbf749bd33cf4d5918ff Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 23 Dec 2023 19:14:32 +0100 Subject: [PATCH 83/95] Simplify handling of instable types in TNEW/TDUP load forwarding. Thanks to Peter Cawley. 
#994 --- src/lj_opt_mem.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index dc74a06d..04b95a6f 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c @@ -185,25 +185,23 @@ static TRef fwd_ahload(jit_State *J, IRRef xref) } ref = store->prev; } - if (ir->o == IR_TNEW && !irt_isnil(fins->t)) - return 0; /* Type instability in loop-carried dependency. */ - if (irt_ispri(fins->t)) { - return TREF_PRI(irt_type(fins->t)); - } else if (irt_isnum(fins->t) || (LJ_DUALNUM && irt_isint(fins->t)) || - irt_isstr(fins->t)) { + /* Simplified here: let loop_unroll() figure out any type instability. */ + if (ir->o == IR_TNEW) { + return TREF_NIL; + } else { TValue keyv; cTValue *tv; IRIns *key = IR(xr->op2); if (key->o == IR_KSLOT) key = IR(key->op1); lj_ir_kvalue(J->L, &keyv, key); tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv); - if (itype2irt(tv) != irt_type(fins->t)) - return 0; /* Type instability in loop-carried dependency. */ - if (irt_isnum(fins->t)) + if (tvispri(tv)) + return TREF_PRI(itype2irt(tv)); + else if (tvisnum(tv)) return lj_ir_knum_u64(J, tv->u64); - else if (LJ_DUALNUM && irt_isint(fins->t)) + else if (tvisint(tv)) return lj_ir_kint(J, intV(tv)); - else + else if (tvisgcv(tv)) return lj_ir_kstr(J, strV(tv)); } /* Othwerwise: don't intern as a constant. */ From 7dbe545933485849977d50384f2f20f2cccf0cf9 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 23 Dec 2023 19:22:34 +0100 Subject: [PATCH 84/95] Respect jit.off() on pending trace exit. Thanks to Sergey Kaplun. 
#1134 --- src/lj_trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lj_trace.c b/src/lj_trace.c index 25e610b5..d015f2ab 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -804,7 +804,7 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) { if (!(G(L)->hookmask & HOOK_GC)) lj_gc_step(L); /* Exited because of GC: drive GC forward. */ - } else { + } else if ((J->flags & JIT_F_ON)) { trace_hotside(J, pc); } if (bc_op(*pc) == BC_JLOOP) { From 658530562c2ac7ffa8e4ca5d18856857471244e9 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 23 Dec 2023 19:43:03 +0100 Subject: [PATCH 85/95] Check for IR_HREF vs. IR_HREFK aliasing in non-nil store check. Thanks to Peter Cawley. #1133 --- src/lj_ir.h | 1 + src/lj_opt_mem.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/lj_ir.h b/src/lj_ir.h index 9fd4e275..6d974ed2 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -346,6 +346,7 @@ typedef struct IRType1 { uint8_t irt; } IRType1; #define irt_isu32(t) (irt_type(t) == IRT_U32) #define irt_isi64(t) (irt_type(t) == IRT_I64) #define irt_isu64(t) (irt_type(t) == IRT_U64) +#define irt_isp32(t) (irt_type(t) == IRT_P32) #define irt_isfp(t) (irt_isnum(t) || irt_isfloat(t)) #define irt_isinteger(t) (irt_typerange((t), IRT_I8, IRT_INT)) diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index 04b95a6f..214fb632 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c @@ -879,6 +879,8 @@ int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref) if (skref == xkref || !irref_isk(skref) || !irref_isk(xkref)) return 0; /* A nil store with same const key or var key MAY alias. */ /* Different const keys CANNOT alias. */ + } else if (irt_isp32(IR(skref)->t) != irt_isp32(IR(xkref)->t)) { + return 0; /* HREF and HREFK MAY alias. */ } /* Different key types CANNOT alias. */ } /* Other non-nil stores MAY alias. 
*/ ref = store->prev; From c525bcb9024510cad9e170e12b6209aedb330f83 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 23 Dec 2023 20:06:17 +0100 Subject: [PATCH 86/95] DynASM/x86: Allow [&expr] operand. Thanks to Dmitry Stogov. #1138 --- dynasm/dasm_x86.lua | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua index 787163c0..df70fed8 100644 --- a/dynasm/dasm_x86.lua +++ b/dynasm/dasm_x86.lua @@ -627,7 +627,11 @@ local function wputmrmsib(t, imark, s, vsreg, psz, sk) werror("NYI: rip-relative displacement followed by immediate") end -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f. - wputlabel("REL_", disp[1], 2) + if disp[2] == "iPJ" then + waction("REL_A", disp[1]) + else + wputlabel("REL_", disp[1], 2) + end else wputdarg(disp) end @@ -744,9 +748,9 @@ local function dispexpr(expr) return imm*map_opsizenum[ops] end local mode, iexpr = immexpr(dispt) - if mode == "iJ" then + if mode == "iJ" or mode == "iPJ" then if c == "-" then werror("cannot invert label reference") end - return { iexpr } + return { iexpr, mode } end return expr -- Need to return original signed expression. end From 4b90f6c4d7420139c135435e1580acb52ea18436 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 22 Jan 2024 19:06:36 +0100 Subject: [PATCH 87/95] Add cross-32/64 bit and deterministic bytecode generation. Contributed by Peter Cawley. 
#993 #1008 --- doc/extensions.html | 51 +++++++++++++++----- doc/running.html | 3 ++ src/host/genlibbc.lua | 99 +++++++++++++++++++++----------------- src/jit/bcsave.lua | 31 ++++++++---- src/lib_base.c | 6 ++- src/lib_jit.c | 26 ++-------- src/lib_string.c | 22 +++++++-- src/lj_bcdump.h | 4 +- src/lj_bcread.c | 9 ++-- src/lj_bcwrite.c | 109 ++++++++++++++++++++++++++++++++++++------ src/lj_lex.c | 1 + src/lj_lex.h | 1 + src/lj_lib.c | 18 +++++++ src/lj_lib.h | 1 + src/lj_load.c | 29 ++++++++--- src/lj_parse.c | 28 ++++++----- 16 files changed, 306 insertions(+), 132 deletions(-) diff --git a/doc/extensions.html b/doc/extensions.html index a4f20841..1d28475c 100644 --- a/doc/extensions.html +++ b/doc/extensions.html @@ -160,13 +160,33 @@ passes any arguments after the error function to the function which is called in a protected context. loadfile() etc. handle UTF-8 source code
+load*() handle UTF-8 source code
Non-ASCII characters are handled transparently by the Lua source code parser. This allows the use of UTF-8 characters in identifiers and strings. A UTF-8 BOM is skipped at the start of the source code.
+load*() add a mode parameter
++As an extension from Lua 5.2, the functions loadstring(), +loadfile() and (new) load() add an optional +mode parameter. +
++The default mode string is "bt", which allows loading of both +source code and bytecode. Use "t" to allow only source code +or "b" to allow only bytecode to be loaded. +
++By default, the load* functions generate the native bytecode format. +For cross-compilation purposes, add W to the mode string to +force the 32 bit format and X to force the 64 bit format. +Add both to force the opposite of the native format. Note that non-native bytecode +generated by load* cannot be run, but can still be passed +to string.dump. +
+tostring() etc. canonicalize NaN and ±Inf
All number-to-string conversions consistently convert non-finite numbers @@ -186,26 +206,33 @@ works independently of the current locale and it supports hex floating-point numbers (e.g. 0x1.5p-3).
-string.dump(f [,strip]) generates portable bytecode
+string.dump(f [,mode]) generates portable bytecode
An extra argument has been added to string.dump(). If set to -true, 'stripped' bytecode without debug information is -generated. This speeds up later bytecode loading and reduces memory -usage. See also the +true or to a string which contains the character s, +'stripped' bytecode without debug information is generated. This speeds +up later bytecode loading and reduces memory usage. See also the -b command line option.
The generated bytecode is portable and can be loaded on any architecture -that LuaJIT supports, independent of word size or endianess. However, the -bytecode compatibility versions must match. Bytecode stays compatible -for dot releases (x.y.0 → x.y.1), but may change with major or -minor releases (2.0 → 2.1) or between any beta release. Foreign -bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded. +that LuaJIT supports. However, the bytecode compatibility versions must +match. Bytecode only stays compatible within a major+minor version +(x.y.aaa → x.y.bbb), except for development branches. Foreign bytecode +(e.g. from Lua 5.1) is incompatible and cannot be loaded.
Note: LJ_GC64 mode requires a different frame layout, which implies -a different, incompatible bytecode format for all 64 bit ports. This may be -rectified in the future. +a different, incompatible bytecode format between 32 bit and 64 bit ports. +This may be rectified in the future. In the meantime, use the W +and X modes of the load* functions +for cross-compilation purposes. +
++Due to VM hardening, bytecode is not deterministic. Add d to the +mode string to dump it in a deterministic manner: identical source code +always gives a byte-for-byte identical bytecode dump. This feature is +mainly useful for reproducible builds.
table.new(narray, nhash) allocates a pre-sized table
diff --git a/doc/running.html b/doc/running.html index 9dd2b411..142b810f 100644 --- a/doc/running.html +++ b/doc/running.html @@ -106,6 +106,9 @@ are accepted:- -l — Only list bytecode.
- -s — Strip debug info (this is the default).
- -g — Keep debug info.
+- -W — Generate 32 bit (non-GC64) bytecode.
+- -X — Generate 64 bit (GC64) bytecode.
+- -d — Generate bytecode in a deterministic manner.
- -n name — Set module name (default: auto-detect from input name)
- -t type — Set output file type (default: auto-detect from output name).
- -a arch — Override architecture for object files (default: native).
diff --git a/src/host/genlibbc.lua b/src/host/genlibbc.lua index 3621c3f5..e697fceb 100644 --- a/src/host/genlibbc.lua +++ b/src/host/genlibbc.lua @@ -138,65 +138,73 @@ local function fixup_dump(dump, fixup) return { dump = ndump, startbc = startbc, sizebc = sizebc } end -local function find_defs(src) +local function find_defs(src, mode) local defs = {} for name, code in string.gmatch(src, "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/") do - local env = {} local tcode, fixup = transform_lua(code) - local func = assert(load(tcode, "", nil, env))() - defs[name] = fixup_dump(string.dump(func, true), fixup) + local func = assert(load(tcode, "", mode)) + defs[name] = fixup_dump(string.dump(func, mode), fixup) defs[#defs+1] = name end return defs end -local function gen_header(defs) +local function gen_header(defs32, defs64) local t = {} local function w(x) t[#t+1] = x end w("/* This is a generated file. DO NOT EDIT! */\n\n") w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n") - local s, sb = "", "" - for i,name in ipairs(defs) do - local d = defs[name] - s = s .. d.dump - sb = sb .. string.char(i) .. ("\0"):rep(d.startbc - 1) - .. (isbe and "\0\0\0\255" or "\255\0\0\0"):rep(d.sizebc) - .. ("\0"):rep(#d.dump - d.startbc - d.sizebc*4) - end - w("static const uint8_t libbc_code[] = {\n") - local n = 0 - for i=1,#s do - local x = string.byte(s, i) - local xb = string.byte(sb, i) - if xb == 255 then - local name = BCN[x] - local m = #name + 4 - if n + m > 78 then n = 0; w("\n") end - n = n + m - w("BC_"); w(name) - else - local m = x < 10 and 2 or (x < 100 and 3 or 4) - if xb == 0 then - if n + m > 78 then n = 0; w("\n") end - else - local name = defs[xb]:gsub("_", ".") - if n ~= 0 then w("\n") end - w("/* "); w(name); w(" */ ") - n = #name + 7 - end - n = n + m - w(x) + for j,defs in ipairs{defs64, defs32} do + local s, sb = "", "" + for i,name in ipairs(defs) do + local d = defs[name] + s = s .. d.dump + sb = sb .. string.char(i) .. ("\0"):rep(d.startbc - 1) + .. 
(isbe and "\0\0\0\255" or "\255\0\0\0"):rep(d.sizebc) + .. ("\0"):rep(#d.dump - d.startbc - d.sizebc*4) + end + if j == 1 then + w("static const uint8_t libbc_code[] = {\n#if LJ_FR2\n") + else + w("\n#else\n") + end + local n = 0 + for i=1,#s do + local x = string.byte(s, i) + local xb = string.byte(sb, i) + if xb == 255 then + local name = BCN[x] + local m = #name + 4 + if n + m > 78 then n = 0; w("\n") end + n = n + m + w("BC_"); w(name) + else + local m = x < 10 and 2 or (x < 100 and 3 or 4) + if xb == 0 then + if n + m > 78 then n = 0; w("\n") end + else + local name = defs[xb]:gsub("_", ".") + if n ~= 0 then w("\n") end + w("/* "); w(name); w(" */ ") + n = #name + 7 + end + n = n + m + w(x) + end + w(",") end - w(",") end - w("\n0\n};\n\n") + w("\n#endif\n0\n};\n\n") w("static const struct { const char *name; int ofs; } libbc_map[] = {\n") - local m = 0 - for _,name in ipairs(defs) do - w('{"'); w(name); w('",'); w(m) w('},\n') - m = m + #defs[name].dump + local m32, m64 = 0, 0 + for i,name in ipairs(defs32) do + assert(name == defs64[i]) + w('{"'); w(name); w('",'); w(m32) w('},\n') + m32 = m32 + #defs32[name].dump + m64 = m64 + #defs64[name].dump + assert(m32 == m64) end - w("{NULL,"); w(m); w("}\n};\n\n") + w("{NULL,"); w(m32); w("}\n};\n\n") return table.concat(t) end @@ -219,7 +227,8 @@ end local outfile = parse_arg(arg) local src = read_files(arg) -local defs = find_defs(src) -local hdr = gen_header(defs) +local defs32 = find_defs(src, "Wdts") +local defs64 = find_defs(src, "Xdts") +local hdr = gen_header(defs32, defs64) write_file(outfile, hdr) diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua index 390d297c..131bf39b 100644 --- a/src/jit/bcsave.lua +++ b/src/jit/bcsave.lua @@ -29,6 +29,9 @@ Save LuaJIT bytecode: luajit -b[options] input output -l Only list bytecode. -s Strip debug info (default). -g Keep debug info. + -W Generate 32 bit (non-GC64) bytecode. + -X Generate 64 bit (GC64) bytecode. + -d Generate bytecode in deterministic manner. 
-n name Set module name (default: auto-detect from input name). -t type Set output file type (default: auto-detect from output name). -a arch Override architecture for object files (default: native). @@ -51,8 +54,9 @@ local function check(ok, ...) end local function readfile(ctx, input) - if type(input) == "function" then return input end - if ctx.filename then + if ctx.string then + return check(loadstring(input, nil, ctx.mode)) + elseif ctx.filename then local data if input == "-" then data = io.stdin:read("*a") @@ -61,10 +65,10 @@ local function readfile(ctx, input) data = assert(fp:read("*a")) assert(fp:close()) end - return check(load(data, ctx.filename)) + return check(load(data, ctx.filename, ctx.mode)) else if input == "-" then input = nil end - return check(loadfile(input)) + return check(loadfile(input, ctx.mode)) end end @@ -624,7 +628,7 @@ end local function bcsave(ctx, input, output) local f = readfile(ctx, input) - local s = string.dump(f, ctx.strip) + local s = string.dump(f, ctx.mode) local t = ctx.type if not t then t = detecttype(output) @@ -647,9 +651,11 @@ local function docmd(...) local n = 1 local list = false local ctx = { - strip = true, arch = jit.arch, os = jit.os:lower(), - type = false, modname = false, + mode = "bt", arch = jit.arch, os = jit.os:lower(), + type = false, modname = false, string = false, } + local strip = "s" + local gc64 = "" while n <= #arg do local a = arg[n] if type(a) == "string" and a:sub(1, 1) == "-" and a ~= "-" then @@ -660,14 +666,18 @@ local function docmd(...) if opt == "l" then list = true elseif opt == "s" then - ctx.strip = true + strip = "s" elseif opt == "g" then - ctx.strip = false + strip = "" + elseif opt == "W" or opt == "X" then + gc64 = opt + elseif opt == "d" then + ctx.mode = ctx.mode .. 
opt else if arg[n] == nil or m ~= #a then usage() end if opt == "e" then if n ~= 1 then usage() end - arg[1] = check(loadstring(arg[1])) + ctx.string = true elseif opt == "n" then ctx.modname = checkmodname(tremove(arg, n)) elseif opt == "t" then @@ -687,6 +697,7 @@ local function docmd(...) n = n + 1 end end + ctx.mode = ctx.mode .. strip .. gc64 if list then if #arg == 0 or #arg > 2 then usage() end bclist(ctx, arg[1], arg[2] or "-") diff --git a/src/lib_base.c b/src/lib_base.c index 4e6f8a30..d644b4f2 100644 --- a/src/lib_base.c +++ b/src/lib_base.c @@ -360,7 +360,11 @@ LJLIB_ASM_(xpcall) LJLIB_REC(.) static int load_aux(lua_State *L, int status, int envarg) { if (status == LUA_OK) { - if (tvistab(L->base+envarg-1)) { + /* + ** Set environment table for top-level function. + ** Don't do this for non-native bytecode, which returns a prototype. + */ + if (tvistab(L->base+envarg-1) && tvisfunc(L->top-1)) { GCfunc *fn = funcV(L->top-1); GCtab *t = tabV(L->base+envarg-1); setgcref(fn->c.env, obj2gco(t)); diff --git a/src/lib_jit.c b/src/lib_jit.c index c0294927..b83c865a 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -161,24 +161,6 @@ LJLIB_PUSH(top-2) LJLIB_SET(version) /* -- Reflection API for Lua functions ------------------------------------ */ -/* Return prototype of first argument (Lua function or prototype object) */ -static GCproto *check_Lproto(lua_State *L, int nolua) -{ - TValue *o = L->base; - if (L->top > o) { - if (tvisproto(o)) { - return protoV(o); - } else if (tvisfunc(o)) { - if (isluafunc(funcV(o))) - return funcproto(funcV(o)); - else if (nolua) - return NULL; - } - } - lj_err_argt(L, 1, LUA_TFUNCTION); - return NULL; /* unreachable */ -} - static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val) { setintV(lj_tab_setstr(L, t, lj_str_newz(L, name)), val); @@ -187,7 +169,7 @@ static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val) /* local info = jit.util.funcinfo(func [,pc]) */ 
LJLIB_CF(jit_util_funcinfo) { - GCproto *pt = check_Lproto(L, 1); + GCproto *pt = lj_lib_checkLproto(L, 1, 1); if (pt) { BCPos pc = (BCPos)lj_lib_optint(L, 2, 0); GCtab *t; @@ -229,7 +211,7 @@ LJLIB_CF(jit_util_funcinfo) /* local ins, m = jit.util.funcbc(func, pc) */ LJLIB_CF(jit_util_funcbc) { - GCproto *pt = check_Lproto(L, 0); + GCproto *pt = lj_lib_checkLproto(L, 1, 0); BCPos pc = (BCPos)lj_lib_checkint(L, 2); if (pc < pt->sizebc) { BCIns ins = proto_bc(pt)[pc]; @@ -246,7 +228,7 @@ LJLIB_CF(jit_util_funcbc) /* local k = jit.util.funck(func, idx) */ LJLIB_CF(jit_util_funck) { - GCproto *pt = check_Lproto(L, 0); + GCproto *pt = lj_lib_checkLproto(L, 1, 0); ptrdiff_t idx = (ptrdiff_t)lj_lib_checkint(L, 2); if (idx >= 0) { if (idx < (ptrdiff_t)pt->sizekn) { @@ -266,7 +248,7 @@ LJLIB_CF(jit_util_funck) /* local name = jit.util.funcuvname(func, idx) */ LJLIB_CF(jit_util_funcuvname) { - GCproto *pt = check_Lproto(L, 0); + GCproto *pt = lj_lib_checkLproto(L, 1, 0); uint32_t idx = (uint32_t)lj_lib_checkint(L, 2); if (idx < pt->sizeuv) { setstrV(L, L->top-1, lj_str_newz(L, lj_debug_uvname(pt, idx))); diff --git a/src/lib_string.c b/src/lib_string.c index 29bcb8fe..255689ce 100644 --- a/src/lib_string.c +++ b/src/lib_string.c @@ -122,11 +122,25 @@ static int writer_buf(lua_State *L, const void *p, size_t size, void *sb) LJLIB_CF(string_dump) { - GCfunc *fn = lj_lib_checkfunc(L, 1); - int strip = L->base+1 < L->top && tvistruecond(L->base+1); - SBuf *sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */ + GCproto *pt = lj_lib_checkLproto(L, 1, 1); + uint32_t flags = 0; + SBuf *sb; + TValue *o = L->base+1; + if (o < L->top) { + if (tvisstr(o)) { + const char *mode = strVdata(o); + char c; + while ((c = *mode++)) { + if (c == 's') flags |= BCDUMP_F_STRIP; + if (c == 'd') flags |= BCDUMP_F_DETERMINISTIC; + } + } else if (tvistruecond(o)) { + flags |= BCDUMP_F_STRIP; + } + } + sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. 
*/ L->top = L->base+1; - if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip)) + if (!pt || lj_bcwrite(L, pt, writer_buf, sb, flags)) lj_err_caller(L, LJ_ERR_STRDUMP); setstrV(L, L->top-1, lj_buf_str(L, sb)); lj_gc_check(L); diff --git a/src/lj_bcdump.h b/src/lj_bcdump.h index 6ba71e25..3e56e39c 100644 --- a/src/lj_bcdump.h +++ b/src/lj_bcdump.h @@ -46,6 +46,8 @@ #define BCDUMP_F_KNOWN (BCDUMP_F_FR2*2-1) +#define BCDUMP_F_DETERMINISTIC 0x80000000 + /* Type codes for the GC constants of a prototype. Plus length for strings. */ enum { BCDUMP_KGC_CHILD, BCDUMP_KGC_TAB, BCDUMP_KGC_I64, BCDUMP_KGC_U64, @@ -61,7 +63,7 @@ enum { /* -- Bytecode reader/writer ---------------------------------------------- */ LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, - void *data, int strip); + void *data, uint32_t flags); LJ_FUNC GCproto *lj_bcread_proto(LexState *ls); LJ_FUNC GCproto *lj_bcread(LexState *ls); diff --git a/src/lj_bcread.c b/src/lj_bcread.c index c98c0d42..637ef067 100644 --- a/src/lj_bcread.c +++ b/src/lj_bcread.c @@ -281,8 +281,11 @@ static void bcread_knum(LexState *ls, GCproto *pt, MSize sizekn) static void bcread_bytecode(LexState *ls, GCproto *pt, MSize sizebc) { BCIns *bc = proto_bc(pt); - bc[0] = BCINS_AD((pt->flags & PROTO_VARARG) ? BC_FUNCV : BC_FUNCF, - pt->framesize, 0); + BCIns op; + if (ls->fr2 != LJ_FR2) op = BC_NOT; /* Mark non-native prototype. */ + else if ((pt->flags & PROTO_VARARG)) op = BC_FUNCV; + else op = BC_FUNCF; + bc[0] = BCINS_AD(op, pt->framesize, 0); bcread_block(ls, bc+1, (sizebc-1)*(MSize)sizeof(BCIns)); /* Swap bytecode instructions if the endianess differs. 
*/ if (bcread_swap(ls)) { @@ -395,7 +398,7 @@ static int bcread_header(LexState *ls) bcread_byte(ls) != BCDUMP_VERSION) return 0; bcread_flags(ls) = flags = bcread_uleb128(ls); if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0; - if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0; + if ((flags & BCDUMP_F_FR2) != (uint32_t)ls->fr2*BCDUMP_F_FR2) return 0; if ((flags & BCDUMP_F_FFI)) { #if LJ_HASFFI lua_State *L = ls->L; diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c index dd969413..c062dc49 100644 --- a/src/lj_bcwrite.c +++ b/src/lj_bcwrite.c @@ -27,7 +27,9 @@ typedef struct BCWriteCtx { GCproto *pt; /* Root prototype. */ lua_Writer wfunc; /* Writer callback. */ void *wdata; /* Writer callback data. */ - int strip; /* Strip debug info. */ + TValue **heap; /* Heap used for deterministic sorting. */ + uint32_t heapsz; /* Size of heap. */ + uint32_t flags; /* BCDUMP_F_* flags. */ int status; /* Status from writer callback. */ #ifdef LUA_USE_ASSERT global_State *g; @@ -76,6 +78,75 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) ctx->sb.w = p; } +/* Compare two template table keys. */ +static LJ_AINLINE int bcwrite_ktabk_lt(TValue *a, TValue *b) +{ + uint32_t at = itype(a), bt = itype(b); + if (at != bt) { /* This also handles false and true keys. */ + return at < bt; + } else if (at == LJ_TSTR) { + return lj_str_cmp(strV(a), strV(b)) < 0; + } else { + return a->u64 < b->u64; /* This works for numbers and integers. */ + } +} + +/* Insert key into a sorted heap. */ +static void bcwrite_ktabk_heap_insert(TValue **heap, MSize idx, MSize end, + TValue *key) +{ + MSize child; + while ((child = idx * 2 + 1) < end) { + /* Find lower of the two children. */ + TValue *c0 = heap[child]; + if (child + 1 < end) { + TValue *c1 = heap[child + 1]; + if (bcwrite_ktabk_lt(c1, c0)) { + c0 = c1; + child++; + } + } + if (bcwrite_ktabk_lt(key, c0)) break; /* Key lower? Found our position. */ + heap[idx] = c0; /* Move lower child up. */ + idx = child; /* Descend. 
*/ + } + heap[idx] = key; /* Insert key here. */ +} + +/* Resize heap, dropping content. */ +static void bcwrite_heap_resize(BCWriteCtx *ctx, uint32_t nsz) +{ + lua_State *L = sbufL(&ctx->sb); + if (ctx->heapsz) { + lj_mem_freevec(G(L), ctx->heap, ctx->heapsz, TValue *); + ctx->heapsz = 0; + } + if (nsz) { + ctx->heap = lj_mem_newvec(L, nsz, TValue *); + ctx->heapsz = nsz; + } +} + +/* Write hash part of template table in sorted order. */ +static void bcwrite_ktab_sorted_hash(BCWriteCtx *ctx, Node *node, MSize nhash) +{ + TValue **heap = ctx->heap; + MSize i = nhash; + for (;; node--) { /* Build heap. */ + if (!tvisnil(&node->val)) { + bcwrite_ktabk_heap_insert(heap, --i, nhash, &node->key); + if (i == 0) break; + } + } + do { /* Drain heap. */ + TValue *key = heap[0]; /* Output lowest key from top. */ + bcwrite_ktabk(ctx, key, 0); + bcwrite_ktabk(ctx, (TValue *)((char *)key - offsetof(Node, key)), 1); + key = heap[--nhash]; /* Remove last key. */ + bcwrite_ktabk_heap_insert(heap, 0, nhash, key); /* Re-insert. */ + } while (nhash); +} + /* Write a template table. */ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t) { @@ -105,14 +176,20 @@ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t) bcwrite_ktabk(ctx, o, 1); } if (nhash) { /* Write hash entries. 
*/ - MSize i = nhash; Node *node = noderef(t->node) + t->hmask; - for (;; node--) - if (!tvisnil(&node->val)) { - bcwrite_ktabk(ctx, &node->key, 0); - bcwrite_ktabk(ctx, &node->val, 1); - if (--i == 0) break; - } + if ((ctx->flags & BCDUMP_F_DETERMINISTIC) && nhash > 1) { + if (ctx->heapsz < nhash) + bcwrite_heap_resize(ctx, t->hmask + 1); + bcwrite_ktab_sorted_hash(ctx, node, nhash); + } else { + MSize i = nhash; + for (;; node--) + if (!tvisnil(&node->val)) { + bcwrite_ktabk(ctx, &node->key, 0); + bcwrite_ktabk(ctx, &node->val, 1); + if (--i == 0) break; + } + } } } @@ -269,7 +346,7 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) p = lj_strfmt_wuleb128(p, pt->sizekgc); p = lj_strfmt_wuleb128(p, pt->sizekn); p = lj_strfmt_wuleb128(p, pt->sizebc-1); - if (!ctx->strip) { + if (!(ctx->flags & BCDUMP_F_STRIP)) { if (proto_lineinfo(pt)) sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt); p = lj_strfmt_wuleb128(p, sizedbg); @@ -317,11 +394,10 @@ static void bcwrite_header(BCWriteCtx *ctx) *p++ = BCDUMP_HEAD2; *p++ = BCDUMP_HEAD3; *p++ = BCDUMP_VERSION; - *p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) + + *p++ = (ctx->flags & (BCDUMP_F_STRIP | BCDUMP_F_FR2)) + LJ_BE*BCDUMP_F_BE + - ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0) + - LJ_FR2*BCDUMP_F_FR2; - if (!ctx->strip) { + ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0); + if (!(ctx->flags & BCDUMP_F_STRIP)) { p = lj_strfmt_wuleb128(p, len); p = lj_buf_wmem(p, name, len); } @@ -352,14 +428,16 @@ static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud) /* Write bytecode for a prototype. 
*/ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data, - int strip) + uint32_t flags) { BCWriteCtx ctx; int status; ctx.pt = pt; ctx.wfunc = writer; ctx.wdata = data; - ctx.strip = strip; + ctx.heapsz = 0; + if ((bc_op(proto_bc(pt)[0]) != BC_NOT) == LJ_FR2) flags |= BCDUMP_F_FR2; + ctx.flags = flags; ctx.status = 0; #ifdef LUA_USE_ASSERT ctx.g = G(L); @@ -368,6 +446,7 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data, status = lj_vm_cpcall(L, NULL, &ctx, cpwriter); if (status == 0) status = ctx.status; lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb); + bcwrite_heap_resize(&ctx, 0); return status; } diff --git a/src/lj_lex.c b/src/lj_lex.c index 61b04c4b..bd81dc40 100644 --- a/src/lj_lex.c +++ b/src/lj_lex.c @@ -411,6 +411,7 @@ int lj_lex_setup(lua_State *L, LexState *ls) ls->linenumber = 1; ls->lastline = 1; ls->endmark = 0; + ls->fr2 = LJ_FR2; /* Generate native bytecode by default. */ lex_next(ls); /* Read-ahead first char. */ if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb && (uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */ diff --git a/src/lj_lex.h b/src/lj_lex.h index e46fbd89..2ef7fc77 100644 --- a/src/lj_lex.h +++ b/src/lj_lex.h @@ -74,6 +74,7 @@ typedef struct LexState { MSize sizebcstack; /* Size of bytecode stack. */ uint32_t level; /* Syntactical nesting level. */ int endmark; /* Trust bytecode end marker, even if not at EOF. */ + int fr2; /* Generate bytecode for LJ_FR2 mode. 
*/ } LexState; LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls); diff --git a/src/lj_lib.c b/src/lj_lib.c index ebe0dc78..06ae4fcf 100644 --- a/src/lj_lib.c +++ b/src/lj_lib.c @@ -62,6 +62,7 @@ static const uint8_t *lib_read_lfunc(lua_State *L, const uint8_t *p, GCtab *tab) ls.pe = (const char *)~(uintptr_t)0; ls.c = -1; ls.level = (BCDUMP_F_STRIP|(LJ_BE*BCDUMP_F_BE)); + ls.fr2 = LJ_FR2; ls.chunkname = name; pt = lj_bcread_proto(&ls); pt->firstline = ~(BCLine)0; @@ -266,6 +267,23 @@ GCfunc *lj_lib_checkfunc(lua_State *L, int narg) return funcV(o); } +GCproto *lj_lib_checkLproto(lua_State *L, int narg, int nolua) +{ + TValue *o = L->base + narg-1; + if (L->top > o) { + if (tvisproto(o)) { + return protoV(o); + } else if (tvisfunc(o)) { + if (isluafunc(funcV(o))) + return funcproto(funcV(o)); + else if (nolua) + return NULL; + } + } + lj_err_argt(L, narg, LUA_TFUNCTION); + return NULL; /* unreachable */ +} + GCtab *lj_lib_checktab(lua_State *L, int narg) { TValue *o = L->base + narg-1; diff --git a/src/lj_lib.h b/src/lj_lib.h index 6c3a1c83..a48e3c98 100644 --- a/src/lj_lib.h +++ b/src/lj_lib.h @@ -42,6 +42,7 @@ LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg); LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg); LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def); LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg); +LJ_FUNC GCproto *lj_lib_checkLproto(lua_State *L, int narg, int nolua); LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg); LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg); LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst); diff --git a/src/lj_load.c b/src/lj_load.c index 07304487..152ef6da 100644 --- a/src/lj_load.c +++ b/src/lj_load.c @@ -34,14 +34,28 @@ static TValue *cpparser(lua_State *L, lua_CFunction dummy, void *ud) UNUSED(dummy); cframe_errfunc(L->cframe) = -1; /* Inherit error function. */ bc = lj_lex_setup(L, ls); - if (ls->mode && !strchr(ls->mode, bc ? 
'b' : 't')) { - setstrV(L, L->top++, lj_err_str(L, LJ_ERR_XMODE)); - lj_err_throw(L, LUA_ERRSYNTAX); + if (ls->mode) { + int xmode = 1; + const char *mode = ls->mode; + char c; + while ((c = *mode++)) { + if (c == (bc ? 'b' : 't')) xmode = 0; + if (c == (LJ_FR2 ? 'W' : 'X')) ls->fr2 = !LJ_FR2; + } + if (xmode) { + setstrV(L, L->top++, lj_err_str(L, LJ_ERR_XMODE)); + lj_err_throw(L, LUA_ERRSYNTAX); + } } pt = bc ? lj_bcread(ls) : lj_parse(ls); - fn = lj_func_newL_empty(L, pt, tabref(L->env)); - /* Don't combine above/below into one statement. */ - setfuncV(L, L->top++, fn); + if (ls->fr2 == LJ_FR2) { + fn = lj_func_newL_empty(L, pt, tabref(L->env)); + /* Don't combine above/below into one statement. */ + setfuncV(L, L->top++, fn); + } else { + /* Non-native generation returns a dumpable, but non-runnable prototype. */ + setprotoV(L, L->top++, pt); + } return NULL; } @@ -159,9 +173,10 @@ LUALIB_API int luaL_loadstring(lua_State *L, const char *s) LUA_API int lua_dump(lua_State *L, lua_Writer writer, void *data) { cTValue *o = L->top-1; + uint32_t flags = LJ_FR2*BCDUMP_F_FR2; /* Default mode for legacy C API. */ lj_checkapi(L->top > L->base, "top slot empty"); if (tvisfunc(o) && isluafunc(funcV(o))) - return lj_bcwrite(L, funcproto(funcV(o)), writer, data, 0); + return lj_bcwrite(L, funcproto(funcV(o)), writer, data, flags); else return 1; } diff --git a/src/lj_parse.c b/src/lj_parse.c index a30921af..5a44f8db 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -667,19 +667,20 @@ static void bcemit_store(FuncState *fs, ExpDesc *var, ExpDesc *e) /* Emit method lookup expression. */ static void bcemit_method(FuncState *fs, ExpDesc *e, ExpDesc *key) { - BCReg idx, func, obj = expr_toanyreg(fs, e); + BCReg idx, func, fr2, obj = expr_toanyreg(fs, e); expr_free(fs, e); func = fs->freereg; - bcemit_AD(fs, BC_MOV, func+1+LJ_FR2, obj); /* Copy object to 1st argument. */ + fr2 = fs->ls->fr2; + bcemit_AD(fs, BC_MOV, func+1+fr2, obj); /* Copy object to 1st argument. 
*/ lj_assertFS(expr_isstrk(key), "bad usage"); idx = const_str(fs, key); if (idx <= BCMAX_C) { - bcreg_reserve(fs, 2+LJ_FR2); + bcreg_reserve(fs, 2+fr2); bcemit_ABC(fs, BC_TGETS, func, obj, idx); } else { - bcreg_reserve(fs, 3+LJ_FR2); - bcemit_AD(fs, BC_KSTR, func+2+LJ_FR2, idx); - bcemit_ABC(fs, BC_TGETV, func, obj, func+2+LJ_FR2); + bcreg_reserve(fs, 3+fr2); + bcemit_AD(fs, BC_KSTR, func+2+fr2, idx); + bcemit_ABC(fs, BC_TGETV, func, obj, func+2+fr2); fs->freereg--; } e->u.s.info = func; @@ -1326,9 +1327,12 @@ static void fs_fixup_bc(FuncState *fs, GCproto *pt, BCIns *bc, MSize n) { BCInsLine *base = fs->bcbase; MSize i; + BCIns op; pt->sizebc = n; - bc[0] = BCINS_AD((fs->flags & PROTO_VARARG) ? BC_FUNCV : BC_FUNCF, - fs->framesize, 0); + if (fs->ls->fr2 != LJ_FR2) op = BC_NOT; /* Mark non-native prototype. */ + else if ((fs->flags & PROTO_VARARG)) op = BC_FUNCV; + else op = BC_FUNCF; + bc[0] = BCINS_AD(op, fs->framesize, 0); for (i = 1; i < n; i++) bc[i] = base[i].ins; } @@ -1936,11 +1940,11 @@ static void parse_args(LexState *ls, ExpDesc *e) lj_assertFS(e->k == VNONRELOC, "bad expr type %d", e->k); base = e->u.s.info; /* Base register for call. 
*/ if (args.k == VCALL) { - ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1 - LJ_FR2); + ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1 - ls->fr2); } else { if (args.k != VVOID) expr_tonextreg(fs, &args); - ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base - LJ_FR2); + ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base - ls->fr2); } expr_init(e, VCALL, bcemit_INS(fs, ins)); e->u.s.aux = base; @@ -1980,7 +1984,7 @@ static void expr_primary(LexState *ls, ExpDesc *v) parse_args(ls, v); } else if (ls->tok == '(' || ls->tok == TK_string || ls->tok == '{') { expr_tonextreg(fs, v); - if (LJ_FR2) bcreg_reserve(fs, 1); + if (ls->fr2) bcreg_reserve(fs, 1); parse_args(ls, v); } else { break; @@ -2565,7 +2569,7 @@ static void parse_for_iter(LexState *ls, GCstr *indexname) line = ls->linenumber; assign_adjust(ls, 3, expr_list(ls, &e), &e); /* The iterator needs another 3 [4] slots (func [pc] | state ctl). */ - bcreg_bump(fs, 3+LJ_FR2); + bcreg_bump(fs, 3+ls->fr2); isnext = (nvars <= 5 && predict_next(ls, fs, exprpc)); var_add(ls, 3); /* Hidden control variables. */ lex_check(ls, TK_do); From 2f35cb45fdd557aacb3875ec6ffd5721f92c9a51 Mon Sep 17 00:00:00 2001 From: Mike PallDate: Mon, 22 Jan 2024 19:12:13 +0100 Subject: [PATCH 88/95] MIPS64 R2/R6: Fix FP to integer conversions. Thanks to Peter Cawley. #1146 --- src/lj_asm_mips.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index d4e40c91..5b83e34d 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -653,11 +653,11 @@ static void asm_conv(ASMState *as, IRIns *ir) rset_exclude(RSET_GPR, dest)); emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. 
*/ #if !LJ_TARGET_MIPSR6 - emit_branch(as, MIPSI_BC1T, 0, 0, l_end); - emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); + emit_branch(as, MIPSI_BC1T, 0, 0, l_end); + emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); #else - emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end); - emit_fgh(as, MIPSI_CMP_LT_D, left, left, tmp); + emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end); + emit_fgh(as, MIPSI_CMP_LT_D, tmp, left, tmp); #endif emit_lsptr(as, MIPSI_LDC1, (tmp & 31), (void *)&as->J->k64[LJ_K64_2P63], @@ -670,11 +670,11 @@ static void asm_conv(ASMState *as, IRIns *ir) rset_exclude(RSET_GPR, dest)); emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */ #if !LJ_TARGET_MIPSR6 - emit_branch(as, MIPSI_BC1T, 0, 0, l_end); - emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); + emit_branch(as, MIPSI_BC1T, 0, 0, l_end); + emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); #else - emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end); - emit_fgh(as, MIPSI_CMP_LT_S, left, left, tmp); + emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end); + emit_fgh(as, MIPSI_CMP_LT_S, tmp, left, tmp); #endif emit_lsptr(as, MIPSI_LWC1, (tmp & 31), (void *)&as->J->k32[LJ_K32_2P63], @@ -690,8 +690,8 @@ static void asm_conv(ASMState *as, IRIns *ir) MIPSIns mi = irt_is64(ir->t) ? (st == IRT_NUM ? MIPSI_TRUNC_L_D : MIPSI_TRUNC_L_S) : (st == IRT_NUM ? MIPSI_TRUNC_W_D : MIPSI_TRUNC_W_S); - emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, left); - emit_fg(as, mi, left, left); + emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, tmp); + emit_fg(as, mi, tmp, left); #endif } } From 3ca0a80711ef53e2e788bca7b282f8ad7c927b59 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 22 Jan 2024 19:17:45 +0100 Subject: [PATCH 89/95] DynASM/x86: Add endbr instruction. Thanks to Dmitry Stogov. 
#1143 #1142 --- dynasm/dasm_x86.lua | 2 ++ dynasm/dynasm.lua | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua index df70fed8..7c789f82 100644 --- a/dynasm/dasm_x86.lua +++ b/dynasm/dasm_x86.lua @@ -1151,6 +1151,8 @@ local map_op = { rep_0 = "F3", repe_0 = "F3", repz_0 = "F3", + endbr32_0 = "F30F1EFB", + endbr64_0 = "F30F1EFA", -- F4: *hlt cmc_0 = "F5", -- F6: test... mb,i; div... mb diff --git a/dynasm/dynasm.lua b/dynasm/dynasm.lua index 5be75f7f..0d15a872 100644 --- a/dynasm/dynasm.lua +++ b/dynasm/dynasm.lua @@ -75,7 +75,7 @@ local function wline(line, needindent) g_synclineno = g_synclineno + 1 end --- Write assembler line as a comment, if requestd. +-- Write assembler line as a comment, if requested. local function wcomment(aline) if g_opt.comment then wline(g_opt.comment..aline..g_opt.endcomment, true) From 85b4fed0b0353dd78c8c875c2f562d522a2b310f Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 23 Jan 2024 18:58:52 +0100 Subject: [PATCH 90/95] Fix unsinking of IR_FSTORE for NULL metatable. Reported by pwnhacker0x18. #1147 --- src/lj_snap.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/lj_snap.c b/src/lj_snap.c index b387dd76..4a773048 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -412,6 +412,7 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir) case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir)); case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir)); case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */ + case IR_KNULL: return lj_ir_knull(J, irt_type(ir->t)); default: lua_assert(0); return TREF_NIL; break; } } @@ -821,9 +822,13 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, if (irk->o == IR_FREF) { switch (irk->op2) { case IRFL_TAB_META: - snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp); - /* NOBARRIER: The table is new (marked white). 
*/ - setgcref(t->metatable, obj2gco(tabV(&tmp))); + if (T->ir[irs->op2].o == IR_KNULL) { + setgcrefnull(t->metatable); + } else { + snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp); + /* NOBARRIER: The table is new (marked white). */ + setgcref(t->metatable, obj2gco(tabV(&tmp))); + } break; case IRFL_TAB_NOMM: /* Negative metamethod cache invalidated by lj_tab_set() below. */ From 343ce0edaf3906a62022936175b2f5410024cbfc Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 25 Jan 2024 13:23:48 +0100 Subject: [PATCH 91/95] Fix zero stripping in %g number formatting. Reported by pwnhacker0x18. #1149 --- src/lj_strfmt_num.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lj_strfmt_num.c b/src/lj_strfmt_num.c index 79ec0263..c6e776aa 100644 --- a/src/lj_strfmt_num.c +++ b/src/lj_strfmt_num.c @@ -454,7 +454,8 @@ static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p) prec--; if (!i) { if (ndlo == ndhi) { prec = 0; break; } - lj_strfmt_wuint9(tail, nd[++ndlo]); + ndlo = (ndlo + 1) & 0x3f; + lj_strfmt_wuint9(tail, nd[ndlo]); i = 9; } } From e6c0ade97c6b250a44e8f4b964024a22d913b860 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Fri, 26 Jan 2024 23:17:33 +0100 Subject: [PATCH 92/95] Fix documentation bug about '\z' string escape. --- doc/extensions.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/extensions.html b/doc/extensions.html index f8b45c28..04a9ae07 100644 --- a/doc/extensions.html +++ b/doc/extensions.html @@ -244,7 +244,7 @@ enabled:
- goto and ::labels::.
-- Hex escapes '\x3F' and '\*' escape in strings.
+- Hex escapes '\x3F' and '\z' escape in strings.
- load(string|reader [, chunkname [,mode [,env]]]).
- loadstring() is an alias for load().
- loadfile(filename [,mode [,env]]).
From 14987af80ab583514f19ef36d1023655324fc757 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 31 Jan 2024 14:29:23 +0100 Subject: [PATCH 93/95] Prevent include of luajit_rolling.h. Thanks to Peter Cawley. #1145 --- src/host/genversion.lua | 2 +- src/luajit_rolling.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/host/genversion.lua b/src/host/genversion.lua index 28f7206c..f0925160 100644 --- a/src/host/genversion.lua +++ b/src/host/genversion.lua @@ -29,7 +29,7 @@ local function file_write_mod(file, data) assert(fp:close()) end -local text = file_read(FILE_ROLLING_H) +local text = file_read(FILE_ROLLING_H):gsub("#error.-\n", "") local relver = file_read(FILE_RELVER_TXT):match("(%d+)") if relver then diff --git a/src/luajit_rolling.h b/src/luajit_rolling.h index 27368836..e7ff2c23 100644 --- a/src/luajit_rolling.h +++ b/src/luajit_rolling.h @@ -67,4 +67,5 @@ LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode); /* Enforce (dynamic) linker error for version mismatches. Call from main. */ LUA_API void LUAJIT_VERSION_SYM(void); +#error "DO NOT USE luajit_rolling.h -- only include build-generated luajit.h" #endif From 9cdd5a9479d2265f42dfefc17d068174969bbcff Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 31 Jan 2024 14:32:04 +0100 Subject: [PATCH 94/95] Preserve keys with dynamic values in template tables when saving bytecode. Reported by Lyrthras. Fixed by Peter Cawley. #1155 --- src/lj_bcwrite.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c index 6282f767..9820ad12 100644 --- a/src/lj_bcwrite.c +++ b/src/lj_bcwrite.c @@ -124,7 +124,7 @@ static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t) MSize i, hmask = t->hmask; Node *node = noderef(t->node); for (i = 0; i <= hmask; i++) - nhash += !tvisnil(&node[i].val); + nhash += !tvisnil(&node[i].key); } /* Write number of array slots and hash slots. 
*/ bcwrite_uleb128(ctx, narray); @@ -139,7 +139,7 @@ static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t) MSize i = nhash; Node *node = noderef(t->node) + t->hmask; for (;; node--) - if (!tvisnil(&node->val)) { + if (!tvisnil(&node->key)) { bcwrite_ktabk(ctx, &node->key, 0); bcwrite_ktabk(ctx, &node->val, 1); if (--i == 0) break; From defe61a56751a0db5f00ff3ab7b8f45436ba74c8 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 4 Feb 2024 16:34:30 +0100 Subject: [PATCH 95/95] Rework stack overflow handling. Reported by pwnhacker0x18. Fixed by Peter Cawley. #1152 --- src/lj_debug.c | 1 + src/lj_err.c | 22 +++++++++++++++++--- src/lj_err.h | 1 + src/lj_state.c | 56 +++++++++++++++++++++++++++++++++----------------- 4 files changed, 58 insertions(+), 22 deletions(-) diff --git a/src/lj_debug.c b/src/lj_debug.c index e6a8be54..bca1d7a5 100644 --- a/src/lj_debug.c +++ b/src/lj_debug.c @@ -63,6 +63,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) if (cf == NULL || (char *)cframe_pc(cf) == (char *)cframe_L(cf)) return NO_BCPOS; ins = cframe_pc(cf); /* Only happens during error/hook handling. */ + if (!ins) return NO_BCPOS; } else { if (frame_islua(nextframe)) { ins = frame_pc(nextframe); diff --git a/src/lj_err.c b/src/lj_err.c index 4a2d6bbd..7afe1e29 100644 --- a/src/lj_err.c +++ b/src/lj_err.c @@ -488,7 +488,14 @@ LJ_NOINLINE void lj_err_mem(lua_State *L) { if (L->status == LUA_ERRERR+1) /* Don't touch the stack during lua_open. */ lj_vm_unwind_c(L->cframe, LUA_ERRMEM); - if (curr_funcisL(L)) L->top = curr_topL(L); + if (curr_funcisL(L)) { + L->top = curr_topL(L); + if (LJ_UNLIKELY(L->top > tvref(L->maxstack))) { + /* The current Lua frame violates the stack. Replace it with a dummy. 
*/ + L->top = L->base; + setframe_gc(L->base - 1, obj2gco(L)); + } + } setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRMEM)); lj_err_throw(L, LUA_ERRMEM); } @@ -551,9 +558,11 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L) { ptrdiff_t ef = finderrfunc(L); if (ef) { - TValue *errfunc = restorestack(L, ef); - TValue *top = L->top; + TValue *errfunc, *top; + lj_state_checkstack(L, LUA_MINSTACK * 2); /* Might raise new error. */ lj_trace_abort(G(L)); + errfunc = restorestack(L, ef); + top = L->top; if (!tvisfunc(errfunc) || L->status == LUA_ERRERR) { setstrV(L, top-1, lj_err_str(L, LJ_ERR_ERRERR)); lj_err_throw(L, LUA_ERRERR); @@ -567,6 +576,13 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L) lj_err_throw(L, LUA_ERRRUN); } +/* Stack overflow error. */ +void LJ_FASTCALL lj_err_stkov(lua_State *L) +{ + lj_debug_addloc(L, err2msg(LJ_ERR_STKOV), L->base-1, NULL); + lj_err_run(L); +} + /* Formatted runtime error message. */ LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...) { diff --git a/src/lj_err.h b/src/lj_err.h index 321719a9..15040922 100644 --- a/src/lj_err.h +++ b/src/lj_err.h @@ -23,6 +23,7 @@ LJ_DATA const char *lj_err_allmsg; LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em); LJ_FUNCA_NORET void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode); LJ_FUNC_NORET void lj_err_mem(lua_State *L); +LJ_FUNC_NORET void LJ_FASTCALL lj_err_stkov(lua_State *L); LJ_FUNCA_NORET void LJ_FASTCALL lj_err_run(lua_State *L); LJ_FUNC_NORET void lj_err_msg(lua_State *L, ErrMsg em); LJ_FUNC_NORET void lj_err_lex(lua_State *L, GCstr *src, const char *tok, diff --git a/src/lj_state.c b/src/lj_state.c index c2f0b115..adedb66c 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -96,27 +96,45 @@ void lj_state_shrinkstack(lua_State *L, MSize used) /* Try to grow stack. */ void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need) { - MSize n; - if (L->stacksize >= LJ_STACK_MAXEX) { - /* 4. Throw 'error in error handling' when we are _over_ the limit. 
*/ - if (L->stacksize > LJ_STACK_MAXEX) - lj_err_throw(L, LUA_ERRERR); /* Does not invoke an error handler. */ - /* 1. We are _at_ the limit after the last growth. */ - if (L->status < LUA_ERRRUN) { /* 2. Throw 'stack overflow'. */ - L->status = LUA_ERRRUN; /* Prevent ending here again for pushed msg. */ - lj_err_msg(L, LJ_ERR_STKOV); /* May invoke an error handler. */ + MSize n = L->stacksize + need; + if (LJ_LIKELY(n < LJ_STACK_MAX)) { /* The stack can grow as requested. */ + if (n < 2 * L->stacksize) { /* Try to double the size. */ + n = 2 * L->stacksize; + if (n > LJ_STACK_MAX) + n = LJ_STACK_MAX; + } + resizestack(L, n); + } else { /* Request would overflow. Raise a stack overflow error. */ + if (curr_funcisL(L)) { + L->top = curr_topL(L); + if (L->top > tvref(L->maxstack)) { + /* The current Lua frame violates the stack, so replace it with a + ** dummy. This can happen when BC_IFUNCF is trying to grow the stack. + */ + L->top = L->base; + setframe_gc(L->base - 1, obj2gco(L)); + } + } + if (L->stacksize <= LJ_STACK_MAXEX) { + /* An error handler might want to inspect the stack overflow error, but + ** will need some stack space to run in. We give it a stack size beyond + ** the normal limit in order to do so, then rely on lj_state_relimitstack + ** calls during unwinding to bring us back to a conventional stack size. + ** The + 1 is space for the error message, and 2 * LUA_MINSTACK is for + ** the lj_state_checkstack() call in lj_err_run(). + */ + resizestack(L, LJ_STACK_MAX + 1 + 2 * LUA_MINSTACK); + lj_err_stkov(L); /* May invoke an error handler. */ + } else { + /* If we're here, then the stack overflow error handler is requesting + ** to grow the stack even further. We have no choice but to abort the + ** error handler. + */ + GCstr *em = lj_err_str(L, LJ_ERR_STKOV); /* Might OOM. */ + setstrV(L, L->top++, em); /* There is always space to push an error. */ + lj_err_throw(L, LUA_ERRERR); /* Does not invoke an error handler. */ } - /* 3. 
Add space (over the limit) for pushed message and error handler. */ } - n = L->stacksize + need; - if (n > LJ_STACK_MAX) { - n += 2*LUA_MINSTACK; - } else if (n < 2*L->stacksize) { - n = 2*L->stacksize; - if (n >= LJ_STACK_MAX) - n = LJ_STACK_MAX; - } - resizestack(L, n); } void LJ_FASTCALL lj_state_growstack1(lua_State *L)