From 309fb42b871b6414f53e0e0e708bce0b0d62daff Mon Sep 17 00:00:00 2001
From: Mike Pall <mike>
Date: Mon, 28 Aug 2023 21:00:37 +0200
Subject: [PATCH 01/13] Fix predict_next() in parser (again).

Reported by Sergey Bronnikov. #1054
---
 src/lj_parse.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/lj_parse.c b/src/lj_parse.c
index c0cbd261..afdbcc3d 100644
--- a/src/lj_parse.c
+++ b/src/lj_parse.c
@@ -2527,9 +2527,11 @@ static void parse_for_num(LexState *ls, GCstr *varname, BCLine line)
 */
 static int predict_next(LexState *ls, FuncState *fs, BCPos pc)
 {
-  BCIns ins = fs->bcbase[pc].ins;
+  BCIns ins;
   GCstr *name;
   cTValue *o;
+  if (pc >= fs->bclim) return 0;
+  ins = fs->bcbase[pc].ins;
   switch (bc_op(ins)) {
   case BC_MOV:
     if (bc_d(ins) >= fs->nactvar) return 0;

From 14e2917e7ab3d6f043d6604298bfa66470c6f47d Mon Sep 17 00:00:00 2001
From: Mike Pall <mike>
Date: Mon, 28 Aug 2023 21:04:01 +0200
Subject: [PATCH 02/13] Fix external C call stack check when using
 LUAJIT_MODE_WRAPCFUNC.

Thanks to Peter Cawley. #1047
---
 src/lj_dispatch.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c
index 8009d289..63e09752 100644
--- a/src/lj_dispatch.c
+++ b/src/lj_dispatch.c
@@ -292,9 +292,9 @@ int luaJIT_setmode(lua_State *L, int idx, int mode)
       } else {
 	return 0;  /* Failed. */
       }
-      g->bc_cfunc_ext = BCINS_AD(BC_FUNCCW, 0, 0);
+      setbc_op(&g->bc_cfunc_ext, BC_FUNCCW);
     } else {
-      g->bc_cfunc_ext = BCINS_AD(BC_FUNCC, 0, 0);
+      setbc_op(&g->bc_cfunc_ext, BC_FUNCC);
     }
     break;
   default:
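Background for PATCH 01: predict_next() inspects an already-emitted bytecode
instruction while parsing `for` statements, but the position it inspects can
lie past the last emitted instruction. A standalone sketch of the
bounds-check pattern the fix adopts (illustrative names, not the LuaJIT API):

    #include <stdint.h>
    #include <stddef.h>

    typedef struct { uint32_t ins; } BCEntry;

    /* Validate the index against its limit before dereferencing. */
    static int fetch_ins(const BCEntry *base, size_t lim, size_t pc,
                         uint32_t *ins)
    {
      if (pc >= lim) return 0;  /* Out of range: predict nothing. */
      *ins = base[pc].ins;      /* Safe only after the range check. */
      return 1;
    }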
From a0b52aae33ffaeff2cc3f28b6e125f9582b133d6 Mon Sep 17 00:00:00 2001
From: Mike Pall <mike>
Date: Mon, 28 Aug 2023 21:59:01 +0200
Subject: [PATCH 03/13] Handle non-.git checkout with .relver in .bat-file
 builds.

Thanks to Simon Cooke.
---
 src/msvcbuild.bat   | 2 +-
 src/ps4build.bat    | 2 +-
 src/psvitabuild.bat | 2 +-
 src/xedkbuild.bat   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat
index 44781e10..1f60b8f4 100644
--- a/src/msvcbuild.bat
+++ b/src/msvcbuild.bat
@@ -42,7 +42,7 @@ if exist minilua.exe.manifest^
 minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc
 @if errorlevel 1 goto :BAD
 
-git show -s --format=%%ct >luajit_relver.txt
+if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
 minilua host\genversion.lua
 
 %LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c
diff --git a/src/ps4build.bat b/src/ps4build.bat
index 306b06ef..5ae92171 100644
--- a/src/ps4build.bat
+++ b/src/ps4build.bat
@@ -31,7 +31,7 @@ if exist minilua.exe.manifest^
 minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc
 @if errorlevel 1 goto :BAD
 
-git show -s --format=%%ct >luajit_relver.txt
+if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
 minilua host\genversion.lua
 
 %LJCOMPILE% /I "." /I %DASMDIR% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c
diff --git a/src/psvitabuild.bat b/src/psvitabuild.bat
index 9be485ae..132fed0a 100644
--- a/src/psvitabuild.bat
+++ b/src/psvitabuild.bat
@@ -31,7 +31,7 @@ if exist minilua.exe.manifest^
 minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_arm.dasc
 @if errorlevel 1 goto :BAD
 
-git show -s --format=%%ct >luajit_relver.txt
+if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
 minilua host\genversion.lua
 
 %LJCOMPILE% /I "." /I %DASMDIR% -DLUAJIT_TARGET=LUAJIT_ARCH_ARM -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLJ_TARGET_PSVITA=1 host\buildvm*.c
diff --git a/src/xedkbuild.bat b/src/xedkbuild.bat
index 145219c8..b07f3bc2 100644
--- a/src/xedkbuild.bat
+++ b/src/xedkbuild.bat
@@ -31,7 +31,7 @@ if exist minilua.exe.manifest^
 minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_ppc.dasc
 @if errorlevel 1 goto :BAD
 
-git show -s --format=%%ct >luajit_relver.txt
+if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
 minilua host\genversion.lua
 
 %LJCOMPILE% /I "." /I %DASMDIR% /D_XBOX_VER=200 /DLUAJIT_TARGET=LUAJIT_ARCH_PPC host\buildvm*.c

From 6a3111a57f817cb00ef2ab6f2553cd887ec36462 Mon Sep 17 00:00:00 2001
From: Mike Pall <mike>
Date: Mon, 28 Aug 2023 21:25:51 +0200
Subject: [PATCH 04/13] Use fallback name for install files without valid
 .git or .relver.

---
 src/host/genversion.lua | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/host/genversion.lua b/src/host/genversion.lua
index a38cec56..42b5e6fe 100644
--- a/src/host/genversion.lua
+++ b/src/host/genversion.lua
@@ -5,9 +5,9 @@
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 
-local FILE_INPUT_H = "luajit_rolling.h"
-local FILE_INPUT_R = "luajit_relver.txt"
-local FILE_OUTPUT_H = "luajit.h"
+local FILE_ROLLING_H = "luajit_rolling.h"
+local FILE_RELVER_TXT = "luajit_relver.txt"
+local FILE_LUAJIT_H = "luajit.h"
 
 local function file_read(file)
   local fp = assert(io.open(file, "rb"), "run from the wrong directory")
@@ -28,8 +28,8 @@ local function file_write_mod(file, data)
   assert(fp:close())
 end
 
-local text = file_read(FILE_INPUT_H)
-local relver = file_read(FILE_INPUT_R):match("(%d+)")
+local text = file_read(FILE_ROLLING_H)
+local relver = file_read(FILE_RELVER_TXT):match("(%d+)")
 
 if relver then
   text = text:gsub("ROLLING", relver)
@@ -38,6 +38,7 @@ else
 **** WARNING Cannot determine rolling release version from git log.
 **** WARNING The 'git' command must be available during the build.
 ]])
+  file_write_mod(FILE_RELVER_TXT, "ROLLING\n") -- Fallback for install target.
 end
 
-file_write_mod(FILE_OUTPUT_H, text)
+file_write_mod(FILE_LUAJIT_H, text)
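Background for PATCHES 03-04: the release stamp now degrades in three steps:
the git commit timestamp when a .git checkout is present, the shipped .relver
file otherwise, and the literal "ROLLING" as the final fallback, which PATCH
04 also writes back so the install target always has a file to copy. A
condensed C sketch of that fallback order (hypothetical helper, not part of
the build system):

    #include <stdio.h>

    /* Resolve the rolling release number in the same fallback order. */
    static const char *relver(char *buf, size_t n)
    {
      FILE *fp = fopen("luajit_relver.txt", "rb");  /* Made by the build. */
      if (fp) {
        char *s = fgets(buf, (int)n, fp);
        fclose(fp);
        if (s) return s;  /* Git timestamp, or "ROLLING" from .relver. */
      }
      return "ROLLING";  /* Nothing readable: use the fallback name. */
    }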
From 0ef51b495f9497aac77b41eb3d837c9c38b9424b Mon Sep 17 00:00:00 2001
From: Mike Pall <mike>
Date: Mon, 28 Aug 2023 22:15:42 +0200
Subject: [PATCH 05/13] Handle table unsinking in the presence of
 IRFL_TAB_NOMM.

Reported by Sergey Kaplun. #1052
---
 src/lj_snap.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/lj_snap.c b/src/lj_snap.c
index 1ef75e83..a6cd93d4 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -796,17 +796,26 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
 		 lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
       settabV(J->L, o, t);
       irlast = &T->ir[T->snap[snapno].ref];
-      for (irs = ir+1; irs < irlast; irs++)
+      for (irs = ir+1; irs < irlast; irs++) {
 	if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
 	  IRIns *irk = &T->ir[irs->op1];
 	  TValue tmp, *val;
 	  lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
 		     irs->o == IR_FSTORE);
 	  if (irk->o == IR_FREF) {
-	    lua_assert(irk->op2 == IRFL_TAB_META);
-	    snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
-	    /* NOBARRIER: The table is new (marked white). */
-	    setgcref(t->metatable, obj2gco(tabV(&tmp)));
+	    switch (irk->op2) {
+	    case IRFL_TAB_META:
+	      snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
+	      /* NOBARRIER: The table is new (marked white). */
+	      setgcref(t->metatable, obj2gco(tabV(&tmp)));
+	      break;
+	    case IRFL_TAB_NOMM:
+	      /* Negative metamethod cache invalidated by lj_tab_set() below. */
+	      break;
+	    default:
+	      lua_assert(0);
+	      break;
+	    }
 	  } else {
 	    irk = &T->ir[irk->op2];
 	    if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
@@ -820,6 +829,7 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
 	  }
 	}
       }
+      }
     }
   }
 }

From 0fa2f1cbcf023ad0549f1428809e506fa2c78552 Mon Sep 17 00:00:00 2001
From: Mike Pall <mike>
Date: Mon, 28 Aug 2023 22:33:54 +0200
Subject: [PATCH 06/13] ARM64: Fix LDP/STP fusing for unaligned accesses.

Thanks to Peter Cawley. #1056
---
 src/lj_emit_arm64.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
index 52d010b8..6926c71a 100644
--- a/src/lj_emit_arm64.h
+++ b/src/lj_emit_arm64.h
@@ -142,7 +142,7 @@ static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs)
     } else {
       goto nopair;
     }
-    if (ofsm >= (int)((unsigned int)-64<<sc) && ofsm <= (int)(63<<sc)) {
+    if (ofsm >= (int)((unsigned int)-64<<sc) && ofsm <= (int)(63<<sc) && !(ofsm & ((1<<sc)-1))) {
       *as->mcp = aip | A64F_N(rn) | (((ofsm >> sc) & 0x7f) << 15) |
 	(ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000));
       return;

From 7cc53f0b85f834dfba1516ea79d59db463e856fa Mon Sep 17 00:00:00 2001
From: Mike Pall <mike>
Date: Mon, 28 Aug 2023 22:39:35 +0200
Subject: [PATCH 07/13] ARM64: Prevent STP fusion for conditional code
 emitted by TBAR.

Thanks to Peter Cawley. #1057
---
 src/lj_asm_arm64.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index 34960d7c..1d5cca4f 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -1289,8 +1289,9 @@ static void asm_tbar(ASMState *as, IRIns *ir)
   Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
   Reg mark = RID_TMP;
   MCLabel l_end = emit_label(as);
-  emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
   emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
+  /* Keep STRx in the middle to avoid LDP/STP fusion with surrounding code. */
+  emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
   emit_setgl(as, tab, gc.grayagain);
   emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark);
   emit_getgl(as, link, gc.grayagain);
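Background for PATCH 06: an LDP/STP instruction encodes its immediate as a
scaled 7-bit field, i.e. the byte offset divided by 1 << sc, so an offset
that is in range but not a multiple of 1 << sc would silently drop its low
bits when two accesses are fused. A standalone restatement of the
range-plus-alignment test (illustrative helper, not the emitter itself):

    /* sc is the access scale: 2 for 32-bit, 3 for 64-bit pairs. */
    static int ldp_fuse_ok(int ofs, int sc)
    {
      return ofs >= (int)((unsigned int)-64 << sc) &&  /* >= -64 slots. */
             ofs <= (63 << sc) &&                      /* <= 63 slots. */
             (ofs & ((1 << sc) - 1)) == 0;             /* Slot-aligned. */
    }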
From cf903edb30e0cbd620ebd4bac02d4e2b4410fd02 Mon Sep 17 00:00:00 2001
From: Mike Pall <mike>
Date: Tue, 29 Aug 2023 02:12:13 +0200
Subject: [PATCH 08/13] FFI: Unify stack setup for C calls in interpreter.

---
 src/lj_ccall.c     | 57 +++++++++++++++++++++++++---------------------
 src/lj_ccall.h     |  7 +++---
 src/vm_arm.dasc    |  8 +++----
 src/vm_arm64.dasc  |  8 +++----
 src/vm_mips.dasc   |  1 -
 src/vm_mips64.dasc |  1 -
 src/vm_ppc.dasc    |  3 +--
 src/vm_x64.dasc    |  8 +++----
 src/vm_x86.dasc    | 24 +++++++++++--------
 9 files changed, 63 insertions(+), 54 deletions(-)

diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index 04e306eb..9001cb5a 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -20,12 +20,15 @@
 
 #if LJ_TARGET_X86
 /* -- x86 calling conventions --------------------------------------------- */
 
+#define CCALL_PUSH(arg) \
+  *(GPRArg *)((uint8_t *)cc->stack + nsp) = (GPRArg)(arg), nsp += CTSIZE_PTR
+
 #if LJ_ABI_WIN
 
 #define CCALL_HANDLE_STRUCTRET \
   /* Return structs bigger than 8 by reference (on stack only). */ \
   cc->retref = (sz > 8); \
-  if (cc->retref) cc->stack[nsp++] = (GPRArg)dp;
+  if (cc->retref) CCALL_PUSH(dp);
 
 #define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET
@@ -40,7 +43,7 @@
     if (ngpr < maxgpr) \
       cc->gpr[ngpr++] = (GPRArg)dp; \
     else \
-      cc->stack[nsp++] = (GPRArg)dp; \
+      CCALL_PUSH(dp); \
   } else {  /* Struct with single FP field ends up in FPR. */ \
     cc->resx87 = ccall_classify_struct(cts, ctr); \
   }
@@ -56,7 +59,7 @@
   if (ngpr < maxgpr) \
     cc->gpr[ngpr++] = (GPRArg)dp; \
   else \
-    cc->stack[nsp++] = (GPRArg)dp;
+    CCALL_PUSH(dp);
 
 #endif
@@ -67,7 +70,7 @@
     if (ngpr < maxgpr) \
       cc->gpr[ngpr++] = (GPRArg)dp; \
     else \
-      cc->stack[nsp++] = (GPRArg)dp; \
+      CCALL_PUSH(dp); \
   }
@@ -278,8 +281,8 @@
   if (ngpr < maxgpr) { \
     dp = &cc->gpr[ngpr]; \
     if (ngpr + n > maxgpr) { \
-      nsp += ngpr + n - maxgpr;  /* Assumes contiguous gpr/stack fields. */ \
-      if (nsp > CCALL_MAXSTACK) goto err_nyi;  /* Too many arguments. */ \
+      nsp += (ngpr + n - maxgpr) * CTSIZE_PTR;  /* Assumes contiguous gpr/stack fields. */ \
+      if (nsp > CCALL_SIZE_STACK) goto err_nyi;  /* Too many arguments. */ \
       ngpr = maxgpr; \
     } else { \
       ngpr += n; \
    }
@@ -471,8 +474,8 @@
   if (ngpr < maxgpr) { \
     dp = &cc->gpr[ngpr]; \
     if (ngpr + n > maxgpr) { \
-      nsp += ngpr + n - maxgpr;  /* Assumes contiguous gpr/stack fields. */ \
-      if (nsp > CCALL_MAXSTACK) goto err_nyi;  /* Too many arguments. */ \
+      nsp += (ngpr + n - maxgpr) * CTSIZE_PTR;  /* Assumes contiguous gpr/stack fields. */ \
+      if (nsp > CCALL_SIZE_STACK) goto err_nyi;  /* Too many arguments. */ \
       ngpr = maxgpr; \
     } else { \
       ngpr += n; \
    }
@@ -565,8 +568,8 @@
   if (ngpr < maxgpr) { \
     dp = &cc->gpr[ngpr]; \
     if (ngpr + n > maxgpr) { \
-      nsp += ngpr + n - maxgpr;  /* Assumes contiguous gpr/stack fields. */ \
-      if (nsp > CCALL_MAXSTACK) goto err_nyi;  /* Too many arguments. */ \
+      nsp += (ngpr + n - maxgpr) * CTSIZE_PTR;  /* Assumes contiguous gpr/stack fields. */ \
+      if (nsp > CCALL_SIZE_STACK) goto err_nyi;  /* Too many arguments. */ \
       ngpr = maxgpr; \
     } else { \
       ngpr += n; \
    }
@@ -698,10 +701,11 @@ static int ccall_struct_arg(CCallState *cc, CTState *cts, CType *d, int *rcl,
   lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
   if (ccall_struct_reg(cc, cts, dp, rcl)) {
     /* Register overflow? Pass on stack. */
-    MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1;
-    if (nsp + n > CCALL_MAXSTACK) return 1;  /* Too many arguments. */
-    cc->nsp = nsp + n;
-    memcpy(&cc->stack[nsp], dp, n*CTSIZE_PTR);
+    MSize nsp = cc->nsp, sz = rcl[1] ? 2*CTSIZE_PTR : CTSIZE_PTR;
+    if (nsp + sz > CCALL_SIZE_STACK)
+      return 1;  /* Too many arguments. */
+    cc->nsp = nsp + sz;
+    memcpy((uint8_t *)cc->stack + nsp, dp, sz);
   }
   return 0;  /* Ok. */
 }
@@ -1022,22 +1026,23 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
     } else {
       sz = CTSIZE_PTR;
     }
-    sz = (sz + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
-    n = sz / CTSIZE_PTR;  /* Number of GPRs or stack slots needed. */
+    n = (sz + CTSIZE_PTR-1) / CTSIZE_PTR;  /* Number of GPRs or stack slots needed. */
 
     CCALL_HANDLE_REGARG  /* Handle register arguments. */
 
     /* Otherwise pass argument on stack. */
-    if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) {
-      MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1;
-      nsp = (nsp + align) & ~align;  /* Align argument on stack. */
+    if (CCALL_ALIGN_STACKARG) {  /* Align argument on stack. */
+      MSize align = (1u << ctype_align(d->info)) - 1;
+      if (rp)
+	align = CTSIZE_PTR-1;
+      nsp = (nsp + align) & ~align;
     }
-    if (nsp + n > CCALL_MAXSTACK) {  /* Too many arguments. */
+    dp = ((uint8_t *)cc->stack) + nsp;
+    nsp += n * CTSIZE_PTR;
+    if (nsp > CCALL_SIZE_STACK) {  /* Too many arguments. */
    err_nyi:
       lj_err_caller(L, LJ_ERR_FFI_NYICALL);
     }
-    dp = &cc->stack[nsp];
-    nsp += n;
     isva = 0;
 
   done:
@@ -1099,10 +1104,10 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
 #if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
   cc->nfpr = nfpr;  /* Required for vararg functions. */
 #endif
-  cc->nsp = nsp;
-  cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA)*CTSIZE_PTR;
-  if (nsp > CCALL_SPS_FREE)
-    cc->spadj += (((nsp-CCALL_SPS_FREE)*CTSIZE_PTR + 15u) & ~15u);
+  cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
+  cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA) * CTSIZE_PTR;
+  if (cc->nsp > CCALL_SPS_FREE * CTSIZE_PTR)
+    cc->spadj += (((cc->nsp - CCALL_SPS_FREE * CTSIZE_PTR) + 15u) & ~15u);
   return gcsteps;
 }
diff --git a/src/lj_ccall.h b/src/lj_ccall.h
index 547415f7..57300817 100644
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -152,14 +152,15 @@ typedef union FPRArg {
 LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR);
 LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR);
 
-#define CCALL_MAXSTACK		32
+#define CCALL_NUM_STACK		31
+#define CCALL_SIZE_STACK	(CCALL_NUM_STACK * CTSIZE_PTR)
 
 /* -- C call state -------------------------------------------------------- */
 
 typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
   void (*func)(void);		/* Pointer to called function. */
   uint32_t spadj;		/* Stack pointer adjustment. */
-  uint8_t nsp;			/* Number of stack slots. */
+  uint8_t nsp;			/* Number of bytes on stack. */
   uint8_t retref;		/* Return value by reference. */
 #if LJ_TARGET_X64
   uint8_t ngpr;			/* Number of arguments in GPRs. */
@@ -178,7 +179,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
   FPRArg fpr[CCALL_NUM_FPR];	/* Arguments/results in FPRs. */
 #endif
   GPRArg gpr[CCALL_NUM_GPR];	/* Arguments/results in GPRs. */
-  GPRArg stack[CCALL_MAXSTACK];	/* Stack slots. */
+  GPRArg stack[CCALL_NUM_STACK];	/* Stack slots. */
 } CCallState;
 
 /* -- C call handling ----------------------------------------------------- */
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index 4f0798e0..0d1ea95f 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -2571,16 +2571,16 @@ static void build_subroutines(BuildCtx *ctx)
   |.endif
   |  mov r11, sp
   |  sub sp, sp, CARG1		// Readjust stack.
-  |  subs CARG2, CARG2, #1
+  |  subs CARG2, CARG2, #4
   |.if HFABI
   |  vldm RB, {d0-d7}
   |.endif
   |  ldr RB, CCSTATE->func
   |  bmi >2
   |1:  // Copy stack slots.
-  |  ldr CARG4, [CARG3, CARG2, lsl #2]
-  |  str CARG4, [sp, CARG2, lsl #2]
-  |  subs CARG2, CARG2, #1
+  |  ldr CARG4, [CARG3, CARG2]
+  |  str CARG4, [sp, CARG2]
+  |  subs CARG2, CARG2, #4
   |  bpl <1
   |2:
   |  ldrd CARG12, CCSTATE->gpr[0]
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index a7a9392c..698b4210 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -2222,14 +2222,14 @@ static void build_subroutines(BuildCtx *ctx)
   | ldr TMP0w, CCSTATE:x0->spadj
   | ldrb TMP1w, CCSTATE->nsp
   | add TMP2, CCSTATE, #offsetof(CCallState, stack)
-  | subs TMP1, TMP1, #1
+  | subs TMP1, TMP1, #8
   | ldr TMP3, CCSTATE->func
   | sub sp, sp, TMP0
   | bmi >2
   |1:  // Copy stack slots
-  | ldr TMP0, [TMP2, TMP1, lsl #3]
-  | str TMP0, [sp, TMP1, lsl #3]
-  | subs TMP1, TMP1, #1
+  | ldr TMP0, [TMP2, TMP1]
+  | str TMP0, [sp, TMP1]
+  | subs TMP1, TMP1, #8
   | bpl <1
   |2:
   | ldp x0, x1, CCSTATE->gpr[0]
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index 94a878b9..f276745c 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -2951,7 +2951,6 @@ static void build_subroutines(BuildCtx *ctx)
   |  move TMP2, sp
   |  subu sp, sp, TMP1
   |  sw ra, -4(TMP2)
-  |  sll CARG2, CARG2, 2
   |  sw r16, -8(TMP2)
   |  sw CCSTATE, -12(TMP2)
   |  move r16, TMP2
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
index f8e181ee..6c215f2b 100644
--- a/src/vm_mips64.dasc
+++ b/src/vm_mips64.dasc
@@ -3065,7 +3065,6 @@ static void build_subroutines(BuildCtx *ctx)
   |  move TMP2, sp
   |  dsubu sp, sp, TMP1
   |  sd ra, -8(TMP2)
-  |  sll CARG2, CARG2, 3
   |  sd r16, -16(TMP2)
   |  sd CCSTATE, -24(TMP2)
   |  move r16, TMP2
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index 73a70a00..f2e5a08f 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -3269,14 +3269,13 @@ static void build_subroutines(BuildCtx *ctx)
   |  stw TMP0, 4(sp)
   |  cmpwi cr1, CARG3, 0
   |  mr TMP2, sp
-  |  addic. CARG2, CARG2, -1
+  |  addic. CARG2, CARG2, -4
   |  stwux sp, sp, TMP1
   |  crnot 4*cr1+eq, 4*cr1+eq	// For vararg calls.
   |  stw r14, -4(TMP2)
   |  stw CCSTATE, -8(TMP2)
   |  mr r14, TMP2
   |  la TMP1, CCSTATE->stack
-  |  slwi CARG2, CARG2, 2
   |  blty >2
   |  la TMP2, 8(sp)
   |1:
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index a8649b4e..3635ba28 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -2755,12 +2755,12 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  // Copy stack slots.
   |  movzx ecx, byte CCSTATE->nsp
-  |  sub ecx, 1
+  |  sub ecx, 8
   |  js >2
   |1:
-  |  mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
-  |  mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
-  |  sub ecx, 1
+  |  mov rax, [CCSTATE+rcx+offsetof(CCallState, stack)]
+  |  mov [rsp+rcx+CCALL_SPS_EXTRA*8], rax
+  |  sub ecx, 8
   |  jns <1
   |2:
   |
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index bda9d7d7..c44a24ff 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -3314,19 +3314,25 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  // Copy stack slots.
   |  movzx ecx, byte CCSTATE->nsp
-  |  sub ecx, 1
+  |.if X64
+  |  sub ecx, 8
   |  js >2
   |1:
-  |.if X64
-  |  mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
-  |  mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
-  |.else
-  |  mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)]
-  |  mov [esp+ecx*4], eax
-  |.endif
-  |  sub ecx, 1
+  |  mov rax, [CCSTATE+rcx+offsetof(CCallState, stack)]
+  |  mov [rsp+rcx+CCALL_SPS_EXTRA*8], rax
+  |  sub ecx, 8
   |  jns <1
   |2:
+  |.else
+  |  sub ecx, 4
+  |  js >2
+  |1:
+  |  mov eax, [CCSTATE+ecx+offsetof(CCallState, stack)]
+  |  mov [esp+ecx], eax
+  |  sub ecx, 4
+  |  jns <1
+  |2:
+  |.endif
   |
   |.if X64
   |  movzx eax, byte CCSTATE->nfpr
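Background for PATCH 08: cc->nsp changes meaning from "number of stack slots"
to "number of bytes on the stack". That is why every stack-copy loop above
now counts down by the word size (4 on 32-bit, 8 on 64-bit targets) instead
of by 1: the loop index is a byte offset. At the C level the whole family of
loops amounts to (a sketch, not the VM code):

    #include <string.h>

    /* Copy the pre-marshalled arguments to the machine stack. */
    static void copy_stack_args(void *sp, const void *stack, unsigned nsp)
    {
      memcpy(sp, stack, nsp);  /* nsp is already a byte count. */
    }

Byte-exact accounting is what makes the packed stack layout of the next
patch possible.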
From 83954100dba9fc0cf5eeaf122f007df35ec9a604 Mon Sep 17 00:00:00 2001
From: Mike Pall <mike>
Date: Tue, 29 Aug 2023 02:21:51 +0200
Subject: [PATCH 09/13] FFI/ARM64/OSX: Handle non-standard OSX C calling
 conventions.

Contributed by Peter Cawley. #205
---
 src/lj_asm_arm64.h | 75 ++++++++++++++++++++++++++++++++++++----------
 src/lj_ccall.c     | 11 ++++---
 src/lj_ccall.h     |  6 ++++
 src/lj_crecord.c   | 27 +++++++++++++++++
 4 files changed, 98 insertions(+), 21 deletions(-)

diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index 1d5cca4f..3889883d 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -419,7 +419,7 @@ static int asm_fuseorshift(ASMState *as, IRIns *ir)
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
   uint32_t n, nargs = CCI_XNARGS(ci);
-  int32_t ofs = 0;
+  int32_t spofs = 0, spalign = LJ_HASFFI && LJ_TARGET_OSX ? 0 : 7;
   Reg gpr, fpr = REGARG_FIRSTFPR;
   if (ci->func)
     emit_call(as, ci->func);
@@ -438,8 +438,14 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 	fpr++;
       } else {
 	Reg r = ra_alloc1(as, ref, RSET_FPR);
-	emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0));
-	ofs += 8;
+	int32_t al = spalign;
+#if LJ_HASFFI && LJ_TARGET_OSX
+	al |= irt_isnum(ir->t) ? 7 : 3;
+#endif
+	spofs = (spofs + al) & ~al;
+	if (LJ_BE && al >= 7 && !irt_isnum(ir->t)) spofs += 4, al -= 4;
+	emit_spstore(as, ir, r, spofs);
+	spofs += al + 1;
       }
     } else {
       if (gpr <= REGARG_LASTGPR) {
@@ -449,10 +455,27 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 	gpr++;
       } else {
 	Reg r = ra_alloc1(as, ref, RSET_GPR);
-	emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0));
-	ofs += 8;
+	int32_t al = spalign;
+#if LJ_HASFFI && LJ_TARGET_OSX
+	al |= irt_size(ir->t) - 1;
+#endif
+	spofs = (spofs + al) & ~al;
+	if (al >= 3) {
+	  if (LJ_BE && al >= 7 && !irt_is64(ir->t)) spofs += 4, al -= 4;
+	  emit_spstore(as, ir, r, spofs);
+	} else {
+	  lj_assertA(al == 0 || al == 1, "size %d unexpected", al + 1);
+	  emit_lso(as, al ? A64I_STRH : A64I_STRB, r, RID_SP, spofs);
+	}
+	spofs += al + 1;
       }
+#if LJ_HASFFI && LJ_TARGET_OSX
+    } else {  /* Marker for start of varargs. */
+      gpr = REGARG_LASTGPR+1;
+      fpr = REGARG_LASTFPR+1;
+      spalign = 7;
+#endif
     }
   }
 }
@@ -1976,19 +1999,41 @@ static void asm_tail_prep(ASMState *as)
 /* Ensure there are enough stack slots for call arguments. */
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
-  IRRef args[CCI_NARGS_MAX*2];
+#if LJ_HASFFI
   uint32_t i, nargs = CCI_XNARGS(ci);
-  int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
-  asm_collectargs(as, ir, ci, args);
-  for (i = 0; i < nargs; i++) {
-    if (args[i] && irt_isfp(IR(args[i])->t)) {
-      if (nfpr > 0) nfpr--; else nslots += 2;
-    } else {
-      if (ngpr > 0) ngpr--; else nslots += 2;
+  if (nargs > (REGARG_NUMGPR < REGARG_NUMFPR ? REGARG_NUMGPR : REGARG_NUMFPR) ||
+      (LJ_TARGET_OSX && (ci->flags & CCI_VARARG))) {
+    IRRef args[CCI_NARGS_MAX*2];
+    int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
+    int spofs = 0, spalign = LJ_TARGET_OSX ? 0 : 7, nslots;
+    asm_collectargs(as, ir, ci, args);
+    for (i = 0; i < nargs; i++) {
+      int al = spalign;
+      if (!args[i]) {
+#if LJ_TARGET_OSX
+	/* Marker for start of varargs. */
+	nfpr = 0;
+	ngpr = 0;
+	spalign = 7;
+#endif
+      } else if (irt_isfp(IR(args[i])->t)) {
+	if (nfpr > 0) { nfpr--; continue; }
+#if LJ_TARGET_OSX
+	al |= irt_isnum(IR(args[i])->t) ? 7 : 3;
+#endif
+      } else {
+	if (ngpr > 0) { ngpr--; continue; }
+#if LJ_TARGET_OSX
+	al |= irt_size(IR(args[i])->t) - 1;
+#endif
+      }
+      spofs = (spofs + 2*al+1) & ~al;  /* Align and bump stack pointer. */
     }
+    nslots = (spofs + 3) >> 2;
+    if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
+      as->evenspill = nslots;
   }
-  if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
-    as->evenspill = nslots;
+#endif
   return REGSP_HINT(RID_RET);
 }
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index 9001cb5a..00e753b9 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -348,7 +348,6 @@
       goto done; \
     } else { \
       nfpr = CCALL_NARG_FPR;  /* Prevent reordering. */ \
-      if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \
     } \
   } else {  /* Try to pass argument in GPRs. */ \
     if (!LJ_TARGET_OSX && (d->info & CTF_ALIGN) > CTALIGN_PTR) \
@@ -359,7 +358,6 @@
       goto done; \
     } else { \
       ngpr = maxgpr;  /* Prevent reordering. */ \
-      if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \
     } \
   }
@@ -1023,7 +1021,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
       CCALL_HANDLE_STRUCTARG
     } else if (ctype_iscomplex(d->info)) {
       CCALL_HANDLE_COMPLEXARG
-    } else {
+    } else if (!(CCALL_PACK_STACKARG && ctype_isenum(d->info))) {
       sz = CTSIZE_PTR;
     }
     n = (sz + CTSIZE_PTR-1) / CTSIZE_PTR;  /* Number of GPRs or stack slots needed. */
@@ -1033,12 +1031,12 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
     /* Otherwise pass argument on stack. */
     if (CCALL_ALIGN_STACKARG) {  /* Align argument on stack. */
       MSize align = (1u << ctype_align(d->info)) - 1;
-      if (rp)
+      if (rp || (CCALL_PACK_STACKARG && isva && align < CTSIZE_PTR-1))
	align = CTSIZE_PTR-1;
       nsp = (nsp + align) & ~align;
     }
     dp = ((uint8_t *)cc->stack) + nsp;
-    nsp += n * CTSIZE_PTR;
+    nsp += CCALL_PACK_STACKARG ? sz : n * CTSIZE_PTR;
     if (nsp > CCALL_SIZE_STACK) {  /* Too many arguments. */
    err_nyi:
       lj_err_caller(L, LJ_ERR_FFI_NYICALL);
@@ -1053,7 +1051,8 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
     }
     lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
     /* Extend passed integers to 32 bits at least. */
-    if (ctype_isinteger_or_bool(d->info) && d->size < 4) {
+    if (ctype_isinteger_or_bool(d->info) && d->size < 4 &&
+	(!CCALL_PACK_STACKARG || !((uintptr_t)dp & 3))) {  /* Assumes LJ_LE. */
       if (d->info & CTF_UNSIGNED)
	*(uint32_t *)dp = d->size == 1 ?
	  (uint32_t)*(uint8_t *)dp : (uint32_t)*(uint16_t *)dp;
diff --git a/src/lj_ccall.h b/src/lj_ccall.h
index 57300817..24646d90 100644
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -75,6 +75,9 @@ typedef union FPRArg {
 #define CCALL_NARG_FPR		8
 #define CCALL_NRET_FPR		4
 #define CCALL_SPS_FREE		0
+#if LJ_TARGET_OSX
+#define CCALL_PACK_STACKARG	1
+#endif
 
 typedef intptr_t GPRArg;
 typedef union FPRArg {
@@ -139,6 +142,9 @@ typedef union FPRArg {
 #ifndef CCALL_ALIGN_STACKARG
 #define CCALL_ALIGN_STACKARG	1
 #endif
+#ifndef CCALL_PACK_STACKARG
+#define CCALL_PACK_STACKARG	0
+#endif
 #ifndef CCALL_ALIGN_CALLSTATE
 #define CCALL_ALIGN_CALLSTATE	8
 #endif
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index 04bc895d..d7a522fb 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -1118,6 +1118,12 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
     ngpr = 1;
   else if (ctype_cconv(ct->info) == CTCC_FASTCALL)
     ngpr = 2;
+#elif LJ_TARGET_ARM64
+#if LJ_ABI_WIN
+#error "NYI: ARM64 Windows ABI calling conventions"
+#elif LJ_TARGET_OSX
+  int ngpr = CCALL_NARG_GPR;
+#endif
 #endif
 
   /* Skip initial attributes. */
@@ -1143,6 +1149,14 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
     } else {
       if (!(ct->info & CTF_VARARG))
	lj_trace_err(J, LJ_TRERR_NYICALL);  /* Too many arguments. */
+#if LJ_TARGET_ARM64 && LJ_TARGET_OSX
+      if (ngpr >= 0) {
+	ngpr = -1;
+	args[n++] = TREF_NIL;  /* Marker for start of varargs. */
+	if (n >= CCI_NARGS_MAX)
+	  lj_trace_err(J, LJ_TRERR_NYICALL);
+      }
+#endif
       did = lj_ccall_ctid_vararg(cts, o);  /* Infer vararg type. */
     }
     d = ctype_raw(cts, did);
@@ -1151,6 +1165,15 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
       lj_trace_err(J, LJ_TRERR_NYICALL);
     tr = crec_ct_tv(J, d, 0, *base, o);
     if (ctype_isinteger_or_bool(d->info)) {
+#if LJ_TARGET_ARM64 && LJ_TARGET_OSX
+      if (!ngpr) {
+	/* Fixed args passed on the stack use their unpromoted size. */
+	if (d->size != lj_ir_type_size[tref_type(tr)]) {
+	  lj_assertJ(d->size == 1 || d->size==2, "unexpected size %d", d->size);
+	  tr = emitconv(tr, d->size==1 ? IRT_U8 : IRT_U16, tref_type(tr), 0);
+	}
+      } else
+#endif
       if (d->size < 4) {
	if ((d->info & CTF_UNSIGNED))
	  tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_U8 : IRT_U16, 0);
@@ -1188,6 +1211,10 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
       }
     }
 #endif
+#elif LJ_TARGET_ARM64 && LJ_TARGET_OSX
+    if (!ctype_isfp(d->info) && ngpr) {
+      ngpr--;
+    }
 #endif
     args[n] = tr;
   }

From c6ee7e19d107b4f9a140bb2ccf99162e26318c69 Mon Sep 17 00:00:00 2001
From: Mike Pall <mike>
Date: Tue, 29 Aug 2023 22:27:38 +0200
Subject: [PATCH 10/13] Update external MSDN URL in code.

Thanks to Kyle Marshall. #1060
---
 src/lj_jit.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lj_jit.h b/src/lj_jit.h
index 66aa9aad..911c899c 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -86,7 +86,7 @@
 #define JIT_F_OPT_DEFAULT	JIT_F_OPT_3
 
 #if LJ_TARGET_WINDOWS || LJ_64
-/* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */
+/* See: https://devblogs.microsoft.com/oldnewthing/20031008-00/?p=42223 */
 #define JIT_P_sizemcode_DEFAULT	64
 #else
 /* Could go as low as 4K, but the mmap() overhead would be rather high. */
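Background for PATCH 09: on macOS/arm64, fixed arguments passed on the stack
are only padded to their own natural alignment, while varargs always occupy
8-byte slots. A sketch of the packing rule (assumes power-of-two sizes; not
the LuaJIT implementation):

    /* Byte offset after pushing one packed stack argument of size sz. */
    static unsigned push_packed(unsigned nsp, unsigned sz)
    {
      unsigned align = sz - 1;       /* sz is 1, 2, 4 or 8. */
      nsp = (nsp + align) & ~align;  /* Align to the argument's own size. */
      return nsp + sz;               /* Advance by the unpromoted size. */
    }

Three consecutive uint8_t fixed arguments thus occupy 3 bytes here, versus
three full 8-byte slots in the standard AAPCS64 layout.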
From 7ff8f26eb852953778736cf244b2884e339d80aa Mon Sep 17 00:00:00 2001
From: Mike Pall <mike>
Date: Tue, 29 Aug 2023 22:35:10 +0200
Subject: [PATCH 11/13] ARM64: Fix register allocation for IR_*LOAD.

Thanks to Peter Cawley. #1062
---
 src/lj_asm_arm64.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index 3889883d..c216fced 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -1107,6 +1107,8 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
   }
   type = ra_scratch(as, rset_clear(gpr, tmp));
   idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx);
+  rset_clear(gpr, idx);
+  if (ofs & FUSE_REG) rset_clear(gpr, ofs & 31);
   if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
   /* Always do the type check, even if the load result is unused. */
   asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE);
@@ -1114,7 +1116,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
	       "bad load type %d", irt_type(ir->t));
     emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
-	    ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp);
+	    ra_allock(as, LJ_TISNUM << 15, gpr), tmp);
   } else if (irt_isaddr(ir->t)) {
     emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type);
     emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);

From 2f6c451ce8db5b5bc88126c9856e15f25fd5beae Mon Sep 17 00:00:00 2001
From: Mike Pall <mike>
Date: Tue, 29 Aug 2023 22:38:20 +0200
Subject: [PATCH 12/13] ARM64: Improve register allocation for integer
 IR_MUL/IR_MULOV.

Thanks to Peter Cawley. #1062
---
 src/lj_asm_arm64.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index c216fced..5e690308 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -1441,7 +1441,7 @@ static void asm_intneg(ASMState *as, IRIns *ir)
 static void asm_intmul(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
-  Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest));
+  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
   Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
   if (irt_isguard(ir->t)) {  /* IR_MULOV */
     asm_guardcc(as, CC_NE);
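Background for PATCHES 11-12: both changes tune the same allocator rule:
exclude from the candidate set exactly those registers already bound in the
instruction being assembled, no fewer (PATCH 11) and no more (PATCH 12). A
distilled sketch (simplified types, not the allocator itself):

    #include <stdint.h>

    typedef uint32_t RegSet;
    typedef uint32_t Reg;

    #define RID2RSET(r)         (((RegSet)1) << (r))
    #define rset_exclude(rs, r) ((rs) & ~RID2RSET(r))

    /* The second operand must not alias the first, but the first may
    ** share a register with the destination: MUL reads both inputs
    ** before it writes its result. */
    static Reg pick_right(RegSet all, Reg left)
    {
      RegSet avail = rset_exclude(all, left);
      return (Reg)__builtin_ctz(avail);  /* Lowest remaining register. */
    }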
From 41fb94defa8f830ce69a8122b03f6ac3216d392a Mon Sep 17 00:00:00 2001
From: Mike Pall <mike>
Date: Wed, 30 Aug 2023 01:10:52 +0200
Subject: [PATCH 13/13] Add randomized register allocation for fuzz testing.

This must be explicitly enabled with: -DLUAJIT_RANDOM_RA

Thanks to Peter Cawley. #1062
---
 src/Makefile.dep    |  2 +-
 src/lj_asm.c        | 49 +++++++++++++++++++++++++++++++++++++++++++++
 src/lj_target.h     | 10 +++++----
 src/lj_target_x86.h |  4 ++--
 4 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/src/Makefile.dep b/src/Makefile.dep
index 400ef8b0..fda77c83 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -55,7 +55,7 @@ lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_buf.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h \
  lj_jit.h lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \
  lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h \
- lj_emit_*.h lj_asm_*.h
+ lj_prng.h lj_emit_*.h lj_asm_*.h
 lj_assert.o: lj_assert.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
 lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
  lj_bcdef.h
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 71079b30..c02a1b9e 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -29,6 +29,7 @@
 #include "lj_dispatch.h"
 #include "lj_vm.h"
 #include "lj_target.h"
+#include "lj_prng.h"
 
 #ifdef LUA_USE_ASSERT
 #include <stdio.h>
@@ -93,6 +94,12 @@ typedef struct ASMState {
   MCode *flagmcp;	/* Pending opportunity to merge flag setting ins. */
   MCode *realign;	/* Realign loop if not NULL. */
 
+#ifdef LUAJIT_RANDOM_RA
+  /* Randomize register allocation. OK for fuzz testing, not for production. */
+  uint64_t prngbits;
+  PRNGState prngstate;
+#endif
+
 #ifdef RID_NUM_KREF
   intptr_t krefk[RID_NUM_KREF];
 #endif
@@ -173,6 +180,41 @@ typedef struct ASMState {
 IRFLDEF(FLOFS)
   0
 };
 
+#ifdef LUAJIT_RANDOM_RA
+/* Return a fixed number of random bits from the local PRNG state. */
+static uint32_t ra_random_bits(ASMState *as, uint32_t nbits)
+{
+  uint64_t b = as->prngbits;
+  uint32_t res = (1u << nbits) - 1u;
+  if (b <= res) b = lj_prng_u64(&as->prngstate) | (1ull << 63);
+  res &= (uint32_t)b;
+  as->prngbits = b >> nbits;
+  return res;
+}
+
+/* Pick a random register from a register set. */
+static Reg rset_pickrandom(ASMState *as, RegSet rs)
+{
+  Reg r = rset_pickbot_(rs);
+  rs >>= r;
+  if (rs > 1) {  /* More than one bit set? */
+    while (1) {
+      /* We need to sample max. the GPR or FPR half of the set. */
+      uint32_t d = ra_random_bits(as, RSET_BITS-1);
+      if ((rs >> d) & 1) {
+	r += d;
+	break;
+      }
+    }
+  }
+  return r;
+}
+#define rset_picktop(rs)	rset_pickrandom(as, rs)
+#define rset_pickbot(rs)	rset_pickrandom(as, rs)
+#else
+#define rset_picktop(rs)	rset_picktop_(rs)
+#define rset_pickbot(rs)	rset_pickbot_(rs)
+#endif
+
 /* -- Target-specific instruction emitter --------------------------------- */
 
 #if LJ_TARGET_X86ORX64
@@ -2442,6 +2484,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   as->realign = NULL;
   as->loopinv = 0;
   as->parent = J->parent ? traceref(J, J->parent) : NULL;
+#ifdef LUAJIT_RANDOM_RA
+  (void)lj_prng_u64(&J2G(J)->prng);  /* Ensure PRNG step between traces. */
+#endif
 
   /* Reserve MCode memory. */
   as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot);
@@ -2483,6 +2528,10 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
 #endif
     as->ir = J->curfinal->ir;  /* Use the copied IR. */
     as->curins = J->cur.nins = as->orignins;
+#ifdef LUAJIT_RANDOM_RA
+    as->prngstate = J2G(J)->prng;  /* Must (re)start from identical state. */
+    as->prngbits = 0;
+#endif
 
     RA_DBG_START();
     RA_DBGX((as, "===== STOP ====="));
diff --git a/src/lj_target.h b/src/lj_target.h
index 2f4d21c1..09d19bd9 100644
--- a/src/lj_target.h
+++ b/src/lj_target.h
@@ -57,8 +57,10 @@ typedef uint32_t RegSP;
 */
 #if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
 typedef uint64_t RegSet;
+#define RSET_BITS		6
 #else
 typedef uint32_t RegSet;
+#define RSET_BITS		5
 #endif
 
 #define RID2RSET(r)		(((RegSet)1) << (r))
@@ -70,11 +72,11 @@ typedef uint32_t RegSet;
 #define rset_clear(rs, r)	(rs &= ~RID2RSET(r))
 #define rset_exclude(rs, r)	(rs & ~RID2RSET(r))
 #if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
-#define rset_picktop(rs)	((Reg)(__builtin_clzll(rs)^63))
-#define rset_pickbot(rs)	((Reg)__builtin_ctzll(rs))
+#define rset_picktop_(rs)	((Reg)(__builtin_clzll(rs)^63))
+#define rset_pickbot_(rs)	((Reg)__builtin_ctzll(rs))
 #else
-#define rset_picktop(rs)	((Reg)lj_fls(rs))
-#define rset_pickbot(rs)	((Reg)lj_ffs(rs))
+#define rset_picktop_(rs)	((Reg)lj_fls(rs))
+#define rset_pickbot_(rs)	((Reg)lj_ffs(rs))
 #endif
 
 /* -- Register allocation cost -------------------------------------------- */
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 7b8d62ad..3482309b 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -116,8 +116,8 @@ enum {
 
 #if LJ_64
 /* Prefer the low 8 regs of each type to reduce REX prefixes. */
-#undef rset_picktop
-#define rset_picktop(rs)	(lj_fls(lj_bswap(rs)) ^ 0x18)
+#undef rset_picktop_
+#define rset_picktop_(rs)	(lj_fls(lj_bswap(rs)) ^ 0x18)
 #endif
 
 /* -- Spill slots --------------------------------------------------------- */
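Background for PATCH 13: randomized picks make register allocation explore
corner cases (like the two fixed above) under fuzzing. ra_random_bits()
keeps a reservoir of PRNG output and serves it a few bits at a time, with
bit 63 as a sentinel marking how much of the pool is still unused. The same
trick in isolation (standalone sketch; lj_prng_u64() replaced by a 'fresh'
parameter):

    #include <stdint.h>

    static uint64_t pool;  /* Leftover random bits plus the sentinel. */

    static uint32_t take_bits(uint64_t fresh, uint32_t nbits)
    {
      uint32_t res = (1u << nbits) - 1u;
      if (pool <= res)                /* Too few usable bits left? */
        pool = fresh | (1ull << 63);  /* Refill and set the sentinel. */
      res &= (uint32_t)pool;
      pool >>= nbits;                 /* Consume nbits from the pool. */
      return res;
    }

Note how the trace assembler copies the global PRNG state into the ASMState
before each assembly attempt: a re-assembled trace must replay the exact
same sequence of picks, so randomization never makes two passes over the
same trace diverge.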