From 3bdc6498c4c012a8fbf9cfa2756a5b07f56f1540 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 15 Aug 2024 00:07:34 +0200 Subject: [PATCH 1/3] Limit CSE for IR_CARG to fix loop optimizations. Thanks to Peter Cawley. #1244 --- src/lj_opt_fold.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index b437d672..98ec28c6 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -2171,6 +2171,17 @@ LJFOLD(CNEW any any) LJFOLD(XSNEW any any) LJFOLDX(lj_ir_emit) +/* -- Miscellaneous ------------------------------------------------------- */ + +LJFOLD(CARG any any) +LJFOLDF(cse_carg) +{ + TRef tr = lj_opt_cse(J); + if (tref_ref(tr) < J->chain[IR_LOOP]) /* CSE across loop? */ + return EMITFOLD; /* Raw emit. Assumes fins is left intact by CSE. */ + return tr; +} + /* ------------------------------------------------------------------------ */ /* Every entry in the generated hash table is a 32 bit pattern: From 7369eff67d46d7f5fac9ee064e3fbf97a15458de Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 15 Aug 2024 00:10:01 +0200 Subject: [PATCH 2/3] Fix IR_ABC hoisting. Reported by pwnhacker0x18. Fixed by Peter Cawley. #1194 --- src/lj_opt_fold.c | 5 +++-- src/lj_record.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 98ec28c6..622ff0a9 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -1702,9 +1702,10 @@ LJFOLDF(abc_k) LJFOLD(ABC any any) LJFOLDF(abc_invar) { - /* Invariant ABC marked as PTR. Drop if op1 is invariant, too. */ + /* Invariant ABC marked as P32 or U32. Drop if op1 is invariant too. */ if (!irt_isint(fins->t) && fins->op1 < J->chain[IR_LOOP] && - !irt_isphi(IR(fins->op1)->t)) + (irt_isu32(fins->t) || + (!irref_isk(fins->op1) && !irt_isphi(IR(fins->op1)->t)))) return DROPFOLD; return NEXTFOLD; } diff --git a/src/lj_record.c b/src/lj_record.c index f2a06f41..207327b3 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -1069,12 +1069,13 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) /* Runtime value for stop of loop is within bounds? */ if ((uint64_t)stop + ofs < (uint64_t)asize) { /* Emit invariant bounds check for stop. */ - emitir(IRTG(IR_ABC, IRT_P32), asizeref, ofs == 0 ? J->scev.stop : + uint32_t abc = IRTG(IR_ABC, tref_isk(asizeref) ? IRT_U32 : IRT_P32); + emitir(abc, asizeref, ofs == 0 ? J->scev.stop : emitir(IRTI(IR_ADD), J->scev.stop, ofsref)); /* Emit invariant bounds check for start, if not const or negative. */ if (!(J->scev.dir && J->scev.start && (int64_t)IR(J->scev.start)->i + ofs >= 0)) - emitir(IRTG(IR_ABC, IRT_P32), asizeref, ikey); + emitir(abc, asizeref, ikey); return; } } From 86e7123bb1782a5f200ba5e83b8c4f3fbad4f7bc Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 15 Aug 2024 00:17:19 +0200 Subject: [PATCH 3/3] Different fix for partial snapshot restore due to stack overflow. Reported by Junlong Li. Fixed by Peter Cawley. #1196 --- src/lj_snap.c | 3 ++- src/lj_trace.c | 4 +--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/lj_snap.c b/src/lj_snap.c index 9858c110..0c317b52 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -873,7 +873,8 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) lua_State *L = J->L; /* Set interpreter PC to the next PC to get correct error messages. */ - setcframe_pc(cframe_raw(L->cframe), pc+1); + setcframe_pc(L->cframe, pc+1); + setcframe_pc(cframe_raw(cframe_prev(L->cframe)), pc); /* Make sure the stack is big enough for the slots from the snapshot. */ if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) { diff --git a/src/lj_trace.c b/src/lj_trace.c index 8385f3d1..d015f2ab 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -788,10 +788,8 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) exd.J = J; exd.exptr = exptr; errcode = lj_vm_cpcall(L, NULL, &exd, trace_exit_cp); - if (errcode) { - setcframe_pc(cframe_raw(L->cframe), L); /* Point to any valid memory. */ + if (errcode) return -errcode; /* Return negated error code. */ - } lj_vmevent_send(L, TEXIT, lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK);