diff --git a/doc/changes.html b/doc/changes.html
index 64dc4c2a..febb03c7 100644
--- a/doc/changes.html
+++ b/doc/changes.html
@@ -90,7 +90,7 @@ Please take a look at the commit history for more details.
Improvements to the JIT compiler:
-- Add trace stitching (disabled for now).
+- Add trace stitching.
- Compile various builtins: string.char(), string.reverse(), string.lower(), string.upper(), string.rep(), string.format(), table.concat(), bit.tohex(), getfenv(0), debug.getmetatable().
- Compile string.find() for fixed string searches (no patterns).
- Compile BC_TSETM, e.g. {1,2,3,f()}.
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index a08113ca..6cc05a24 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -96,18 +96,10 @@ static ptrdiff_t results_wanted(jit_State *J)
return -1;
}
-#ifdef LUAJIT_TRACE_STITCHING
-/* This feature is disabled for now due to a design mistake. Sorry.
-**
-** It causes unpredictable behavior and crashes when a full trace flush
-** happens with a stitching continuation still in the stack somewhere.
-*/
-
/* Trace stitching: add continuation below frame to start a new trace. */
static void recff_stitch(jit_State *J)
{
ASMFunction cont = lj_cont_stitch;
- TraceNo traceno = J->cur.traceno;
lua_State *L = J->L;
TValue *base = L->base;
const BCIns *pc = frame_pc(base-1);
@@ -120,7 +112,7 @@ static void recff_stitch(jit_State *J)
setframe_ftsz(base+1, ((char *)(base+1) - (char *)pframe) + FRAME_CONT);
setcont(base, cont);
setframe_pc(base, pc);
- if (LJ_DUALNUM) setintV(base-1, traceno); else base[-1].u64 = traceno;
+ setnilV(base-1); /* Incorrect, but rec_check_slots() won't run anymore. */
L->base += 2;
L->top += 2;
@@ -132,7 +124,9 @@ static void recff_stitch(jit_State *J)
trcont = lj_ir_kptr(J, (void *)cont);
#endif
J->base[0] = trcont | TREF_CONT;
- J->base[-1] = LJ_DUALNUM ? lj_ir_kint(J,traceno) : lj_ir_knum_u64(J,traceno);
+ J->ktracep = lj_ir_k64_reserve(J);
+ lua_assert(irt_toitype_(IRT_P64) == LJ_TTRACE);
+ J->base[-1] = emitir(IRT(IR_XLOAD, IRT_P64), lj_ir_kptr(J, &J->ktracep->gcr), 0);
J->base += 2;
J->baseslot += 2;
J->framedepth++;
@@ -181,31 +175,6 @@ static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
/* Must stop the trace for classic C functions with arbitrary side-effects. */
#define recff_c recff_nyi
-#else
-/* Fallback handler for fast functions that are not recorded (yet). */
-static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
-{
- setfuncV(J->L, &J->errinfo, J->fn);
- lj_trace_err_info(J, LJ_TRERR_NYIFF);
- UNUSED(rd);
-}
-
-/* Throw error for unsupported variant of fast function. */
-LJ_NORET static void recff_nyiu(jit_State *J, RecordFFData *rd)
-{
- setfuncV(J->L, &J->errinfo, J->fn);
- lj_trace_err_info(J, LJ_TRERR_NYIFFU);
- UNUSED(rd);
-}
-
-/* Must abort the trace for classic C functions with arbitrary side-effects. */
-static void LJ_FASTCALL recff_c(jit_State *J, RecordFFData *rd)
-{
- setfuncV(J->L, &J->errinfo, J->fn);
- lj_trace_err_info(J, LJ_TRERR_NYICF);
- UNUSED(rd);
-}
-#endif
/* Emit BUFHDR for the global temporary buffer. */
static TRef recff_bufhdr(jit_State *J)
diff --git a/src/lj_gc.c b/src/lj_gc.c
index 99d664aa..afd39972 100644
--- a/src/lj_gc.c
+++ b/src/lj_gc.c
@@ -69,7 +69,7 @@ static void gc_mark(global_State *g, GCobj *o)
gray2black(o); /* Closed upvalues are never gray. */
} else if (gct != ~LJ_TSTR && gct != ~LJ_TCDATA) {
lua_assert(gct == ~LJ_TFUNC || gct == ~LJ_TTAB ||
- gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO);
+ gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO || gct == ~LJ_TTRACE);
setgcrefr(o->gch.gclist, g->gc.gray);
setgcref(g->gc.gray, o);
}
diff --git a/src/lj_ir.c b/src/lj_ir.c
index 9682e05e..567aec86 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -209,24 +209,13 @@ void lj_ir_k64_freeall(jit_State *J)
lj_mem_free(J2G(J), k, sizeof(K64Array));
k = next;
}
+ setmref(J->k64, NULL);
}
-/* Find 64 bit constant in chained array or add it. */
-cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64)
+/* Get new 64 bit constant slot. */
+static TValue *ir_k64_add(jit_State *J, K64Array *kp, uint64_t u64)
{
- K64Array *k, *kp = NULL;
TValue *ntv;
- MSize idx;
- /* Search for the constant in the whole chain of arrays. */
- for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) {
- kp = k; /* Remember previous element in list. */
- for (idx = 0; idx < k->numk; idx++) { /* Search one array. */
- TValue *tv = &k->k[idx];
- if (tv->u64 == u64) /* Needed for +-0/NaN/absmask. */
- return tv;
- }
- }
- /* Constant was not found, need to add it. */
if (!(kp && kp->numk < LJ_MIN_K64SZ)) { /* Allocate a new array. */
K64Array *kn = lj_mem_newt(J->L, sizeof(K64Array), K64Array);
setmref(kn->next, NULL);
@@ -242,6 +231,33 @@ cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64)
return ntv;
}
+/* Find 64 bit constant (matched by raw bits) in chained array or add it. */
+cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64)
+{
+ K64Array *k, *kp = NULL;
+ MSize idx;
+ /* Search for the constant in the whole chain of arrays, starting at J->k64. */
+ for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) {
+ kp = k; /* Remember last array visited; handed to ir_k64_add() below. */
+ for (idx = 0; idx < k->numk; idx++) { /* Search one array. */
+ TValue *tv = &k->k[idx];
+ if (tv->u64 == u64) /* Bitwise compare. Needed for +-0/NaN/absmask. */
+ return tv;
+ }
+ }
+ /* Not found (kp is the last array or NULL): add a new constant. */
+ return ir_k64_add(J, kp, u64);
+}
+
+TValue *lj_ir_k64_reserve(jit_State *J) /* Reserve a fresh 64 bit slot. */
+{
+ K64Array *k, *kp = NULL;
+ lj_ir_k64_find(J, 0); /* Intern dummy 0 first, so a lookup of 0 matches it and not the reserved slot. */
+ /* Find last K64Array, if any (kp stays NULL for an empty chain). */
+ for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) kp = k;
+ return ir_k64_add(J, kp, 0); /* Set to 0. Final value is set later by the caller. */
+}
+
/* Intern 64 bit constant, given by its address. */
TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv)
{
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index 4e424e70..4106ef8a 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -40,6 +40,7 @@ static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J)
LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k);
LJ_FUNC void lj_ir_k64_freeall(jit_State *J);
LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv);
+LJ_FUNC TValue *lj_ir_k64_reserve(jit_State *J);
LJ_FUNC cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64);
LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64);
LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n);
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 1df56cae..10900bf6 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -381,6 +381,7 @@ typedef struct jit_State {
GCRef *trace; /* Array of traces. */
TraceNo freetrace; /* Start of scan for next free trace. */
MSize sizetrace; /* Size of trace array. */
+ TValue *ktracep; /* Pointer to K64Array slot with GCtrace pointer. */
IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */
TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */
diff --git a/src/lj_snap.c b/src/lj_snap.c
index d8e7987c..fa9abb74 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -631,8 +631,8 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
} else if (irt_isnum(t)) {
setnumV(o, ex->fpr[r-RID_MIN_FPR]);
#endif
- } else if (LJ_64 && irt_islightud(t)) {
- /* 64 bit lightuserdata which may escape already has the tag bits. */
+ } else if (LJ_64 && irt_is64(t)) {
+ /* 64 bit values that already have the tag bits. */
o->u64 = ex->gpr[r-RID_MIN_GPR];
} else if (irt_ispri(t)) {
setpriV(o, irt_toitype(t));
diff --git a/src/lj_trace.c b/src/lj_trace.c
index 42f4321d..1d0c2e5e 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -274,7 +274,7 @@ int lj_trace_flushall(lua_State *L)
if (T->root == 0)
trace_flushroot(J, T);
lj_gdbjit_deltrace(J, T);
- T->traceno = 0;
+ T->traceno = T->link = 0; /* Blacklist the link for cont_stitch. */
setgcrefnull(J->trace[i]);
}
}
@@ -284,6 +284,7 @@ int lj_trace_flushall(lua_State *L)
memset(J->penalty, 0, sizeof(J->penalty));
/* Free the whole machine code and invalidate all exit stub groups. */
lj_mcode_free(J);
+ lj_ir_k64_freeall(J);
memset(J->exitstubgroup, 0, sizeof(J->exitstubgroup));
lj_vmevent_send(L, TRACE,
setstrV(L, L->top++, lj_str_newlit(L, "flush"));
@@ -402,6 +403,7 @@ static void trace_start(jit_State *J)
J->postproc = LJ_POST_NONE;
lj_resetsplit(J);
J->retryrec = 0;
+ J->ktracep = NULL;
setgcref(J->cur.startpt, obj2gco(J->pt));
L = J->L;
@@ -477,6 +479,9 @@ static void trace_stop(jit_State *J)
lj_mcode_commit(J, J->cur.mcode);
J->postproc = LJ_POST_NONE;
trace_save(J, T);
+ if (J->ktracep) { /* Patch K64Array slot with the final GCtrace pointer. */
+ setgcV(J->L, J->ktracep, obj2gco(T), LJ_TTRACE);
+ }
L = J->L;
lj_vmevent_send(L, TRACE,
diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h
index 12e90d03..d434be15 100644
--- a/src/lj_traceerr.h
+++ b/src/lj_traceerr.h
@@ -25,8 +25,6 @@ TREDEF(BADTYPE, "bad argument type")
TREDEF(CJITOFF, "JIT compilation disabled for function")
TREDEF(CUNROLL, "call unroll limit reached")
TREDEF(DOWNREC, "down-recursion, restarting")
-TREDEF(NYICF, "NYI: C function %s")
-TREDEF(NYIFF, "NYI: FastFunc %s")
TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s")
TREDEF(NYIRETL, "NYI: return to lower frame")
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index 0bd9b147..af722f9e 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -2086,7 +2086,7 @@ static void build_subroutines(BuildCtx *ctx)
| // RA = resultptr, CARG4 = meta base
| ldr RB, SAVE_MULTRES
| ldr INS, [PC, #-4]
- | ldr CARG3, [CARG4, #-24] // Save previous trace number.
+ | ldr TRACE:CARG3, [CARG4, #-24] // Save previous trace.
| subs RB, RB, #8
| decode_RA8 RC, INS // Call base.
| beq >2
@@ -2101,23 +2101,20 @@ static void build_subroutines(BuildCtx *ctx)
| decode_RA8 RA, INS
| decode_RB8 RB, INS
| add RA, RA, RB
- | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)]
|3:
| cmp RA, RC
| mvn CARG2, #~LJ_TNIL
| bhi >9 // More results wanted?
|
- | ldr TRACE:RA, [CARG1, CARG3, lsl #2]
- | cmp TRACE:RA, #0
- | beq ->cont_nop
- | ldrh RC, TRACE:RA->link
- | cmp RC, CARG3
+ | ldrh RA, TRACE:CARG3->traceno
+ | ldrh RC, TRACE:CARG3->link
+ | cmp RC, RA
| beq ->cont_nop // Blacklisted.
| cmp RC, #0
| bne =>BC_JLOOP // Jump to stitched trace.
|
| // Stitch a new trace to the previous trace.
- | str CARG3, [DISPATCH, #DISPATCH_J(exitno)]
+ | str RA, [DISPATCH, #DISPATCH_J(exitno)]
| str L, [DISPATCH, #DISPATCH_J(L)]
| str BASE, L->base
| sub CARG1, DISPATCH, #-GG_DISP2J
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index 7cfdf4b1..134ed569 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -2015,7 +2015,7 @@ static void build_subroutines(BuildCtx *ctx)
|.if JIT
| // RA = resultptr, RB = meta base
| lw INS, -4(PC)
- | lw TMP3, -24+LO(RB) // Save previous trace number.
+ | lw TMP2, -24+LO(RB) // Save previous trace.
| decode_RA8a RC, INS
| addiu AT, MULTRES, -8
| decode_RA8b RC
@@ -2034,17 +2034,13 @@ static void build_subroutines(BuildCtx *ctx)
| decode_RA8b RA
| decode_RB8b RB
| addu RA, RA, RB
- | lw TMP1, DISPATCH_J(trace)(DISPATCH)
| addu RA, BASE, RA
|3:
| sltu AT, RC, RA
| bnez AT, >9 // More results wanted?
- |. sll TMP2, TMP3, 2
+ |. nop
|
- | addu TMP2, TMP1, TMP2
- | lw TRACE:TMP2, 0(TMP2)
- | beqz TRACE:TMP2, ->cont_nop
- |. nop
+ | lhu TMP3, TRACE:TMP2->traceno
| lhu RD, TRACE:TMP2->link
| beq RD, TMP3, ->cont_nop // Blacklisted.
|. load_got lj_dispatch_stitch
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index 9299c554..0d6915fd 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -2525,7 +2525,7 @@ static void build_subroutines(BuildCtx *ctx)
|.if JIT
| // RA = resultptr, RB = meta base
| lwz INS, -4(PC)
- | lwz TMP3, -20(RB) // Save previous trace number.
+ | lwz TRACE:TMP2, -20(RB) // Save previous trace.
| addic. TMP1, MULTRES, -8
| decode_RA8 RC, INS // Call base.
| beq >2
@@ -2540,15 +2540,11 @@ static void build_subroutines(BuildCtx *ctx)
| decode_RA8 RA, INS
| decode_RB8 RB, INS
| add RA, RA, RB
- | lwz TMP1, DISPATCH_J(trace)(DISPATCH)
|3:
| cmplw RA, RC
| bgt >9 // More results wanted?
|
- | slwi TMP2, TMP3, 2
- | lwzx TRACE:TMP2, TMP1, TMP2
- | cmpwi TRACE:TMP2, 0
- | beq ->cont_nop
+ | lhz TMP3, TRACE:TMP2->traceno
| lhz RD, TRACE:TMP2->link
| cmpw RD, TMP3
| cmpwi cr1, RD, 0
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index f31e595b..96ac1da8 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -2667,8 +2667,8 @@ static void build_subroutines(BuildCtx *ctx)
|->cont_stitch: // Trace stitching.
|.if JIT
| // BASE = base, RC = result, RB = mbase
- | mov RA, [RB-24] // Save previous trace number.
- | mov TMP1, RA
+ | mov TRACE:RA, [RB-24] // Save previous trace.
+ | mov TMP1, TRACE:RA
| mov TMP3, DISPATCH // Need one more register.
| mov DISPATCH, MULTRES
| movzx RA, PC_RA
@@ -2699,11 +2699,8 @@ static void build_subroutines(BuildCtx *ctx)
| ja >9 // More results wanted?
|
| mov DISPATCH, TMP3
- | mov RB, TMP1 // Get previous trace number.
- | mov RA, [DISPATCH+DISPATCH_J(trace)]
- | mov TRACE:RD, [RA+RB*4]
- | test TRACE:RD, TRACE:RD
- | jz ->cont_nop
+ | mov TRACE:RD, TMP1 // Get previous trace.
+ | movzx RB, word TRACE:RD->traceno
| movzx RD, word TRACE:RD->link
| cmp RD, RB
| je ->cont_nop // Blacklisted.