Improve sinking

XmiliaH 2021-01-05 19:37:17 +01:00
parent 1e66d0f9e6
commit c956050a12
4 changed files with 231 additions and 129 deletions
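Before this change a store of one sunk allocation into another forced the stored allocation to materialize, because sink_mark_ins unconditionally marked every stored value. Nested allocations can now stay sunk as "heavy sinks": each one gets a global restore index (1..255, encoded as REGSP(RID_SINK, index)), and on a side exit it is unsunk exactly once and cached, so every sunk store that references it reuses the same object. A minimal sketch of the pattern this enables (hypothetical source, not from the patch):

/* Hypothetical Lua input:
**
**   local acc = 0
**   for i = 1, 100 do
**     local p = { pos = { x = i, y = 2*i } }  -- inner table stored into
**     acc = acc + p.pos.x + p.pos.y           -- the outer, also-sunk table
**   end
**
** Both TNEWs are temporaries, so both can now stay sunk on the fast path;
** a taken exit restores the inner table once through the restore cache and
** replays the outer table's sunk stores against the restored object.
*/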

src/jit/dump.lua

@ -387,12 +387,16 @@ local function dump_snap(tr)
end
-- Return a register name or stack slot for a rid/sp location.
local function ridsp_name(ridsp, ins)
local function ridsp_name(ridsp, ins, op)
if not disass then disass = require("jit.dis_"..jit.arch) end
local rid, slot = band(ridsp, 0xff), shr(ridsp, 8)
if rid == 253 or rid == 254 then
if op == "TNEW " or op == "TDUP " or op == "CNEW " then
return (slot == 0) and " {sink" or format(" {ri%02d", slot)
else
return (slot == 0 or slot == 255) and " {sink" or format(" {%04d", ins-slot)
end
end
if ridsp > 255 then return format("[%x]", slot*4) end
if rid < 128 then return disass.regname(rid) end
return ""
@ -476,7 +480,7 @@ local function dump_ir(tr, dumpsnap, dumpreg)
(dumpreg or op ~= "RENAME") then
local rid = band(ridsp, 255)
if dumpreg then
out:write(format("%04d %-6s", ins, ridsp_name(ridsp, ins)))
out:write(format("%04d %-6s", ins, ridsp_name(ridsp, ins, op)))
else
out:write(format("%04d ", ins))
end

src/lj_asm.c

@ -302,7 +302,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...)
#define ra_weak(as, r) rset_set(as->weakset, (r))
#define ra_noweak(as, r) rset_clear(as->weakset, (r))
#define ra_used(ir) (ra_hasreg((ir)->r) || ra_hasspill((ir)->s))
#define ra_used(ir) (ra_hasreg((ir)->r) || ((ir)->r != RID_SUNK && (ir)->r != RID_SINK && ra_hasspill((ir)->s)))
/* Setup register allocator. */
static void ra_setup(ASMState *as)
@ -884,7 +884,9 @@ static int asm_sunk_store(ASMState *as, IRIns *ira, IRIns *irs)
}
return 0;
} else {
return (ira + irs->s == irs); /* Quick check. */
if (ira + irs->s != irs) return 0;
return irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
irs->o == IR_FSTORE || irs->o == IR_XSTORE;
}
}
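Both hunks follow from the regsp overloading introduced in this commit: for sunk instructions the s field no longer necessarily names a spill slot, since for a sunk allocation it can hold a heavy sink's global restore index. So ra_used must not report a sunk instruction as used merely because s != 0, and the quick store check must insist on a real store opcode before trusting the distance encoded in s. The guard restated as a standalone helper (a sketch, not part of the patch):

/* Sketch: does irs look like a sunk store belonging to allocation ira? */
static int is_sunk_store_of(IRIns *ira, IRIns *irs)
{
  if (ira + irs->s != irs)
    return 0;  /* For a store, s is the distance back to its allocation. */
  /* s could instead be a heavy sink's global index, so only a genuine
  ** store opcode makes the distance test meaningful. */
  return irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
         irs->o == IR_FSTORE || irs->o == IR_XSTORE;
}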

src/lj_opt_sink.c

@ -14,6 +14,7 @@
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_target.h"
#include "lj_dispatch.h"
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])
@ -52,6 +53,8 @@ static int sink_checkphi(jit_State *J, IRIns *ira, IRRef ref)
IRIns *ir = IR(ref);
if (irt_isphi(ir->t) || (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT &&
irt_isphi(IR(ir->op1)->t))) {
if ((ira->prev & 0x1FFF) == 0x1FFF)
return 0; /* The count would overflow; just force the allocation not to be sunk. */
ira->prev++;
return 1; /* Sinkable PHI. */
}
@ -61,6 +64,14 @@ static int sink_checkphi(jit_State *J, IRIns *ira, IRRef ref)
return 1; /* Constant (non-PHI). */
}
/* Set prev of all instructions to 0. */
static void sink_prepare(jit_State *J)
{
IRIns *ir, *irlast = IR(J->cur.nins-1);
for (ir = irlast ; ir->o != IR_BASE; ir--) {
ir->prev = 0;
}
}
/* Mark non-sinkable allocations using single-pass backward propagation.
**
** Roots for the marking process are:
@ -71,13 +82,20 @@ static int sink_checkphi(jit_State *J, IRIns *ira, IRRef ref)
** - Stores with non-constant keys.
** - All stored values.
*/
static void sink_mark_ins(jit_State *J)
static int sink_mark_ins(jit_State *J, int lightsink)
{
int remark = 0;
int heavysinks = 0;
IRIns *ir, *irlast = IR(J->cur.nins-1);
for (ir = irlast ; ; ir--) {
switch (ir->o) {
case IR_BASE:
return; /* Finished. */
if (!remark)
return heavysinks;
ir = irlast + 1;
remark = 0;
heavysinks = 0;
break;
case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR: case IR_ALEN:
irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */
break;
@ -87,9 +105,16 @@ static void sink_mark_ins(jit_State *J)
break;
case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: {
IRIns *ira = sink_checkalloc(J, ir);
if (!ira || (irt_isphi(ira->t) && !sink_checkphi(J, ira, ir->op2)))
IRIns *irv = IR(ir->op2);
if (!ira || (irt_isphi(ira->t) && !sink_checkphi(J, ira, ir->op2)) || (irt_ismarked(ira->t))) {
irt_setmark(IR(ir->op1)->t); /* Mark ineligible ref. */
irt_setmark(IR(ir->op2)->t); /* Mark stored value. */
irt_setmark(irv->t); /* Mark stored value. */
} else if (lightsink || (irv->o != IR_TNEW && irv->o != IR_TDUP && irv->o != IR_CNEW)) {
irt_setmark(irv->t);
} else {
ira->prev |= 0x2000; /* For this allocation is a store that assumes it is sinkable. */
irv->prev |= 0x4000; /* The sunken allocation is required for a other sunken allocation. It requires a global index. */
}
break;
}
#if LJ_HASFFI
@ -112,7 +137,8 @@ static void sink_mark_ins(jit_State *J)
break;
case IR_PHI: {
IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
irl->prev = irr->prev = 0; /* Clear PHI value counts. */
irl->prev &= 0xC000;
irr->prev &= 0xC000; /* Clear PHI value counts. */
if (irl->o == irr->o &&
(irl->o == IR_TNEW || irl->o == IR_TDUP ||
(LJ_HASFFI && (irl->o == IR_CNEW || irl->o == IR_CNEWI))))
@ -121,6 +147,21 @@ static void sink_mark_ins(jit_State *J)
irt_setmark(irr->t);
break;
}
case IR_TNEW: case IR_TDUP: case IR_CNEW:
if ((ir->prev & 0x2000) && irt_ismarked(ir->t)) {
ir->prev &= ~0x2000;
remark = 1; /* A store assumed this allocation could be sunk, but it cannot. Redo the whole pass so that store can mark its value. */
}
if (!irt_ismarked(ir->t) && (ir->prev & 0x4000)) {
ir->prev |= 0x8000;
heavysinks++;
} else {
ir->prev &= ~0x8000;
}
ir->prev &= ~0x4000;
if (!irt_isphi(ir->t))
ir->prev &= ~0x2000;
/* fallthrough */
default:
if (irt_ismarked(ir->t) || irt_isguard(ir->t)) { /* Propagate mark. */
if (ir->op1 >= REF_FIRST) irt_setmark(IR(ir->op1)->t);
@ -144,26 +185,34 @@ static void sink_mark_snap(jit_State *J, SnapShot *snap)
}
/* Iteratively remark PHI refs with differing marks or PHI value counts. */
static void sink_remark_phi(jit_State *J)
static int sink_remark_phi(jit_State *J)
{
IRIns *ir;
int remark;
int require_remark = 0;
do {
remark = 0;
for (ir = IR(J->cur.nins-1); ir->o == IR_PHI; ir--) {
IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
if (!((irl->t.irt ^ irr->t.irt) & IRT_MARK) && irl->prev == irr->prev)
if (!((irl->t.irt ^ irr->t.irt) & IRT_MARK) && (irl->prev & 0x1FFF) == (irr->prev & 0x1FFF))
continue;
remark |= (~(irl->t.irt & irr->t.irt) & IRT_MARK);
if ((IR(ir->op1)->prev & 0x2000) || (IR(ir->op2)->prev & 0x2000)) {
IR(ir->op1)->prev &= ~0x2000;
IR(ir->op2)->prev &= ~0x2000;
require_remark |= (~(irl->t.irt & irr->t.irt) & IRT_MARK);
}
irt_setmark(IR(ir->op1)->t);
irt_setmark(IR(ir->op2)->t);
}
} while (remark);
return require_remark;
}
/* Sweep instructions and tag sunken allocations and stores. */
static void sink_sweep_ins(jit_State *J)
{
int index = 0;
IRIns *ir, *irbase = IR(REF_BASE);
for (ir = IR(J->cur.nins-1) ; ir >= irbase; ir--) {
switch (ir->o) {
@ -190,8 +239,14 @@ static void sink_sweep_ins(jit_State *J)
#endif
case IR_TNEW: case IR_TDUP:
if (!irt_ismarked(ir->t)) {
ir->t.irt &= ~IRT_GUARD;
if (ir->prev & 0x8000) {
index++; /* A sunk store requires this allocation for unsinking. */
lj_assertJ(index <= 0xFF, "too many heavy sinks");
ir->prev = REGSP(RID_SINK, index);
} else {
ir->prev = REGSP(RID_SINK, 0);
}
ir->t.irt &= ~IRT_GUARD;
J->cur.sinktags = 1; /* Signal present SINK tags to assembler. */
} else {
irt_clearmark(ir->t);
@ -236,11 +291,15 @@ void lj_opt_sink(jit_State *J)
if ((J->flags & need) == need &&
(J->chain[IR_TNEW] || J->chain[IR_TDUP] ||
(LJ_HASFFI && (J->chain[IR_CNEW] || J->chain[IR_CNEWI])))) {
sink_prepare(J);
if (!J->loopref)
sink_mark_snap(J, &J->cur.snap[J->cur.nsnap-1]);
sink_mark_ins(J);
if (J->loopref)
sink_remark_phi(J);
int heavysinks;
int dolightsink = 0;
do {
heavysinks = sink_mark_ins(J, dolightsink);
dolightsink |= heavysinks >= 0xFF;
} while ((J->loopref && sink_remark_phi(J)) || heavysinks >= 0xFF);
sink_sweep_ins(J);
}
}
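Throughout this file the pass keeps its bookkeeping in ir->prev. The masks used above, gathered in one place (the names are illustrative; the patch uses the raw constants):

/* Bit layout of ir->prev during sinking. */
#define SINK_COUNT_MASK 0x1FFF  /* PHI value count; a count that would reach
                                ** 0x1FFF refuses sinking to avoid overflow. */
#define SINK_ASSUMED    0x2000  /* Some store assumed this alloc is sinkable. */
#define SINK_NEEDED     0x4000  /* Value is stored into another sunk alloc. */
#define SINK_HEAVY      0x8000  /* Confirmed heavy sink: sink_sweep_ins will
                                ** assign a global index via REGSP(RID_SINK, index). */

If a marking pass sees 0xFF or more heavy sinks, the 8-bit index space would be exhausted, so the loop re-runs sink_mark_ins with lightsink set and the nested allocations are marked (materialized) instead.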

src/lj_snap.c

@ -428,21 +428,6 @@ static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
return 0;
}
/* Emit parent reference with de-duplication. */
static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
BloomFilter seen, IRRef ref)
{
IRIns *ir = &T->ir[ref];
TRef tr;
if (irref_isk(ref))
tr = snap_replay_const(J, ir);
else if (!regsp_used(ir->prev))
tr = 0;
else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
return tr;
}
/* Check whether a sunk store corresponds to an allocation. Slow path. */
static int snap_sunk_store2(GCtrace *T, IRIns *ira, IRIns *irs)
{
@ -460,10 +445,110 @@ static int snap_sunk_store2(GCtrace *T, IRIns *ira, IRIns *irs)
static LJ_AINLINE int snap_sunk_store(GCtrace *T, IRIns *ira, IRIns *irs)
{
if (irs->s != 255)
return (ira + irs->s == irs); /* Fast check. */
return ira + irs->s == irs && (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
irs->o == IR_FSTORE || irs->o == IR_XSTORE); /* Fast check. */
return snap_sunk_store2(T, ira, irs);
}
/* Emit parent reference with de-duplication. */
static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
BloomFilter seen, IRRef ref, TRef *heavy_restores, int restore)
{
IRIns *ir = &T->ir[ref];
TRef tr;
if (irref_isk(ref))
tr = snap_replay_const(J, ir);
else if (ir->r == RID_SUNK) {
SnapShot *snap = &T->snap[J->exitno];
IRIns *irlast = &T->ir[snap->ref];
TRef op1, op2;
tr = ir->s > 0 ? heavy_restores[ir->s - 1] : 0;
if (tr == 0) {
op1 = ir->op1;
if (op1 >= T->nk) op1 = snap_pref(J, T, map, nmax, seen, op1, heavy_restores, restore);
op2 = ir->op2;
if (op2 >= T->nk) op2 = snap_pref(J, T, map, nmax, seen, op2, heavy_restores, restore);
if (LJ_HASFFI && ir->o == IR_CNEWI) {
if (LJ_32 && ref+1 < T->nins && (ir+1)->o == IR_HIOP) {
TRef pref = snap_pref(J, T, map, nmax, seen, (ir+1)->op2, heavy_restores, restore);
if (restore) {
lj_needsplit(J); /* Emit joining HIOP. */
op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2, pref);
}
}
tr = restore ? emitir(ir->ot & ~(IRT_MARK|IRT_ISPHI), op1, op2) : 1;
if(ir->s > 0)
heavy_restores[ir->s - 1] = tr;
} else {
IRIns *irs;
tr = restore ? emitir(ir->ot, op1, op2) : 1;
if(ir->s > 0)
heavy_restores[ir->s - 1] = tr;
for (irs = ir+1; irs < irlast; irs++)
if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
IRIns *irr = &T->ir[irs->op1];
TRef val, key = irr->op2, tmp = tr;
if (restore) {
if (irr->o != IR_FREF) {
IRIns *irk = &T->ir[key];
if (irr->o == IR_HREFK)
key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
irk->op2);
else
key = snap_replay_const(J, irk);
if (irr->o == IR_HREFK || irr->o == IR_AREF) {
IRIns *irf = &T->ir[irr->op1];
tmp = emitir(irf->ot, tmp, irf->op2);
}
}
tmp = emitir(irr->ot, tmp, key);
}
val = snap_pref(J, T, map, nmax, seen, irs->op2, heavy_restores, restore);
if (val == 0) {
IRIns *irc = &T->ir[irs->op2];
lj_assertJ((irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT) || irc->o == IR_TNEW || irc->o == IR_TDUP,
"sunk store for parent IR %04d with bad op %d",
ref - REF_BIAS, irc->o);
val = snap_pref(J, T, map, nmax, seen, irc->op1, heavy_restores, restore);
if (restore && irc->o == IR_CONV)
val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
} else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
irs+1 < irlast && (irs+1)->o == IR_HIOP) {
IRType t = IRT_I64;
if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP)
t = IRT_NUM;
if (restore)
lj_needsplit(J);
if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
if (restore) {
uint64_t k = (uint32_t)T->ir[irs->op2].i +
((uint64_t)T->ir[(irs+1)->op2].i << 32);
val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k);
}
} else {
TRef pref = snap_pref(J, T, map, nmax, seen, (irs+1)->op2, heavy_restores, restore);
if (restore)
val = emitir_raw(IRT(IR_HIOP, t), val, pref);
}
if (restore)
tmp = emitir(IRT(irs->o, t), tmp, val);
continue;
}
if (restore)
tmp = emitir(irs->ot, tmp, val);
} else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) {
if (restore)
emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
}
}
}
} else if (!regsp_used(ir->prev))
tr = 0;
else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
return tr;
}
/* Replay snapshot state to setup side trace. */
void lj_snap_replay(jit_State *J, GCtrace *T)
{
@ -508,7 +593,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
J->baseslot = s+1;
}
if (pass23) {
IRIns *irlast = &T->ir[snap->ref];
TRef heavy_restores[0xff] = {0};
pass23 = 0;
/* Emit dependent PVALs. */
for (n = 0; n < nent; n++) {
@ -521,101 +606,26 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP ||
ir->o == IR_CNEW || ir->o == IR_CNEWI,
"sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o);
if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
if (LJ_HASFFI && ir->o == IR_CNEWI) {
if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
snap_pref(J, T, map, nent, seen, (ir+1)->op2);
} else {
IRIns *irs;
for (irs = ir+1; irs < irlast; irs++)
if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
irs+1 < irlast && (irs+1)->o == IR_HIOP)
snap_pref(J, T, map, nent, seen, (irs+1)->op2);
}
}
snap_pref(J, T, map, nent, seen, refp, heavy_restores, 0);
} else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
"sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o);
J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1, heavy_restores, 0);
}
}
for (n = 0; pass23 && n < 0xff; n++)
heavy_restores[n] = 0;
/* Replay sunk instructions. */
for (n = 0; pass23 && n < nent; n++) {
SnapEntry sn = map[n];
IRRef refp = snap_ref(sn);
IRIns *ir = &T->ir[refp];
if (regsp_reg(ir->r) == RID_SUNK) {
TRef op1, op2;
if (J->slot[snap_slot(sn)] != snap_slot(sn)) { /* De-dup allocs. */
J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
continue;
}
op1 = ir->op1;
if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
op2 = ir->op2;
if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
if (LJ_HASFFI && ir->o == IR_CNEWI) {
if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
lj_needsplit(J); /* Emit joining HIOP. */
op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
snap_pref(J, T, map, nent, seen, (ir+1)->op2));
}
J->slot[snap_slot(sn)] = emitir(ir->ot & ~(IRT_MARK|IRT_ISPHI), op1, op2);
} else {
IRIns *irs;
TRef tr = emitir(ir->ot, op1, op2);
J->slot[snap_slot(sn)] = tr;
for (irs = ir+1; irs < irlast; irs++)
if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
IRIns *irr = &T->ir[irs->op1];
TRef val, key = irr->op2, tmp = tr;
if (irr->o != IR_FREF) {
IRIns *irk = &T->ir[key];
if (irr->o == IR_HREFK)
key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
irk->op2);
else
key = snap_replay_const(J, irk);
if (irr->o == IR_HREFK || irr->o == IR_AREF) {
IRIns *irf = &T->ir[irr->op1];
tmp = emitir(irf->ot, tmp, irf->op2);
}
}
tmp = emitir(irr->ot, tmp, key);
val = snap_pref(J, T, map, nent, seen, irs->op2);
if (val == 0) {
IRIns *irc = &T->ir[irs->op2];
lj_assertJ(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT,
"sunk store for parent IR %04d with bad op %d",
refp - REF_BIAS, irc->o);
val = snap_pref(J, T, map, nent, seen, irc->op1);
val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
} else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
irs+1 < irlast && (irs+1)->o == IR_HIOP) {
IRType t = IRT_I64;
if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP)
t = IRT_NUM;
lj_needsplit(J);
if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
uint64_t k = (uint32_t)T->ir[irs->op2].i +
((uint64_t)T->ir[(irs+1)->op2].i << 32);
val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k);
} else {
val = emitir_raw(IRT(IR_HIOP, t), val,
snap_pref(J, T, map, nent, seen, (irs+1)->op2));
}
tmp = emitir(IRT(irs->o, t), tmp, val);
continue;
}
tmp = emitir(irs->ot, tmp, val);
} else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) {
emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
}
}
J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, refp, heavy_restores, 1);
}
}
}
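snap_pref now recurses through RID_SUNK allocations and is driven twice over the snapshot entries: with restore == 0 it only walks the graph, emitting PVALs for parent references and storing the placeholder 1 in the cache so shared heavy sinks are visited once; with restore == 1 it actually re-emits allocations, sunk stores and joining HIOPs, caching the resulting TRef under the global index. A condensed sketch of that discipline (is_sunk_entry is hypothetical):

/* Two-pass replay of sunk allocations, as done in lj_snap_replay above. */
TRef heavy_restores[0xff] = {0};
MSize n;
for (n = 0; n < nent; n++)  /* Pass 1: PVALs only, nothing emitted. */
  if (is_sunk_entry(T, map[n]))
    snap_pref(J, T, map, nent, seen, snap_ref(map[n]), heavy_restores, 0);
for (n = 0; n < 0xff; n++)  /* Drop pass-1 placeholders. */
  heavy_restores[n] = 0;
for (n = 0; n < nent; n++)  /* Pass 2: emit; cache real TRefs by index-1. */
  if (is_sunk_entry(T, map[n]))
    J->slot[snap_slot(map[n])] =
      snap_pref(J, T, map, nent, seen, snap_ref(map[n]), heavy_restores, 1);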
@ -630,12 +640,12 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
SnapNo snapno, BloomFilter rfilt,
IRIns *ir, TValue *o);
IRIns *ir, TValue *o, TValue *heavy_restores);
/* Restore a value from the trace exit state. */
static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
SnapNo snapno, BloomFilter rfilt,
IRRef ref, TValue *o)
IRRef ref, TValue *o, TValue *heavy_restores)
{
IRIns *ir = &T->ir[ref];
IRType1 t = ir->t;
@ -651,6 +661,12 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
}
return;
}
if (ir->r == RID_SUNK) {
/* This allocation is itself sunk as well. */
lj_assertJ(ir->s > 0, "Heavy sunken allocation has no global index");
snap_unsink(J, T, ex, snapno, rfilt, ir, o, heavy_restores);
return;
}
if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
rs = snap_renameref(T, snapno, ref, rs);
if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
@ -675,7 +691,7 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
if (ra_noreg(r)) {
lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
"restore from IR %04d has no reg", ref - REF_BIAS);
snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o, heavy_restores);
if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
return;
} else if (irt_isinteger(t)) {
@ -768,11 +784,17 @@ static void snap_restoredata(jit_State *J, GCtrace *T, ExitState *ex,
/* Unsink allocation from the trace exit state. Unsink sunk stores. */
static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
SnapNo snapno, BloomFilter rfilt,
IRIns *ir, TValue *o)
IRIns *ir, TValue *o, TValue *heavy_restores)
{
lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP ||
ir->o == IR_CNEW || ir->o == IR_CNEWI,
"sunk allocation with bad op %d", ir->o);
if (ir->s > 0) {
if (!tvisnil(&heavy_restores[ir->s - 1])) {
copyTV(J->L, o, &heavy_restores[ir->s - 1]);
return;
}
}
#if LJ_HASFFI
if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
CTState *cts = ctype_cts(J->L);
@ -781,6 +803,9 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
CTInfo info = lj_ctype_info(cts, id, &sz);
GCcdata *cd = lj_cdata_newx(cts, id, sz, info);
setcdataV(J->L, o, cd);
if (ir->s > 0) {
copyTV(J->L, &heavy_restores[ir->s - 1], o);
}
if (ir->o == IR_CNEWI) {
uint8_t *p = (uint8_t *)cdataptr(cd);
lj_assertJ(sz == 4 || sz == 8, "sunk cdata with bad size %d", sz);
@ -830,6 +855,9 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) :
lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
settabV(J->L, o, t);
if (ir->s > 0) {
copyTV(J->L, &heavy_restores[ir->s - 1], o);
}
irlast = &T->ir[T->snap[snapno].ref];
for (irs = ir+1; irs < irlast; irs++)
if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
@ -841,7 +869,7 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
if (irk->o == IR_FREF) {
lj_assertJ(irk->op2 == IRFL_TAB_META,
"sunk store with bad field %d", irk->op2);
snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp, heavy_restores);
/* NOBARRIER: The table is new (marked white). */
setgcref(t->metatable, obj2gco(tabV(&tmp)));
} else {
@ -850,9 +878,9 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
lj_ir_kvalue(J->L, &tmp, irk);
val = lj_tab_set(J->L, t, &tmp);
/* NOBARRIER: The table is new (marked white). */
snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val, heavy_restores);
if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp, heavy_restores);
val->u32.hi = tmp.u32.lo;
}
}
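The exit path mirrors the replay-time cache with TValues: snap_unsink consults the cache before materializing and fills it right after creating the table or cdata, so recursive snap_restoreval calls that reach the same heavy sink copy the finished object instead of allocating it twice. The contract, spelled out (a sketch; the nil test matches the initialization in lj_snap_restore):

/* Sketch of the exit-side cache contract. */
static TValue *heavy_slot(TValue *heavy_restores, IRIns *ir)
{
  if (ir->s == 0) return NULL;        /* Plain sink: never cached. */
  return &heavy_restores[ir->s - 1];  /* Global indices are 1-based. */
}
/* First unsink: materialize o, then copyTV(J->L, slot, o).
** Later references: if !tvisnil(slot), copyTV(J->L, o, slot) and return. */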
@ -877,8 +905,13 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
#endif
TValue *frame;
BloomFilter rfilt = snap_renamefilter(T, snapno);
BloomFilter seen = 0;
const BCIns *pc = snap_pc(&map[nent]);
lua_State *L = J->L;
TValue heavy_restores[0xff];
for (n = 0; n < 0xff; n++)
setnilV(&heavy_restores[n]);
/* Set interpreter PC to the next PC to get correct error messages. */
setcframe_pc(cframe_raw(L->cframe), pc+1);
@ -901,20 +934,24 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
IRRef ref = snap_ref(sn);
IRIns *ir = &T->ir[ref];
if (ir->r == RID_SUNK) {
if (bloomtest(seen, ref)) {
MSize j;
for (j = 0; j < n; j++)
if (snap_ref(map[j]) == ref) { /* De-duplicate sunk allocations. */
copyTV(L, o, &frame[snap_slot(map[j])]);
goto dupslot;
}
snap_unsink(J, T, ex, snapno, rfilt, ir, o);
} else {
bloomset(seen, ref);
}
snap_unsink(J, T, ex, snapno, rfilt, ir, o, heavy_restores);
dupslot:
continue;
}
snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
snap_restoreval(J, T, ex, snapno, rfilt, ref, o, heavy_restores);
if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
TValue tmp;
snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp, heavy_restores);
o->u32.hi = tmp.u32.lo;
#if !LJ_FR2
} else if ((sn & (SNAP_CONT|SNAP_FRAME))) {