mirror of
https://github.com/LuaJIT/LuaJIT.git
synced 2025-02-07 23:24:09 +00:00
Add allocation sinking and store sinking optimization.
This commit is contained in:
parent
79e1eaa73b
commit
0af3f47ba0
@ -443,7 +443,7 @@ LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \
|
||||
lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_api.o \
|
||||
lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o \
|
||||
lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
|
||||
lj_opt_dce.o lj_opt_loop.o lj_opt_split.o \
|
||||
lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
|
||||
lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
|
||||
lj_asm.o lj_trace.o lj_gdbjit.o \
|
||||
lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \
|
||||
|
@ -142,6 +142,8 @@ lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
|
||||
lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
|
||||
lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
|
||||
lj_dispatch.h lj_traceerr.h lj_vm.h
|
||||
lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
|
||||
lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
|
||||
lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
|
||||
lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \
|
||||
lj_iropt.h lj_vm.h
|
||||
@ -153,8 +155,9 @@ lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
|
||||
lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
|
||||
lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h
|
||||
lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
|
||||
lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
|
||||
lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h lj_target_*.h
|
||||
lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
|
||||
lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
|
||||
lj_target_*.h lj_ctype.h lj_cdata.h
|
||||
lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
|
||||
lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \
|
||||
lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \
|
||||
@ -188,12 +191,13 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
|
||||
lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \
|
||||
lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h \
|
||||
lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
|
||||
lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_mcode.c lj_snap.c \
|
||||
lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c lj_crecord.h \
|
||||
lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h lj_asm_*.h \
|
||||
lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c \
|
||||
lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c lib_os.c \
|
||||
lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c lib_init.c
|
||||
lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \
|
||||
lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
|
||||
lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
|
||||
lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
|
||||
lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
|
||||
lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \
|
||||
lib_init.c
|
||||
luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
|
||||
host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
|
||||
lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \
|
||||
|
@ -374,10 +374,13 @@ local function dump_snap(tr)
|
||||
end
|
||||
|
||||
-- Return a printable location for a packed rid/sp value.
--
-- ridsp: packed value; the low 8 bits are the register id, the remaining
--        upper bits are the slot.
-- ins:   number of the instruction the location belongs to. It is only used
--        for sink markers with a non-zero slot, where the printed reference
--        is `ins - slot`. May be omitted; a sink marker is then printed
--        without a target reference.
--
-- Results, in order of precedence:
--   " {sink" / " {NNNN"  register id 253 or 254 (sink markers)
--   "[hex]"              any other value with a non-zero slot (slot*4)
--   register name        register id below 128
--   ""                   everything else
local function ridsp_name(ridsp, ins)
  if not disass then disass = require("jit.dis_"..jit.arch) end
  local rid, slot = band(ridsp, 0xff), shr(ridsp, 8)
  -- The sink check must come before the generic slot check: a sink marker
  -- with a non-zero slot is also > 255 and would be printed as a stack slot.
  if rid == 253 or rid == 254 then
    if slot == 0 or ins == nil then return " {sink" end
    return format(" {%04d", ins-slot)
  end
  if ridsp > 255 then return format("[%x]", slot*4) end
  if rid < 128 then return disass.regname(rid) end
  return ""
end
|
||||
@ -458,13 +461,15 @@ local function dump_ir(tr, dumpsnap, dumpreg)
|
||||
end
|
||||
elseif op ~= "NOP " and op ~= "CARG " and
|
||||
(dumpreg or op ~= "RENAME") then
|
||||
local rid = band(ridsp, 255)
|
||||
if dumpreg then
|
||||
out:write(format("%04d %-5s ", ins, ridsp_name(ridsp)))
|
||||
out:write(format("%04d %-6s", ins, ridsp_name(ridsp, ins)))
|
||||
else
|
||||
out:write(format("%04d ", ins))
|
||||
end
|
||||
out:write(format("%s%s %s %s ",
|
||||
band(ot, 128) == 0 and " " or ">",
|
||||
(rid == 254 or rid == 253) and "}" or
|
||||
(band(ot, 128) == 0 and " " or ">"),
|
||||
band(ot, 64) == 0 and " " or "+",
|
||||
irtype[t], op))
|
||||
local m1, m2 = band(m, 3), band(m, 3*4)
|
||||
|
49
src/lj_asm.c
49
src/lj_asm.c
@ -782,11 +782,35 @@ static int asm_snap_canremat(ASMState *as)
|
||||
static void asm_snap_alloc1(ASMState *as, IRRef ref)
|
||||
{
|
||||
IRIns *ir = IR(ref);
|
||||
if (!ra_used(ir)) {
|
||||
RegSet allow = (!LJ_SOFTFP && irt_isnum(ir->t)) ? RSET_FPR : RSET_GPR;
|
||||
/* Get a weak register if we have a free one or can rematerialize. */
|
||||
if (!(ra_used(ir) || ir->r == RID_SUNK)) {
|
||||
if (ir->r == RID_SINK) {
|
||||
ir->r = RID_SUNK;
|
||||
#if LJ_HASFFI
|
||||
if (ir->o == IR_CNEWI) { /* Allocate CNEWI value. */
|
||||
asm_snap_alloc1(as, ir->op2);
|
||||
if (LJ_32 && (ir+1)->o == IR_HIOP)
|
||||
asm_snap_alloc1(as, (ir+1)->op2);
|
||||
}
|
||||
#endif
|
||||
else { /* Allocate stored values for TNEW, TDUP and CNEW. */
|
||||
IRIns *irs;
|
||||
lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW);
|
||||
for (irs = IR(as->curins); irs > ir; irs--)
|
||||
if (irs->r == RID_SINK && ir + irs->s == irs) {
|
||||
lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
|
||||
irs->o == IR_FSTORE || irs->o == IR_XSTORE);
|
||||
asm_snap_alloc1(as, irs->op2);
|
||||
if (LJ_32 && (irs+1)->o == IR_HIOP)
|
||||
asm_snap_alloc1(as, (irs+1)->op2);
|
||||
}
|
||||
}
|
||||
} else if (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT) {
|
||||
asm_snap_alloc1(as, ir->op1);
|
||||
} else {
|
||||
RegSet allow = (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR;
|
||||
if ((as->freeset & allow) ||
|
||||
(allow == RSET_FPR && asm_snap_canremat(as))) {
|
||||
/* Get a weak register if we have a free one or can rematerialize. */
|
||||
Reg r = ra_allocref(as, ref, allow); /* Allocate a register. */
|
||||
if (!irt_isphi(ir->t))
|
||||
ra_weak(as, r); /* But mark it as weakly referenced. */
|
||||
@ -798,6 +822,7 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate refs escaping to a snapshot. */
|
||||
static void asm_snap_alloc(ASMState *as)
|
||||
@ -848,7 +873,7 @@ static void asm_snap_prep(ASMState *as)
|
||||
{
|
||||
if (as->curins < as->snapref) {
|
||||
do {
|
||||
lua_assert(as->snapno != 0);
|
||||
if (as->snapno == 0) return; /* Called by sunk stores before snap #0. */
|
||||
as->snapno--;
|
||||
as->snapref = as->T->snap[as->snapno].ref;
|
||||
} while (as->curins < as->snapref);
|
||||
@ -1180,6 +1205,8 @@ static void asm_phi(ASMState *as, IRIns *ir)
|
||||
RegSet afree = (as->freeset & allow);
|
||||
IRIns *irl = IR(ir->op1);
|
||||
IRIns *irr = IR(ir->op2);
|
||||
if (ir->r == RID_SINK) /* Sink PHI. */
|
||||
return;
|
||||
/* Spill slot shuffling is not implemented yet (but rarely needed). */
|
||||
if (ra_hasspill(irl->s) || ra_hasspill(irr->s))
|
||||
lj_trace_err(as->J, LJ_TRERR_NYIPHI);
|
||||
@ -1494,7 +1521,7 @@ static void asm_tail_link(ASMState *as)
|
||||
/* -- Trace setup --------------------------------------------------------- */
|
||||
|
||||
/* Clear reg/sp for all instructions and add register hints. */
|
||||
static void asm_setup_regsp(ASMState *as)
|
||||
static void asm_setup_regsp(ASMState *as, int sink)
|
||||
{
|
||||
GCtrace *T = as->T;
|
||||
IRRef nins = T->nins;
|
||||
@ -1545,6 +1572,14 @@ static void asm_setup_regsp(ASMState *as)
|
||||
inloop = 0;
|
||||
as->evenspill = SPS_FIRST;
|
||||
for (lastir = IR(nins); ir < lastir; ir++) {
|
||||
if (sink) {
|
||||
if (ir->r == RID_SINK)
|
||||
continue;
|
||||
if (ir->r == RID_SUNK) { /* Revert after ASM restart. */
|
||||
ir->r = RID_SINK;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
switch (ir->o) {
|
||||
case IR_LOOP:
|
||||
inloop = 1;
|
||||
@ -1716,6 +1751,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
|
||||
ASMState as_;
|
||||
ASMState *as = &as_;
|
||||
MCode *origtop;
|
||||
int sink;
|
||||
|
||||
/* Ensure an initialized instruction beyond the last one for HIOP checks. */
|
||||
J->cur.nins = lj_ir_nextins(J);
|
||||
@ -1736,6 +1772,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
|
||||
as->mcp = as->mctop;
|
||||
as->mclim = as->mcbot + MCLIM_REDZONE;
|
||||
asm_setup_target(as);
|
||||
sink = (IR(REF_BASE)->prev == 1);
|
||||
|
||||
do {
|
||||
as->mcp = as->mctop;
|
||||
@ -1751,7 +1788,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
|
||||
as->gcsteps = 0;
|
||||
as->sectref = as->loopref;
|
||||
as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED;
|
||||
asm_setup_regsp(as);
|
||||
asm_setup_regsp(as, sink);
|
||||
if (!as->loopref)
|
||||
asm_tail_link(as);
|
||||
|
||||
|
@ -693,6 +693,8 @@ static void asm_newref(ASMState *as, IRIns *ir)
|
||||
{
|
||||
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
|
||||
IRRef args[3];
|
||||
if (ir->r == RID_SINK) /* Sink newref. */
|
||||
return;
|
||||
args[0] = ASMREF_L; /* lua_State *L */
|
||||
args[1] = ir->op1; /* GCtab *t */
|
||||
args[2] = ASMREF_TMP1; /* cTValue *key */
|
||||
@ -836,10 +838,14 @@ static void asm_xload(ASMState *as, IRIns *ir)
|
||||
|
||||
/* Assemble an XSTORE at byte offset ofs.
** A store flagged RID_SINK emits no store instruction; it only calls
** asm_snap_prep().
*/
static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
{
  Reg src;
  if (ir->r == RID_SINK) {  /* Sunk store: nothing to emit here. */
    asm_snap_prep(as);
    return;
  }
  /* Regular store: value in a GPR, address fused from the reference. */
  src = ra_alloc1(as, ir->op2, RSET_GPR);
  asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
               rset_exclude(RSET_GPR, src), ofs);
}
|
||||
|
||||
static void asm_ahuvload(ASMState *as, IRIns *ir)
|
||||
{
|
||||
@ -876,6 +882,9 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
|
||||
|
||||
static void asm_ahustore(ASMState *as, IRIns *ir)
|
||||
{
|
||||
if (ir->r == RID_SINK) { /* Sink store. */
|
||||
asm_snap_prep(as);
|
||||
} else {
|
||||
RegSet allow = RSET_GPR;
|
||||
Reg idx, src = RID_NONE, type = RID_NONE;
|
||||
int32_t ofs = 0;
|
||||
@ -892,6 +901,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
|
||||
if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs);
|
||||
emit_lso(as, ARMI_STR, type, idx, ofs+4);
|
||||
}
|
||||
}
|
||||
|
||||
static void asm_sload(ASMState *as, IRIns *ir)
|
||||
{
|
||||
@ -1382,6 +1392,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
|
||||
asm_fpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO);
|
||||
return;
|
||||
} else if ((ir-1)->o == IR_XSTORE) {
|
||||
if ((ir-1)->r == RID_SINK)
|
||||
asm_snap_prep(as);
|
||||
else
|
||||
asm_xstore(as, ir, 4);
|
||||
return;
|
||||
}
|
||||
|
@ -769,6 +769,9 @@ nolo:
|
||||
|
||||
static void asm_newref(ASMState *as, IRIns *ir)
|
||||
{
|
||||
if (ir->r == RID_SINK) { /* Sink newref. */
|
||||
return;
|
||||
} else {
|
||||
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
|
||||
IRRef args[3];
|
||||
args[0] = ASMREF_L; /* lua_State *L */
|
||||
@ -778,6 +781,7 @@ static void asm_newref(ASMState *as, IRIns *ir)
|
||||
asm_gencall(as, ci, args);
|
||||
asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
|
||||
}
|
||||
}
|
||||
|
||||
static void asm_uref(ASMState *as, IRIns *ir)
|
||||
{
|
||||
@ -912,10 +916,15 @@ static void asm_xload(ASMState *as, IRIns *ir)
|
||||
|
||||
/* Assemble an XSTORE at byte offset ofs.
** A store flagged RID_SINK emits no store instruction; it only calls
** asm_snap_prep().
*/
static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
{
  RegSet allow;
  Reg src;
  if (ir->r == RID_SINK) {  /* Sunk store: nothing to emit here. */
    asm_snap_prep(as);
    return;
  }
  /* FP values are stored from an FPR, everything else from a GPR. */
  allow = irt_isfp(ir->t) ? RSET_FPR : RSET_GPR;
  src = ra_alloc1z(as, ir->op2, allow);
  asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
               rset_exclude(RSET_GPR, src), ofs);
}
|
||||
|
||||
static void asm_ahuvload(ASMState *as, IRIns *ir)
|
||||
{
|
||||
@ -947,6 +956,10 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
|
||||
RegSet allow = RSET_GPR;
|
||||
Reg idx, src = RID_NONE, type = RID_NONE;
|
||||
int32_t ofs = 0;
|
||||
if (ir->r == RID_SINK) { /* Sink store. */
|
||||
asm_snap_prep(as);
|
||||
return;
|
||||
}
|
||||
if (irt_isnum(ir->t)) {
|
||||
src = ra_alloc1(as, ir->op2, RSET_FPR);
|
||||
} else {
|
||||
@ -1561,8 +1574,12 @@ static void asm_hiop(ASMState *as, IRIns *ir)
|
||||
return;
|
||||
} else if ((ir-1)->o == IR_XSTORE) {
|
||||
as->curins--; /* Handle both stores here. */
|
||||
if ((ir-1)->r == RID_SINK) {
|
||||
asm_snap_prep(as);
|
||||
} else {
|
||||
asm_xstore(as, ir, LJ_LE ? 4 : 0);
|
||||
asm_xstore(as, ir-1, LJ_LE ? 0 : 4);
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
|
||||
|
@ -773,6 +773,8 @@ static void asm_newref(ASMState *as, IRIns *ir)
|
||||
{
|
||||
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
|
||||
IRRef args[3];
|
||||
if (ir->r == RID_SINK) /* Sink newref. */
|
||||
return;
|
||||
args[0] = ASMREF_L; /* lua_State *L */
|
||||
args[1] = ir->op1; /* GCtab *t */
|
||||
args[2] = ASMREF_TMP1; /* cTValue *key */
|
||||
@ -892,6 +894,9 @@ static void asm_fload(ASMState *as, IRIns *ir)
|
||||
|
||||
static void asm_fstore(ASMState *as, IRIns *ir)
|
||||
{
|
||||
if (ir->r == RID_SINK) { /* Sink store. */
|
||||
asm_snap_prep(as);
|
||||
} else {
|
||||
Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
|
||||
IRIns *irf = IR(ir->op1);
|
||||
Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
|
||||
@ -899,6 +904,7 @@ static void asm_fstore(ASMState *as, IRIns *ir)
|
||||
PPCIns pi = asm_fxstoreins(ir);
|
||||
emit_tai(as, pi, src, idx, ofs);
|
||||
}
|
||||
}
|
||||
|
||||
static void asm_xload(ASMState *as, IRIns *ir)
|
||||
{
|
||||
@ -912,6 +918,10 @@ static void asm_xload(ASMState *as, IRIns *ir)
|
||||
static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
|
||||
{
|
||||
IRIns *irb;
|
||||
if (ir->r == RID_SINK) { /* Sink store. */
|
||||
asm_snap_prep(as);
|
||||
return;
|
||||
}
|
||||
if (ofs == 0 && mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP &&
|
||||
ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) {
|
||||
/* Fuse BSWAP with XSTORE to stwbrx. */
|
||||
@ -968,6 +978,10 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
|
||||
RegSet allow = RSET_GPR;
|
||||
Reg idx, src = RID_NONE, type = RID_NONE;
|
||||
int32_t ofs = AHUREF_LSX;
|
||||
if (ir->r == RID_SINK) { /* Sink store. */
|
||||
asm_snap_prep(as);
|
||||
return;
|
||||
}
|
||||
if (irt_isnum(ir->t)) {
|
||||
src = ra_alloc1(as, ir->op2, RSET_FPR);
|
||||
} else {
|
||||
@ -1747,8 +1761,12 @@ static void asm_hiop(ASMState *as, IRIns *ir)
|
||||
return;
|
||||
} else if ((ir-1)->o == IR_XSTORE) {
|
||||
as->curins--; /* Handle both stores here. */
|
||||
if ((ir-1)->r == RID_SINK) {
|
||||
asm_snap_prep(as);
|
||||
} else {
|
||||
asm_xstore(as, ir, 0);
|
||||
asm_xstore(as, ir-1, 4);
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
|
||||
|
@ -1155,6 +1155,8 @@ static void asm_newref(ASMState *as, IRIns *ir)
|
||||
IRRef args[3];
|
||||
IRIns *irkey;
|
||||
Reg tmp;
|
||||
if (ir->r == RID_SINK) /* Sink newref. */
|
||||
return;
|
||||
args[0] = ASMREF_L; /* lua_State *L */
|
||||
args[1] = ir->op1; /* GCtab *t */
|
||||
args[2] = ASMREF_TMP1; /* cTValue *key */
|
||||
@ -1259,6 +1261,10 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
|
||||
RegSet allow = RSET_GPR;
|
||||
Reg src = RID_NONE, osrc = RID_NONE;
|
||||
int32_t k = 0;
|
||||
if (ir->r == RID_SINK) { /* Sink store. */
|
||||
asm_snap_prep(as);
|
||||
return;
|
||||
}
|
||||
/* The IRT_I16/IRT_U16 stores should never be simplified for constant
|
||||
** values since mov word [mem], imm16 has a length-changing prefix.
|
||||
*/
|
||||
@ -1372,6 +1378,10 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
|
||||
|
||||
static void asm_ahustore(ASMState *as, IRIns *ir)
|
||||
{
|
||||
if (ir->r == RID_SINK) { /* Sink store. */
|
||||
asm_snap_prep(as);
|
||||
return;
|
||||
}
|
||||
if (irt_isnum(ir->t)) {
|
||||
Reg src = ra_alloc1(as, ir->op2, RSET_FPR);
|
||||
asm_fuseahuref(as, ir->op1, RSET_GPR);
|
||||
@ -2251,6 +2261,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
|
||||
asm_comp_int64(as, ir);
|
||||
return;
|
||||
} else if ((ir-1)->o == IR_XSTORE) {
|
||||
if ((ir-1)->r == RID_SINK)
|
||||
asm_snap_prep(as);
|
||||
else
|
||||
asm_fxstore(as, ir);
|
||||
return;
|
||||
}
|
||||
|
@ -154,6 +154,7 @@ LJ_FUNC void lj_opt_split(jit_State *J);
|
||||
#else
|
||||
#define lj_opt_split(J) UNUSED(J)
|
||||
#endif
|
||||
LJ_FUNC void lj_opt_sink(jit_State *J);
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -63,19 +63,20 @@
|
||||
#define JIT_F_OPT_NARROW 0x00200000
|
||||
#define JIT_F_OPT_LOOP 0x00400000
|
||||
#define JIT_F_OPT_ABC 0x00800000
|
||||
#define JIT_F_OPT_FUSE 0x01000000
|
||||
#define JIT_F_OPT_SINK 0x01000000
|
||||
#define JIT_F_OPT_FUSE 0x02000000
|
||||
|
||||
/* Optimizations names for -O. Must match the order above. */
|
||||
#define JIT_F_OPT_FIRST JIT_F_OPT_FOLD
|
||||
#define JIT_F_OPTSTRING \
|
||||
"\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4fuse"
|
||||
"\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse"
|
||||
|
||||
/* Optimization levels set a fixed combination of flags. */
|
||||
#define JIT_F_OPT_0 0
|
||||
#define JIT_F_OPT_1 (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE)
|
||||
#define JIT_F_OPT_2 (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP)
|
||||
#define JIT_F_OPT_3 \
|
||||
(JIT_F_OPT_2|JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_FUSE)
|
||||
#define JIT_F_OPT_3 (JIT_F_OPT_2|\
|
||||
JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE)
|
||||
#define JIT_F_OPT_DEFAULT JIT_F_OPT_3
|
||||
|
||||
#if LJ_TARGET_WINDOWS || LJ_64
|
||||
|
244
src/lj_opt_sink.c
Normal file
244
src/lj_opt_sink.c
Normal file
@ -0,0 +1,244 @@
|
||||
/*
|
||||
** SINK: Allocation Sinking and Store Sinking.
|
||||
** Copyright (C) 2005-2012 Mike Pall. See Copyright Notice in luajit.h
|
||||
*/
|
||||
|
||||
#define lj_opt_sink_c
|
||||
#define LUA_CORE
|
||||
|
||||
#include "lj_obj.h"
|
||||
|
||||
#if LJ_HASJIT
|
||||
|
||||
#include "lj_ir.h"
|
||||
#include "lj_jit.h"
|
||||
#include "lj_iropt.h"
|
||||
#include "lj_target.h"
|
||||
|
||||
/* Some local macros to save typing. Undef'd at the end. */
|
||||
#define IR(ref) (&J->cur.ir[(ref)])
|
||||
|
||||
/* Check whether the store ref points to an eligible allocation. */
|
||||
static IRIns *sink_checkalloc(jit_State *J, IRIns *irs)
|
||||
{
|
||||
IRIns *ir = IR(irs->op1);
|
||||
if (!irref_isk(ir->op2))
|
||||
return NULL; /* Non-constant key. */
|
||||
if (ir->o == IR_HREFK || ir->o == IR_AREF)
|
||||
ir = IR(ir->op1);
|
||||
else if (!(ir->o == IR_HREF || ir->o == IR_NEWREF ||
|
||||
ir->o == IR_FREF || ir->o == IR_ADD))
|
||||
return NULL; /* Unhandled reference type (for XSTORE). */
|
||||
ir = IR(ir->op1);
|
||||
if (!(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW))
|
||||
return NULL; /* Not an allocation. */
|
||||
if (ir + 255 < irs)
|
||||
return NULL; /* Out of range. */
|
||||
return ir; /* Return allocation. */
|
||||
}
|
||||
|
||||
/* Recursively check whether a value depends on a PHI. */
|
||||
static int sink_phidep(jit_State *J, IRRef ref)
|
||||
{
|
||||
IRIns *ir = IR(ref);
|
||||
if (irt_isphi(ir->t)) return 1;
|
||||
if (ir->op1 >= REF_FIRST && sink_phidep(J, ir->op1)) return 1;
|
||||
if (ir->op2 >= REF_FIRST && sink_phidep(J, ir->op2)) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check whether a value is a sinkable PHI or a non-PHI. */
|
||||
static int sink_checkphi(jit_State *J, IRIns *ira, IRRef ref)
|
||||
{
|
||||
if (ref >= REF_FIRST) {
|
||||
IRIns *ir = IR(ref);
|
||||
if (irt_isphi(ir->t) || (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT &&
|
||||
irt_isphi(IR(ir->op1)->t))) {
|
||||
ira->prev++;
|
||||
return 1; /* Sinkable PHI. */
|
||||
}
|
||||
return !sink_phidep(J, ref); /* Must be a non-PHI then. */
|
||||
}
|
||||
return 1; /* Constant (non-PHI). */
|
||||
}
|
||||
|
||||
/* Mark non-sinkable allocations using single-pass backward propagation.
|
||||
**
|
||||
** Roots for the marking process are:
|
||||
** - Some PHIs or snapshots (see below).
|
||||
** - Non-PHI, non-constant values stored to PHI allocations.
|
||||
** - All guards.
|
||||
** - Any remaining loads not eliminated by store-to-load forwarding.
|
||||
** - Stores with non-constant keys.
|
||||
** - All stored values.
|
||||
*/
|
||||
static void sink_mark_ins(jit_State *J)
|
||||
{
|
||||
IRIns *ir, *irlast = IR(J->cur.nins-1);
|
||||
for (ir = irlast ; ; ir--) {
|
||||
switch (ir->o) {
|
||||
case IR_BASE:
|
||||
return; /* Finished. */
|
||||
case IR_CALLL: /* IRCALL_lj_tab_len */
|
||||
case IR_ALOAD: case IR_HLOAD: case IR_XLOAD:
|
||||
irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */
|
||||
break;
|
||||
case IR_FLOAD:
|
||||
if (irt_ismarked(ir->t) || ir->op2 == IRFL_TAB_META)
|
||||
irt_setmark(IR(ir->op1)->t); /* Mark table for remaining loads. */
|
||||
break;
|
||||
case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: {
|
||||
IRIns *ira = sink_checkalloc(J, ir);
|
||||
if (!ira || (irt_isphi(ira->t) && !sink_checkphi(J, ira, ir->op2)))
|
||||
irt_setmark(IR(ir->op1)->t); /* Mark ineligible ref. */
|
||||
irt_setmark(IR(ir->op2)->t); /* Mark stored value. */
|
||||
break;
|
||||
}
|
||||
#if LJ_HASFFI
|
||||
case IR_CNEWI:
|
||||
if (irt_isphi(ir->t) &&
|
||||
(!sink_checkphi(J, ir, ir->op2) ||
|
||||
(LJ_32 && ir+1 < irlast && (ir+1)->o == IR_HIOP &&
|
||||
!sink_checkphi(J, ir, (ir+1)->op2))))
|
||||
irt_setmark(ir->t); /* Mark ineligible allocation. */
|
||||
/* fallthrough */
|
||||
#endif
|
||||
case IR_USTORE:
|
||||
irt_setmark(IR(ir->op2)->t); /* Mark stored value. */
|
||||
break;
|
||||
#if LJ_HASFFI
|
||||
case IR_CALLXS:
|
||||
#endif
|
||||
case IR_CALLS:
|
||||
irt_setmark(IR(ir->op1)->t); /* Mark (potentially) stored values. */
|
||||
break;
|
||||
case IR_PHI: {
|
||||
IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
|
||||
irl->prev = irr->prev = 0; /* Clear PHI value counts. */
|
||||
if (irl->o == irr->o &&
|
||||
(irl->o == IR_TNEW || irl->o == IR_TDUP ||
|
||||
(LJ_HASFFI && (irl->o == IR_CNEW || irl->o == IR_CNEWI))))
|
||||
break;
|
||||
irt_setmark(irl->t);
|
||||
irt_setmark(irr->t);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
if (irt_ismarked(ir->t) || irt_isguard(ir->t)) { /* Propagate mark. */
|
||||
if (ir->op1 >= REF_FIRST) irt_setmark(IR(ir->op1)->t);
|
||||
if (ir->op2 >= REF_FIRST) irt_setmark(IR(ir->op2)->t);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Mark all instructions referenced by a snapshot. */
|
||||
static void sink_mark_snap(jit_State *J, SnapShot *snap)
|
||||
{
|
||||
SnapEntry *map = &J->cur.snapmap[snap->mapofs];
|
||||
MSize n, nent = snap->nent;
|
||||
for (n = 0; n < nent; n++) {
|
||||
IRRef ref = snap_ref(map[n]);
|
||||
if (!irref_isk(ref))
|
||||
irt_setmark(IR(ref)->t);
|
||||
}
|
||||
}
|
||||
|
||||
/* Iteratively remark PHI refs with differing marks or PHI value counts. */
|
||||
static void sink_remark_phi(jit_State *J)
|
||||
{
|
||||
IRIns *ir;
|
||||
int remark;
|
||||
do {
|
||||
remark = 0;
|
||||
for (ir = IR(J->cur.nins-1); ir->o == IR_PHI; ir--) {
|
||||
IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
|
||||
if (((irl->t.irt ^ irr->t.irt) & IRT_MARK))
|
||||
remark = 1;
|
||||
else if (irl->prev == irr->prev)
|
||||
continue;
|
||||
irt_setmark(IR(ir->op1)->t);
|
||||
irt_setmark(IR(ir->op2)->t);
|
||||
}
|
||||
} while (remark);
|
||||
}
|
||||
|
||||
/* Sweep instructions and mark sunken allocations and stores. */
|
||||
static void sink_sweep_ins(jit_State *J)
|
||||
{
|
||||
IRIns *ir, *irfirst = IR(J->cur.nk);
|
||||
for (ir = IR(J->cur.nins-1) ; ir >= irfirst; ir--) {
|
||||
switch (ir->o) {
|
||||
case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: {
|
||||
IRIns *ira = sink_checkalloc(J, ir);
|
||||
if (ira && !irt_ismarked(ira->t))
|
||||
ir->prev = REGSP(RID_SINK, (int)(ir - ira));
|
||||
else
|
||||
ir->prev = REGSP_INIT;
|
||||
break;
|
||||
}
|
||||
case IR_NEWREF:
|
||||
if (!irt_ismarked(ir->t)) {
|
||||
ir->prev = REGSP(RID_SINK, 0);
|
||||
} else {
|
||||
irt_clearmark(ir->t);
|
||||
ir->prev = REGSP_INIT;
|
||||
}
|
||||
break;
|
||||
#if LJ_HASFFI
|
||||
case IR_CNEW: case IR_CNEWI:
|
||||
#endif
|
||||
case IR_TNEW: case IR_TDUP:
|
||||
if (!irt_ismarked(ir->t)) {
|
||||
ir->t.irt &= ~IRT_GUARD;
|
||||
ir->prev = REGSP(RID_SINK, 0);
|
||||
} else {
|
||||
irt_clearmark(ir->t);
|
||||
ir->prev = REGSP_INIT;
|
||||
}
|
||||
break;
|
||||
case IR_PHI: {
|
||||
IRIns *ira = IR(ir->op2);
|
||||
if (!irt_ismarked(ira->t) &&
|
||||
(ira->o == IR_TNEW || ira->o == IR_TDUP ||
|
||||
(LJ_HASFFI && (ira->o == IR_CNEW || ira->o == IR_CNEWI)))) {
|
||||
ir->prev = REGSP(RID_SINK, 0);
|
||||
} else {
|
||||
ir->prev = REGSP_INIT;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
irt_clearmark(ir->t);
|
||||
ir->prev = REGSP_INIT;
|
||||
break;
|
||||
}
|
||||
}
|
||||
IR(REF_BASE)->prev = 1; /* Signal SINK flags to assembler. */
|
||||
}
|
||||
|
||||
/* Allocation sinking and store sinking.
|
||||
**
|
||||
** 1. Mark all non-sinkable allocations.
|
||||
** 2. Then sink all remaining allocations and the related stores.
|
||||
*/
|
||||
void lj_opt_sink(jit_State *J)
|
||||
{
|
||||
const uint32_t need = (JIT_F_OPT_SINK|JIT_F_OPT_FWD|
|
||||
JIT_F_OPT_DCE|JIT_F_OPT_CSE|JIT_F_OPT_FOLD);
|
||||
if ((J->flags & need) == need &&
|
||||
(J->chain[IR_TNEW] || J->chain[IR_TDUP] ||
|
||||
(LJ_HASFFI && (J->chain[IR_CNEW] || J->chain[IR_CNEWI])))) {
|
||||
if (!J->loopref)
|
||||
sink_mark_snap(J, &J->cur.snap[J->cur.nsnap-1]);
|
||||
sink_mark_ins(J);
|
||||
if (J->loopref)
|
||||
sink_remark_phi(J);
|
||||
sink_sweep_ins(J);
|
||||
}
|
||||
}
|
||||
|
||||
#undef IR
|
||||
|
||||
#endif
|
328
src/lj_snap.c
328
src/lj_snap.c
@ -11,6 +11,7 @@
|
||||
#if LJ_HASJIT
|
||||
|
||||
#include "lj_gc.h"
|
||||
#include "lj_tab.h"
|
||||
#include "lj_state.h"
|
||||
#include "lj_frame.h"
|
||||
#include "lj_bc.h"
|
||||
@ -20,10 +21,17 @@
|
||||
#include "lj_trace.h"
|
||||
#include "lj_snap.h"
|
||||
#include "lj_target.h"
|
||||
#if LJ_HASFFI
|
||||
#include "lj_ctype.h"
|
||||
#include "lj_cdata.h"
|
||||
#endif
|
||||
|
||||
/* Some local macros to save typing. Undef'd at the end. */
|
||||
#define IR(ref) (&J->cur.ir[(ref)])
|
||||
|
||||
/* Pass IR on to next optimization in chain (FOLD). */
|
||||
#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
|
||||
|
||||
/* Emit raw IR without passing through optimizations. */
|
||||
#define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
|
||||
|
||||
@ -370,6 +378,31 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir)
|
||||
}
|
||||
}
|
||||
|
||||
/* De-duplicate parent reference. */
|
||||
static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
|
||||
{
|
||||
MSize j;
|
||||
for (j = 0; j < nmax; j++)
|
||||
if (snap_ref(map[j]) == ref)
|
||||
return J->slot[snap_slot(map[j])];
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Emit parent reference with de-duplication. */
|
||||
static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
|
||||
BloomFilter seen, IRRef ref)
|
||||
{
|
||||
IRIns *ir = &T->ir[ref];
|
||||
TRef tr;
|
||||
if (irref_isk(ref))
|
||||
tr = snap_replay_const(J, ir);
|
||||
else if (!regsp_used(ir->prev))
|
||||
tr = 0;
|
||||
else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
|
||||
tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
|
||||
return tr;
|
||||
}
|
||||
|
||||
/* Replay snapshot state to setup side trace. */
|
||||
void lj_snap_replay(jit_State *J, GCtrace *T)
|
||||
{
|
||||
@ -377,6 +410,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
|
||||
SnapEntry *map = &T->snapmap[snap->mapofs];
|
||||
MSize n, nent = snap->nent;
|
||||
BloomFilter seen = 0;
|
||||
int pass23 = 0;
|
||||
J->framedepth = 0;
|
||||
/* Emit IR for slots inherited from parent snapshot. */
|
||||
for (n = 0; n < nent; n++) {
|
||||
@ -386,21 +420,18 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
|
||||
IRIns *ir = &T->ir[ref];
|
||||
TRef tr;
|
||||
/* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
|
||||
if (bloomtest(seen, ref)) {
|
||||
MSize j;
|
||||
for (j = 0; j < n; j++)
|
||||
if (snap_ref(map[j]) == ref) {
|
||||
tr = J->slot[snap_slot(map[j])];
|
||||
if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
|
||||
goto setslot;
|
||||
}
|
||||
}
|
||||
bloomset(seen, ref);
|
||||
if (irref_isk(ref)) {
|
||||
tr = snap_replay_const(J, ir);
|
||||
} else if (!regsp_used(ir->prev)) {
|
||||
pass23 = 1;
|
||||
lua_assert(s != 0);
|
||||
tr = s;
|
||||
} else {
|
||||
IRType t = irt_type(ir->t);
|
||||
uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
|
||||
lua_assert(regsp_used(ir->prev));
|
||||
if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
|
||||
if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
|
||||
tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
|
||||
@ -411,13 +442,126 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
|
||||
if ((sn & SNAP_FRAME))
|
||||
J->baseslot = s+1;
|
||||
}
|
||||
if (pass23) {
|
||||
IRIns *irlast = &T->ir[(snap+1)->ref];
|
||||
lua_assert(J->exitno+1 < T->nsnap);
|
||||
pass23 = 0;
|
||||
/* Emit dependent PVALs. */
|
||||
for (n = 0; n < nent; n++) {
|
||||
SnapEntry sn = map[n];
|
||||
IRRef refp = snap_ref(sn);
|
||||
IRIns *ir = &T->ir[refp];
|
||||
if (regsp_reg(ir->r) == RID_SUNK) {
|
||||
if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
|
||||
pass23 = 1;
|
||||
lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
|
||||
ir->o == IR_CNEW || ir->o == IR_CNEWI);
|
||||
if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
|
||||
if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
|
||||
if (LJ_HASFFI && ir->o == IR_CNEWI) {
|
||||
if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
|
||||
snap_pref(J, T, map, nent, seen, (ir+1)->op2);
|
||||
} else {
|
||||
IRIns *irs;
|
||||
for (irs = ir+1; irs < irlast; irs++)
|
||||
if (irs->r == RID_SINK && ir + irs->s == irs) {
|
||||
if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
|
||||
snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
|
||||
else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
|
||||
irs+1 < irlast && (irs+1)->o == IR_HIOP)
|
||||
snap_pref(J, T, map, nent, seen, (irs+1)->op2);
|
||||
}
|
||||
}
|
||||
} else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
|
||||
lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
|
||||
J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
|
||||
}
|
||||
}
|
||||
/* Replay sunk instructions. */
|
||||
for (n = 0; pass23 && n < nent; n++) {
|
||||
SnapEntry sn = map[n];
|
||||
IRRef refp = snap_ref(sn);
|
||||
IRIns *ir = &T->ir[refp];
|
||||
if (regsp_reg(ir->r) == RID_SUNK) {
|
||||
TRef op1, op2;
|
||||
if (J->slot[snap_slot(sn)] != snap_slot(sn)) { /* De-dup allocs. */
|
||||
J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
|
||||
continue;
|
||||
}
|
||||
op1 = ir->op1;
|
||||
if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
|
||||
op2 = ir->op2;
|
||||
if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
|
||||
if (LJ_HASFFI && ir->o == IR_CNEWI) {
|
||||
if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
|
||||
lj_needsplit(J); /* Emit joining HIOP. */
|
||||
op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
|
||||
snap_pref(J, T, map, nent, seen, (ir+1)->op2));
|
||||
}
|
||||
J->slot[snap_slot(sn)] = emitir(ir->ot, op1, op2);
|
||||
} else {
|
||||
IRIns *irs;
|
||||
TRef tr = emitir(ir->ot, op1, op2);
|
||||
J->slot[snap_slot(sn)] = tr;
|
||||
for (irs = ir+1; irs < irlast; irs++)
|
||||
if (irs->r == RID_SINK && ir + irs->s == irs) {
|
||||
IRIns *irr = &T->ir[irs->op1];
|
||||
TRef val, key = irr->op2, tmp = tr;
|
||||
if (irr->o != IR_FREF) {
|
||||
IRIns *irk = &T->ir[key];
|
||||
if (irr->o == IR_HREFK)
|
||||
key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
|
||||
irk->op2);
|
||||
else
|
||||
key = snap_replay_const(J, irk);
|
||||
if (irr->o == IR_HREFK || irr->o == IR_AREF) {
|
||||
IRIns *irf = &T->ir[irr->op1];
|
||||
tmp = emitir(irf->ot, tmp, irf->op2);
|
||||
}
|
||||
}
|
||||
tmp = emitir(irr->ot, tmp, key);
|
||||
val = snap_pref(J, T, map, nent, seen, irs->op2);
|
||||
if (val == 0) {
|
||||
IRIns *irc = &T->ir[irs->op2];
|
||||
lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
|
||||
val = snap_pref(J, T, map, nent, seen, irc->op1);
|
||||
val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
|
||||
} else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
|
||||
irs+1 < irlast && (irs+1)->o == IR_HIOP) {
|
||||
IRType t = IRT_I64;
|
||||
if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP)
|
||||
t = IRT_NUM;
|
||||
if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
|
||||
uint64_t k = (uint32_t)T->ir[irs->op2].i +
|
||||
((uint64_t)T->ir[(irs+1)->op2].i << 32);
|
||||
val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM,
|
||||
lj_ir_k64_find(J, k));
|
||||
} else {
|
||||
val = emitir_raw(IRT(IR_HIOP, t), val,
|
||||
snap_pref(J, T, map, nent, seen, (irs+1)->op2));
|
||||
}
|
||||
tmp = emitir(IRT(irs->o, t), tmp, val);
|
||||
continue;
|
||||
}
|
||||
tmp = emitir(irs->ot, tmp, val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
J->base = J->slot + J->baseslot;
|
||||
J->maxslot = snap->nslots - J->baseslot;
|
||||
lj_snap_add(J);
|
||||
if (pass23) /* Need explicit GC step _after_ initial snapshot. */
|
||||
emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
|
||||
}
|
||||
|
||||
/* -- Snapshot restore ---------------------------------------------------- */
|
||||
|
||||
static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
|
||||
SnapNo snapno, BloomFilter rfilt,
|
||||
IRIns *ir, TValue *o);
|
||||
|
||||
/* Restore a value from the trace exit state. */
|
||||
static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
|
||||
SnapNo snapno, BloomFilter rfilt,
|
||||
@ -450,8 +594,12 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
|
||||
}
|
||||
} else { /* Restore from register. */
|
||||
Reg r = regsp_reg(rs);
|
||||
lua_assert(ra_hasreg(r));
|
||||
if (irt_isinteger(t)) {
|
||||
if (ra_noreg(r)) {
|
||||
lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
|
||||
snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
|
||||
if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
|
||||
return;
|
||||
} else if (irt_isinteger(t)) {
|
||||
setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
|
||||
#if !LJ_SOFTFP
|
||||
} else if (irt_isnum(t)) {
|
||||
@ -468,6 +616,148 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
|
||||
}
|
||||
}
|
||||
|
||||
#if LJ_HASFFI
|
||||
/* Restore raw data from the trace exit state. */
|
||||
static void snap_restoredata(GCtrace *T, ExitState *ex,
|
||||
SnapNo snapno, BloomFilter rfilt,
|
||||
IRRef ref, void *dst, CTSize sz)
|
||||
{
|
||||
IRIns *ir = &T->ir[ref];
|
||||
RegSP rs = ir->prev;
|
||||
int32_t *src;
|
||||
union { uint64_t u64; float f; } tmp;
|
||||
if (irref_isk(ref)) {
|
||||
if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
|
||||
src = mref(ir->ptr, int32_t);
|
||||
} else if (sz == 8) {
|
||||
tmp.u64 = (uint64_t)(uint32_t)ir->i;
|
||||
src = (int32_t *)&tmp.u64;
|
||||
} else {
|
||||
src = &ir->i;
|
||||
}
|
||||
} else {
|
||||
if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
|
||||
rs = snap_renameref(T, snapno, ref, rs);
|
||||
if (ra_hasspill(regsp_spill(rs))) {
|
||||
src = &ex->spill[regsp_spill(rs)];
|
||||
} else {
|
||||
Reg r = regsp_reg(rs);
|
||||
if (ra_noreg(r)) {
|
||||
/* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
|
||||
lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
|
||||
snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4);
|
||||
*(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
|
||||
return;
|
||||
}
|
||||
src = (int32_t *)&ex->gpr[r-RID_MIN_GPR];
|
||||
#if !LJ_SOFTFP
|
||||
if (r >= RID_MAX_GPR) {
|
||||
src = (int32_t *)&ex->fpr[r-RID_MIN_FPR];
|
||||
#if LJ_TARGET_PPC
|
||||
if (sz == 4) { /* PPC FPRs are always doubles. */
|
||||
tmp.f = (float)*(double *)src;
|
||||
src = (int32_t *)&tmp.f;
|
||||
}
|
||||
#else
|
||||
if (LJ_BE && sz == 4) src++;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
|
||||
if (sz == 4) *(int32_t *)dst = *src;
|
||||
else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
|
||||
else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
|
||||
else *(int16_t *)dst = (int16_t)*src;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Unsink allocation from the trace exit state. Unsink sunk stores.
**
** 'ir' is the allocation instruction of trace T (asserted to be TNEW, TDUP,
** CNEW or CNEWI). The object is allocated for real, stored into the slot
** 'o', and then initialized:
** - CNEWI: the immediate value (ir->op2, plus a following HIOP on 32 bit
**   targets) is written into the cdata payload.
** - CNEW/TNEW/TDUP: the instructions between the allocation and the IR ref
**   of snapshot 'snapno' are scanned for stores marked RID_SINK that belong
**   to this allocation, and each one is performed on the new object.
** Stored values are fetched from the exit state 'ex' with
** snap_restoredata() (cdata) or snap_restoreval() (tables).
*/
|
||||
static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
|
||||
SnapNo snapno, BloomFilter rfilt,
|
||||
IRIns *ir, TValue *o)
|
||||
{
|
||||
lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
|
||||
ir->o == IR_CNEW || ir->o == IR_CNEWI);
|
||||
#if LJ_HASFFI
|
||||
if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
|
||||
CTState *cts = ctype_ctsG(J2G(J));
|
||||
/* op1 refers to a constant whose integer value is the ctype id. */
|
||||
CTypeID id = (CTypeID)T->ir[ir->op1].i;
|
||||
CTSize sz = lj_ctype_size(cts, id);
|
||||
GCcdata *cd = lj_cdata_new(cts, id, sz);
|
||||
setcdataV(J->L, o, cd);
|
||||
if (ir->o == IR_CNEWI) {
|
||||
uint8_t *p = (uint8_t *)cdataptr(cd);
|
||||
lua_assert(sz == 4 || sz == 8);
|
||||
/* 32 bit target with an 8 byte value split across CNEWI + HIOP: restore
|
||||
** the HIOP operand into one 4 byte half (offset 4 on little-endian,
|
||||
** offset 0 on big-endian) and ir->op2 into the other half.
|
||||
*/
|
||||
if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
|
||||
snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4);
|
||||
if (LJ_BE) p += 4;
|
||||
sz = 4;
|
||||
}
|
||||
snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz);
|
||||
} else {
|
||||
/* Only instructions before the snapshot's IR ref are considered. */
|
||||
IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
|
||||
for (irs = ir+1; irs < irlast; irs++)
|
||||
/* A sunk store of this allocation: marked RID_SINK and its 's' field
|
||||
** equals the distance from the allocation to the store.
|
||||
** NOTE(review): presumably relies on that distance fitting into the
|
||||
** 's' field -- confirm against the sink pass.
|
||||
*/
|
||||
if (irs->r == RID_SINK && ir + irs->s == irs) {
|
||||
/* The store address is asserted to be an ADD; iro is its constant op2. */
|
||||
IRIns *iro = &T->ir[T->ir[irs->op1].op2];
|
||||
/* The offset is applied to the cdata object pointer, not the payload. */
|
||||
uint8_t *p = (uint8_t *)cd;
|
||||
CTSize szs;
|
||||
lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD);
|
||||
lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64);
|
||||
/* Derive the store size from the type of the store instruction. */
|
||||
if (irt_is64(irs->t)) szs = 8;
|
||||
else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
|
||||
else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
|
||||
else szs = 4;
|
||||
if (LJ_64 && iro->o == IR_KINT64)
|
||||
p += (int64_t)ir_k64(iro)->u64;
|
||||
else
|
||||
p += iro->i;
|
||||
/* The destination must lie completely inside the cdata payload. */
|
||||
lua_assert(p >= (uint8_t *)cdataptr(cd) &&
|
||||
p + szs <= (uint8_t *)cdataptr(cd) + sz);
|
||||
/* 32 bit target, store followed by HIOP: the HIOP operand goes to one
|
||||
** 4 byte half, the store operand to the other (same layout as above).
|
||||
*/
|
||||
if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
|
||||
lua_assert(szs == 4);
|
||||
snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4);
|
||||
if (LJ_BE) p += 4;
|
||||
}
|
||||
snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs);
|
||||
}
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
IRIns *irs, *irlast;
|
||||
/* TNEW passes op1/op2 straight to lj_tab_new(); TDUP duplicates the
|
||||
** constant table referenced by op1.
|
||||
*/
|
||||
GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) :
|
||||
lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
|
||||
settabV(J->L, o, t);
|
||||
irlast = &T->ir[T->snap[snapno].ref];
|
||||
for (irs = ir+1; irs < irlast; irs++)
|
||||
/* Same sunk-store test as in the cdata branch above. */
|
||||
if (irs->r == RID_SINK && ir + irs->s == irs) {
|
||||
/* irk starts out as the reference instruction the store goes through. */
|
||||
IRIns *irk = &T->ir[irs->op1];
|
||||
TValue tmp, *val;
|
||||
lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
|
||||
irs->o == IR_FSTORE);
|
||||
if (irk->o == IR_FREF) {
|
||||
/* Field store: only the metatable field is expected here (asserted). */
|
||||
lua_assert(irk->op2 == IRFL_TAB_META);
|
||||
snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
|
||||
/* NOBARRIER: The table is new (marked white). */
|
||||
setgcref(t->metatable, obj2gco(tabV(&tmp)));
|
||||
} else {
|
||||
/* The key is op2 of the reference; a KSLOT is unwrapped to its op1. */
|
||||
irk = &T->ir[irk->op2];
|
||||
if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
|
||||
/* Turn the key instruction into a TValue and get the slot for it. */
|
||||
lj_ir_kvalue(J->L, &tmp, irk);
|
||||
val = lj_tab_set(J->L, t, &tmp);
|
||||
/* NOBARRIER: The table is new (marked white). */
|
||||
snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
|
||||
/* SOFTFP with a following HIOP: its operand supplies the hi word. */
|
||||
if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
|
||||
snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
|
||||
val->u32.hi = tmp.u32.lo;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Restore interpreter state from exit state with the help of a snapshot. */
|
||||
const BCIns *lj_snap_restore(jit_State *J, void *exptr)
|
||||
{
|
||||
@ -500,10 +790,23 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
|
||||
SnapEntry sn = map[n];
|
||||
if (!(sn & SNAP_NORESTORE)) {
|
||||
TValue *o = &frame[snap_slot(sn)];
|
||||
snap_restoreval(J, T, ex, snapno, rfilt, snap_ref(sn), o);
|
||||
IRRef ref = snap_ref(sn);
|
||||
IRIns *ir = &T->ir[ref];
|
||||
if (ir->r == RID_SUNK) {
|
||||
MSize j;
|
||||
for (j = 0; j < n; j++)
|
||||
if (snap_ref(map[j]) == ref) { /* De-duplicate sunk allocations. */
|
||||
copyTV(L, o, &frame[snap_slot(map[j])]);
|
||||
goto dupslot;
|
||||
}
|
||||
snap_unsink(J, T, ex, snapno, rfilt, ir, o);
|
||||
dupslot:
|
||||
continue;
|
||||
}
|
||||
snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
|
||||
if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
|
||||
TValue tmp;
|
||||
snap_restoreval(J, T, ex, snapno, rfilt, snap_ref(sn)+1, &tmp);
|
||||
snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
|
||||
o->u32.hi = tmp.u32.lo;
|
||||
} else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
|
||||
/* Overwrite tag with frame link. */
|
||||
@ -528,5 +831,6 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
|
||||
|
||||
#undef IR
|
||||
#undef emitir_raw
|
||||
#undef emitir
|
||||
|
||||
#endif
|
||||
|
@ -16,17 +16,19 @@ typedef uint32_t Reg;
|
||||
|
||||
/* The hi-bit is NOT set for an allocated register. This means the value
|
||||
** can be directly used without masking. The hi-bit is set for a register
|
||||
** allocation hint or for RID_INIT.
|
||||
** allocation hint or for RID_INIT, RID_SINK or RID_SUNK.
|
||||
*/
|
||||
#define RID_NONE 0x80
|
||||
#define RID_MASK 0x7f
|
||||
#define RID_INIT (RID_NONE|RID_MASK)
|
||||
#define RID_SINK (RID_INIT-1)
|
||||
#define RID_SUNK (RID_INIT-2)
|
||||
|
||||
#define ra_noreg(r) ((r) & RID_NONE)
|
||||
#define ra_hasreg(r) (!((r) & RID_NONE))
|
||||
|
||||
/* The ra_hashint() macro assumes a previous test for ra_noreg(). */
|
||||
#define ra_hashint(r) ((r) != RID_INIT)
|
||||
#define ra_hashint(r) ((r) < RID_SUNK)
|
||||
#define ra_gethint(r) ((Reg)((r) & RID_MASK))
|
||||
#define ra_sethint(rr, r) rr = (uint8_t)((r)|RID_NONE)
|
||||
#define ra_samehint(r1, r2) (ra_gethint((r1)^(r2)) == 0)
|
||||
|
@ -606,6 +606,7 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud)
|
||||
J->loopref = J->chain[IR_LOOP]; /* Needed by assembler. */
|
||||
}
|
||||
lj_opt_split(J);
|
||||
lj_opt_sink(J);
|
||||
J->state = LJ_TRACE_ASM;
|
||||
break;
|
||||
|
||||
|
@ -64,6 +64,7 @@
|
||||
#include "lj_opt_dce.c"
|
||||
#include "lj_opt_loop.c"
|
||||
#include "lj_opt_split.c"
|
||||
#include "lj_opt_sink.c"
|
||||
#include "lj_mcode.c"
|
||||
#include "lj_snap.c"
|
||||
#include "lj_record.c"
|
||||
|
Loading…
Reference in New Issue
Block a user