Add allocation sinking and store sinking optimization.

This commit is contained in:
Mike Pall 2012-07-02 23:47:12 +02:00
parent 79e1eaa73b
commit 0af3f47ba0
15 changed files with 749 additions and 88 deletions

View File

@ -443,7 +443,7 @@ LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \
lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_api.o \ lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_api.o \
lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o \ lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o \
lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
lj_opt_dce.o lj_opt_loop.o lj_opt_split.o \ lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
lj_asm.o lj_trace.o lj_gdbjit.o \ lj_asm.o lj_trace.o lj_gdbjit.o \
lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \ lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \

View File

@ -142,6 +142,8 @@ lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
lj_dispatch.h lj_traceerr.h lj_vm.h lj_dispatch.h lj_traceerr.h lj_vm.h
lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \ lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \
lj_iropt.h lj_vm.h lj_iropt.h lj_vm.h
@ -153,8 +155,9 @@ lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h
lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
lj_target_*.h lj_ctype.h lj_cdata.h
lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \
lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \ lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \
@ -188,12 +191,13 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \ lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \
lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h \ lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h \
lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \ lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_mcode.c lj_snap.c \ lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \
lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c lj_crecord.h \ lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h lj_asm_*.h \ lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c \ lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c lib_os.c \ lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c lib_init.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \
lib_init.c
luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \

View File

@ -374,10 +374,13 @@ local function dump_snap(tr)
end end
-- Return a register name or stack slot for a rid/sp location. -- Return a register name or stack slot for a rid/sp location.
local function ridsp_name(ridsp) local function ridsp_name(ridsp, ins)
if not disass then disass = require("jit.dis_"..jit.arch) end if not disass then disass = require("jit.dis_"..jit.arch) end
local rid = band(ridsp, 0xff) local rid, slot = band(ridsp, 0xff), shr(ridsp, 8)
if ridsp > 255 then return format("[%x]", shr(ridsp, 8)*4) end if rid == 253 or rid == 254 then
return slot == 0 and " {sink" or format(" {%04d", ins-slot)
end
if ridsp > 255 then return format("[%x]", slot*4) end
if rid < 128 then return disass.regname(rid) end if rid < 128 then return disass.regname(rid) end
return "" return ""
end end
@ -458,13 +461,15 @@ local function dump_ir(tr, dumpsnap, dumpreg)
end end
elseif op ~= "NOP " and op ~= "CARG " and elseif op ~= "NOP " and op ~= "CARG " and
(dumpreg or op ~= "RENAME") then (dumpreg or op ~= "RENAME") then
local rid = band(ridsp, 255)
if dumpreg then if dumpreg then
out:write(format("%04d %-5s ", ins, ridsp_name(ridsp))) out:write(format("%04d %-6s", ins, ridsp_name(ridsp, ins)))
else else
out:write(format("%04d ", ins)) out:write(format("%04d ", ins))
end end
out:write(format("%s%s %s %s ", out:write(format("%s%s %s %s ",
band(ot, 128) == 0 and " " or ">", (rid == 254 or rid == 253) and "}" or
(band(ot, 128) == 0 and " " or ">"),
band(ot, 64) == 0 and " " or "+", band(ot, 64) == 0 and " " or "+",
irtype[t], op)) irtype[t], op))
local m1, m2 = band(m, 3), band(m, 3*4) local m1, m2 = band(m, 3), band(m, 3*4)

View File

@ -782,19 +782,44 @@ static int asm_snap_canremat(ASMState *as)
static void asm_snap_alloc1(ASMState *as, IRRef ref) static void asm_snap_alloc1(ASMState *as, IRRef ref)
{ {
IRIns *ir = IR(ref); IRIns *ir = IR(ref);
if (!ra_used(ir)) { if (!(ra_used(ir) || ir->r == RID_SUNK)) {
RegSet allow = (!LJ_SOFTFP && irt_isnum(ir->t)) ? RSET_FPR : RSET_GPR; if (ir->r == RID_SINK) {
/* Get a weak register if we have a free one or can rematerialize. */ ir->r = RID_SUNK;
if ((as->freeset & allow) || #if LJ_HASFFI
(allow == RSET_FPR && asm_snap_canremat(as))) { if (ir->o == IR_CNEWI) { /* Allocate CNEWI value. */
Reg r = ra_allocref(as, ref, allow); /* Allocate a register. */ asm_snap_alloc1(as, ir->op2);
if (!irt_isphi(ir->t)) if (LJ_32 && (ir+1)->o == IR_HIOP)
ra_weak(as, r); /* But mark it as weakly referenced. */ asm_snap_alloc1(as, (ir+1)->op2);
checkmclim(as); }
RA_DBGX((as, "snapreg $f $r", ref, ir->r)); #endif
else { /* Allocate stored values for TNEW, TDUP and CNEW. */
IRIns *irs;
lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW);
for (irs = IR(as->curins); irs > ir; irs--)
if (irs->r == RID_SINK && ir + irs->s == irs) {
lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
irs->o == IR_FSTORE || irs->o == IR_XSTORE);
asm_snap_alloc1(as, irs->op2);
if (LJ_32 && (irs+1)->o == IR_HIOP)
asm_snap_alloc1(as, (irs+1)->op2);
}
}
} else if (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT) {
asm_snap_alloc1(as, ir->op1);
} else { } else {
ra_spill(as, ir); /* Otherwise force a spill slot. */ RegSet allow = (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR;
RA_DBGX((as, "snapspill $f $s", ref, ir->s)); if ((as->freeset & allow) ||
(allow == RSET_FPR && asm_snap_canremat(as))) {
/* Get a weak register if we have a free one or can rematerialize. */
Reg r = ra_allocref(as, ref, allow); /* Allocate a register. */
if (!irt_isphi(ir->t))
ra_weak(as, r); /* But mark it as weakly referenced. */
checkmclim(as);
RA_DBGX((as, "snapreg $f $r", ref, ir->r));
} else {
ra_spill(as, ir); /* Otherwise force a spill slot. */
RA_DBGX((as, "snapspill $f $s", ref, ir->s));
}
} }
} }
} }
@ -848,7 +873,7 @@ static void asm_snap_prep(ASMState *as)
{ {
if (as->curins < as->snapref) { if (as->curins < as->snapref) {
do { do {
lua_assert(as->snapno != 0); if (as->snapno == 0) return; /* Called by sunk stores before snap #0. */
as->snapno--; as->snapno--;
as->snapref = as->T->snap[as->snapno].ref; as->snapref = as->T->snap[as->snapno].ref;
} while (as->curins < as->snapref); } while (as->curins < as->snapref);
@ -1180,6 +1205,8 @@ static void asm_phi(ASMState *as, IRIns *ir)
RegSet afree = (as->freeset & allow); RegSet afree = (as->freeset & allow);
IRIns *irl = IR(ir->op1); IRIns *irl = IR(ir->op1);
IRIns *irr = IR(ir->op2); IRIns *irr = IR(ir->op2);
if (ir->r == RID_SINK) /* Sink PHI. */
return;
/* Spill slot shuffling is not implemented yet (but rarely needed). */ /* Spill slot shuffling is not implemented yet (but rarely needed). */
if (ra_hasspill(irl->s) || ra_hasspill(irr->s)) if (ra_hasspill(irl->s) || ra_hasspill(irr->s))
lj_trace_err(as->J, LJ_TRERR_NYIPHI); lj_trace_err(as->J, LJ_TRERR_NYIPHI);
@ -1494,7 +1521,7 @@ static void asm_tail_link(ASMState *as)
/* -- Trace setup --------------------------------------------------------- */ /* -- Trace setup --------------------------------------------------------- */
/* Clear reg/sp for all instructions and add register hints. */ /* Clear reg/sp for all instructions and add register hints. */
static void asm_setup_regsp(ASMState *as) static void asm_setup_regsp(ASMState *as, int sink)
{ {
GCtrace *T = as->T; GCtrace *T = as->T;
IRRef nins = T->nins; IRRef nins = T->nins;
@ -1545,6 +1572,14 @@ static void asm_setup_regsp(ASMState *as)
inloop = 0; inloop = 0;
as->evenspill = SPS_FIRST; as->evenspill = SPS_FIRST;
for (lastir = IR(nins); ir < lastir; ir++) { for (lastir = IR(nins); ir < lastir; ir++) {
if (sink) {
if (ir->r == RID_SINK)
continue;
if (ir->r == RID_SUNK) { /* Revert after ASM restart. */
ir->r = RID_SINK;
continue;
}
}
switch (ir->o) { switch (ir->o) {
case IR_LOOP: case IR_LOOP:
inloop = 1; inloop = 1;
@ -1716,6 +1751,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
ASMState as_; ASMState as_;
ASMState *as = &as_; ASMState *as = &as_;
MCode *origtop; MCode *origtop;
int sink;
/* Ensure an initialized instruction beyond the last one for HIOP checks. */ /* Ensure an initialized instruction beyond the last one for HIOP checks. */
J->cur.nins = lj_ir_nextins(J); J->cur.nins = lj_ir_nextins(J);
@ -1736,6 +1772,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
as->mcp = as->mctop; as->mcp = as->mctop;
as->mclim = as->mcbot + MCLIM_REDZONE; as->mclim = as->mcbot + MCLIM_REDZONE;
asm_setup_target(as); asm_setup_target(as);
sink = (IR(REF_BASE)->prev == 1);
do { do {
as->mcp = as->mctop; as->mcp = as->mctop;
@ -1751,7 +1788,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
as->gcsteps = 0; as->gcsteps = 0;
as->sectref = as->loopref; as->sectref = as->loopref;
as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED; as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED;
asm_setup_regsp(as); asm_setup_regsp(as, sink);
if (!as->loopref) if (!as->loopref)
asm_tail_link(as); asm_tail_link(as);

View File

@ -693,6 +693,8 @@ static void asm_newref(ASMState *as, IRIns *ir)
{ {
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
IRRef args[3]; IRRef args[3];
if (ir->r == RID_SINK) /* Sink newref. */
return;
args[0] = ASMREF_L; /* lua_State *L */ args[0] = ASMREF_L; /* lua_State *L */
args[1] = ir->op1; /* GCtab *t */ args[1] = ir->op1; /* GCtab *t */
args[2] = ASMREF_TMP1; /* cTValue *key */ args[2] = ASMREF_TMP1; /* cTValue *key */
@ -836,9 +838,13 @@ static void asm_xload(ASMState *as, IRIns *ir)
static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
{ {
Reg src = ra_alloc1(as, ir->op2, RSET_GPR); if (ir->r == RID_SINK) { /* Sink store. */
asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, asm_snap_prep(as);
rset_exclude(RSET_GPR, src), ofs); } else {
Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
rset_exclude(RSET_GPR, src), ofs);
}
} }
static void asm_ahuvload(ASMState *as, IRIns *ir) static void asm_ahuvload(ASMState *as, IRIns *ir)
@ -876,21 +882,25 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
static void asm_ahustore(ASMState *as, IRIns *ir) static void asm_ahustore(ASMState *as, IRIns *ir)
{ {
RegSet allow = RSET_GPR; if (ir->r == RID_SINK) { /* Sink store. */
Reg idx, src = RID_NONE, type = RID_NONE; asm_snap_prep(as);
int32_t ofs = 0; } else {
int hiop = ((ir+1)->o == IR_HIOP); RegSet allow = RSET_GPR;
if (!irt_ispri(ir->t)) { Reg idx, src = RID_NONE, type = RID_NONE;
src = ra_alloc1(as, ir->op2, allow); int32_t ofs = 0;
rset_clear(allow, src); int hiop = ((ir+1)->o == IR_HIOP);
if (!irt_ispri(ir->t)) {
src = ra_alloc1(as, ir->op2, allow);
rset_clear(allow, src);
}
if (hiop)
type = ra_alloc1(as, (ir+1)->op2, allow);
else
type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type));
if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs);
emit_lso(as, ARMI_STR, type, idx, ofs+4);
} }
if (hiop)
type = ra_alloc1(as, (ir+1)->op2, allow);
else
type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type));
if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs);
emit_lso(as, ARMI_STR, type, idx, ofs+4);
} }
static void asm_sload(ASMState *as, IRIns *ir) static void asm_sload(ASMState *as, IRIns *ir)
@ -1382,7 +1392,10 @@ static void asm_hiop(ASMState *as, IRIns *ir)
asm_fpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO); asm_fpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO);
return; return;
} else if ((ir-1)->o == IR_XSTORE) { } else if ((ir-1)->o == IR_XSTORE) {
asm_xstore(as, ir, 4); if ((ir-1)->r == RID_SINK)
asm_snap_prep(as);
else
asm_xstore(as, ir, 4);
return; return;
} }
if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */

View File

@ -769,14 +769,18 @@ nolo:
static void asm_newref(ASMState *as, IRIns *ir) static void asm_newref(ASMState *as, IRIns *ir)
{ {
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; if (ir->r == RID_SINK) { /* Sink newref. */
IRRef args[3]; return;
args[0] = ASMREF_L; /* lua_State *L */ } else {
args[1] = ir->op1; /* GCtab *t */ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
args[2] = ASMREF_TMP1; /* cTValue *key */ IRRef args[3];
asm_setupresult(as, ir, ci); /* TValue * */ args[0] = ASMREF_L; /* lua_State *L */
asm_gencall(as, ci, args); args[1] = ir->op1; /* GCtab *t */
asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); args[2] = ASMREF_TMP1; /* cTValue *key */
asm_setupresult(as, ir, ci); /* TValue * */
asm_gencall(as, ci, args);
asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
}
} }
static void asm_uref(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir)
@ -912,9 +916,14 @@ static void asm_xload(ASMState *as, IRIns *ir)
static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
{ {
Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); if (ir->r == RID_SINK) { /* Sink store. */
asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, asm_snap_prep(as);
rset_exclude(RSET_GPR, src), ofs); return;
} else {
Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
rset_exclude(RSET_GPR, src), ofs);
}
} }
static void asm_ahuvload(ASMState *as, IRIns *ir) static void asm_ahuvload(ASMState *as, IRIns *ir)
@ -947,6 +956,10 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
RegSet allow = RSET_GPR; RegSet allow = RSET_GPR;
Reg idx, src = RID_NONE, type = RID_NONE; Reg idx, src = RID_NONE, type = RID_NONE;
int32_t ofs = 0; int32_t ofs = 0;
if (ir->r == RID_SINK) { /* Sink store. */
asm_snap_prep(as);
return;
}
if (irt_isnum(ir->t)) { if (irt_isnum(ir->t)) {
src = ra_alloc1(as, ir->op2, RSET_FPR); src = ra_alloc1(as, ir->op2, RSET_FPR);
} else { } else {
@ -1561,8 +1574,12 @@ static void asm_hiop(ASMState *as, IRIns *ir)
return; return;
} else if ((ir-1)->o == IR_XSTORE) { } else if ((ir-1)->o == IR_XSTORE) {
as->curins--; /* Handle both stores here. */ as->curins--; /* Handle both stores here. */
asm_xstore(as, ir, LJ_LE ? 4 : 0); if ((ir-1)->r == RID_SINK) {
asm_xstore(as, ir-1, LJ_LE ? 0 : 4); asm_snap_prep(as);
} else {
asm_xstore(as, ir, LJ_LE ? 4 : 0);
asm_xstore(as, ir-1, LJ_LE ? 0 : 4);
}
return; return;
} }
if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */

View File

@ -773,6 +773,8 @@ static void asm_newref(ASMState *as, IRIns *ir)
{ {
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
IRRef args[3]; IRRef args[3];
if (ir->r == RID_SINK) /* Sink newref. */
return;
args[0] = ASMREF_L; /* lua_State *L */ args[0] = ASMREF_L; /* lua_State *L */
args[1] = ir->op1; /* GCtab *t */ args[1] = ir->op1; /* GCtab *t */
args[2] = ASMREF_TMP1; /* cTValue *key */ args[2] = ASMREF_TMP1; /* cTValue *key */
@ -892,12 +894,16 @@ static void asm_fload(ASMState *as, IRIns *ir)
static void asm_fstore(ASMState *as, IRIns *ir) static void asm_fstore(ASMState *as, IRIns *ir)
{ {
Reg src = ra_alloc1(as, ir->op2, RSET_GPR); if (ir->r == RID_SINK) { /* Sink store. */
IRIns *irf = IR(ir->op1); asm_snap_prep(as);
Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); } else {
int32_t ofs = field_ofs[irf->op2]; Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
PPCIns pi = asm_fxstoreins(ir); IRIns *irf = IR(ir->op1);
emit_tai(as, pi, src, idx, ofs); Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
int32_t ofs = field_ofs[irf->op2];
PPCIns pi = asm_fxstoreins(ir);
emit_tai(as, pi, src, idx, ofs);
}
} }
static void asm_xload(ASMState *as, IRIns *ir) static void asm_xload(ASMState *as, IRIns *ir)
@ -912,6 +918,10 @@ static void asm_xload(ASMState *as, IRIns *ir)
static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
{ {
IRIns *irb; IRIns *irb;
if (ir->r == RID_SINK) { /* Sink store. */
asm_snap_prep(as);
return;
}
if (ofs == 0 && mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP && if (ofs == 0 && mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP &&
ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) { ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) {
/* Fuse BSWAP with XSTORE to stwbrx. */ /* Fuse BSWAP with XSTORE to stwbrx. */
@ -968,6 +978,10 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
RegSet allow = RSET_GPR; RegSet allow = RSET_GPR;
Reg idx, src = RID_NONE, type = RID_NONE; Reg idx, src = RID_NONE, type = RID_NONE;
int32_t ofs = AHUREF_LSX; int32_t ofs = AHUREF_LSX;
if (ir->r == RID_SINK) { /* Sink store. */
asm_snap_prep(as);
return;
}
if (irt_isnum(ir->t)) { if (irt_isnum(ir->t)) {
src = ra_alloc1(as, ir->op2, RSET_FPR); src = ra_alloc1(as, ir->op2, RSET_FPR);
} else { } else {
@ -1747,8 +1761,12 @@ static void asm_hiop(ASMState *as, IRIns *ir)
return; return;
} else if ((ir-1)->o == IR_XSTORE) { } else if ((ir-1)->o == IR_XSTORE) {
as->curins--; /* Handle both stores here. */ as->curins--; /* Handle both stores here. */
asm_xstore(as, ir, 0); if ((ir-1)->r == RID_SINK) {
asm_xstore(as, ir-1, 4); asm_snap_prep(as);
} else {
asm_xstore(as, ir, 0);
asm_xstore(as, ir-1, 4);
}
return; return;
} }
if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */

View File

@ -1155,6 +1155,8 @@ static void asm_newref(ASMState *as, IRIns *ir)
IRRef args[3]; IRRef args[3];
IRIns *irkey; IRIns *irkey;
Reg tmp; Reg tmp;
if (ir->r == RID_SINK) /* Sink newref. */
return;
args[0] = ASMREF_L; /* lua_State *L */ args[0] = ASMREF_L; /* lua_State *L */
args[1] = ir->op1; /* GCtab *t */ args[1] = ir->op1; /* GCtab *t */
args[2] = ASMREF_TMP1; /* cTValue *key */ args[2] = ASMREF_TMP1; /* cTValue *key */
@ -1259,6 +1261,10 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
RegSet allow = RSET_GPR; RegSet allow = RSET_GPR;
Reg src = RID_NONE, osrc = RID_NONE; Reg src = RID_NONE, osrc = RID_NONE;
int32_t k = 0; int32_t k = 0;
if (ir->r == RID_SINK) { /* Sink store. */
asm_snap_prep(as);
return;
}
/* The IRT_I16/IRT_U16 stores should never be simplified for constant /* The IRT_I16/IRT_U16 stores should never be simplified for constant
** values since mov word [mem], imm16 has a length-changing prefix. ** values since mov word [mem], imm16 has a length-changing prefix.
*/ */
@ -1372,6 +1378,10 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
static void asm_ahustore(ASMState *as, IRIns *ir) static void asm_ahustore(ASMState *as, IRIns *ir)
{ {
if (ir->r == RID_SINK) { /* Sink store. */
asm_snap_prep(as);
return;
}
if (irt_isnum(ir->t)) { if (irt_isnum(ir->t)) {
Reg src = ra_alloc1(as, ir->op2, RSET_FPR); Reg src = ra_alloc1(as, ir->op2, RSET_FPR);
asm_fuseahuref(as, ir->op1, RSET_GPR); asm_fuseahuref(as, ir->op1, RSET_GPR);
@ -2251,7 +2261,10 @@ static void asm_hiop(ASMState *as, IRIns *ir)
asm_comp_int64(as, ir); asm_comp_int64(as, ir);
return; return;
} else if ((ir-1)->o == IR_XSTORE) { } else if ((ir-1)->o == IR_XSTORE) {
asm_fxstore(as, ir); if ((ir-1)->r == RID_SINK)
asm_snap_prep(as);
else
asm_fxstore(as, ir);
return; return;
} }
if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */

View File

@ -154,6 +154,7 @@ LJ_FUNC void lj_opt_split(jit_State *J);
#else #else
#define lj_opt_split(J) UNUSED(J) #define lj_opt_split(J) UNUSED(J)
#endif #endif
LJ_FUNC void lj_opt_sink(jit_State *J);
#endif #endif

View File

@ -63,19 +63,20 @@
#define JIT_F_OPT_NARROW 0x00200000 #define JIT_F_OPT_NARROW 0x00200000
#define JIT_F_OPT_LOOP 0x00400000 #define JIT_F_OPT_LOOP 0x00400000
#define JIT_F_OPT_ABC 0x00800000 #define JIT_F_OPT_ABC 0x00800000
#define JIT_F_OPT_FUSE 0x01000000 #define JIT_F_OPT_SINK 0x01000000
#define JIT_F_OPT_FUSE 0x02000000
/* Optimizations names for -O. Must match the order above. */ /* Optimizations names for -O. Must match the order above. */
#define JIT_F_OPT_FIRST JIT_F_OPT_FOLD #define JIT_F_OPT_FIRST JIT_F_OPT_FOLD
#define JIT_F_OPTSTRING \ #define JIT_F_OPTSTRING \
"\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4fuse" "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse"
/* Optimization levels set a fixed combination of flags. */ /* Optimization levels set a fixed combination of flags. */
#define JIT_F_OPT_0 0 #define JIT_F_OPT_0 0
#define JIT_F_OPT_1 (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE) #define JIT_F_OPT_1 (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE)
#define JIT_F_OPT_2 (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP) #define JIT_F_OPT_2 (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP)
#define JIT_F_OPT_3 \ #define JIT_F_OPT_3 (JIT_F_OPT_2|\
(JIT_F_OPT_2|JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_FUSE) JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE)
#define JIT_F_OPT_DEFAULT JIT_F_OPT_3 #define JIT_F_OPT_DEFAULT JIT_F_OPT_3
#if LJ_TARGET_WINDOWS || LJ_64 #if LJ_TARGET_WINDOWS || LJ_64

244
src/lj_opt_sink.c Normal file
View File

@ -0,0 +1,244 @@
/*
** SINK: Allocation Sinking and Store Sinking.
** Copyright (C) 2005-2012 Mike Pall. See Copyright Notice in luajit.h
*/
#define lj_opt_sink_c
#define LUA_CORE
#include "lj_obj.h"
#if LJ_HASJIT
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_target.h"
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])
/* Check whether the store ref points to an eligible allocation.
**
** Walks up the store's address chain (HREFK/AREF, or HREF/NEWREF/FREF/ADD
** for XSTORE) to the underlying allocation. Returns the allocation
** instruction (TNEW/TDUP/CNEW) if the store can potentially be sunk with
** it, or NULL if the reference is ineligible (non-constant key, unhandled
** reference type, not an allocation, or too far away).
*/
static IRIns *sink_checkalloc(jit_State *J, IRIns *irs)
{
IRIns *ir = IR(irs->op1);
if (!irref_isk(ir->op2))
return NULL; /* Non-constant key. */
if (ir->o == IR_HREFK || ir->o == IR_AREF)
ir = IR(ir->op1);
else if (!(ir->o == IR_HREF || ir->o == IR_NEWREF ||
ir->o == IR_FREF || ir->o == IR_ADD))
return NULL; /* Unhandled reference type (for XSTORE). */
ir = IR(ir->op1);
if (!(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW))
return NULL; /* Not an allocation. */
/* The store-to-allocation distance must fit in 8 bits, since it is later
** encoded into the slot part of REGSP(RID_SINK, ...) by sink_sweep_ins.
*/
if (ir + 255 < irs)
return NULL; /* Out of range. */
return ir; /* Return allocation. */
}
/* Recursively check whether a value depends on a PHI.
**
** Returns 1 if ref is itself a PHI or (transitively) reads one via its
** operands, 0 otherwise. Constant operands (below REF_FIRST) terminate
** the recursion.
** NOTE(review): recursion depth is bounded only by the length of the IR
** operand chain — presumably fine for typical traces, but worth confirming
** for pathological inputs.
*/
static int sink_phidep(jit_State *J, IRRef ref)
{
IRIns *ir = IR(ref);
if (irt_isphi(ir->t)) return 1;
if (ir->op1 >= REF_FIRST && sink_phidep(J, ir->op1)) return 1;
if (ir->op2 >= REF_FIRST && sink_phidep(J, ir->op2)) return 1;
return 0;
}
/* Check whether a value is a sinkable PHI or a non-PHI.
**
** Returns 1 if the value stored into PHI allocation ira is acceptable:
** either a constant, a PHI value (or a NUM_INT conversion of one), or a
** value with no PHI dependency at all. Returns 0 otherwise.
** Side effect: increments ira->prev for each sinkable PHI value, i.e.
** ->prev is reused as a per-allocation PHI value counter (cleared by the
** IR_PHI case in sink_mark_ins and compared by sink_remark_phi).
*/
static int sink_checkphi(jit_State *J, IRIns *ira, IRRef ref)
{
if (ref >= REF_FIRST) {
IRIns *ir = IR(ref);
if (irt_isphi(ir->t) || (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT &&
irt_isphi(IR(ir->op1)->t))) {
ira->prev++;
return 1; /* Sinkable PHI. */
}
return !sink_phidep(J, ref); /* Must be a non-PHI then. */
}
return 1; /* Constant (non-PHI). */
}
/* Mark non-sinkable allocations using single-pass backward propagation.
**
** Roots for the marking process are:
** - Some PHIs or snapshots (see below).
** - Non-PHI, non-constant values stored to PHI allocations.
** - All guards.
** - Any remaining loads not eliminated by store-to-load forwarding.
** - Stores with non-constant keys.
** - All stored values.
**
** Walks the IR backwards from the last instruction to IR_BASE, setting
** IRT_MARK on the type of every instruction that must be materialized.
** An allocation left unmarked at the end is sinkable.
*/
static void sink_mark_ins(jit_State *J)
{
IRIns *ir, *irlast = IR(J->cur.nins-1);
for (ir = irlast ; ; ir--) {
switch (ir->o) {
case IR_BASE:
return; /* Finished. */
case IR_CALLL: /* IRCALL_lj_tab_len */
case IR_ALOAD: case IR_HLOAD: case IR_XLOAD:
irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */
break;
case IR_FLOAD:
/* Metatable loads keep the table alive even if the FLOAD itself
** is unmarked.
*/
if (irt_ismarked(ir->t) || ir->op2 == IRFL_TAB_META)
irt_setmark(IR(ir->op1)->t); /* Mark table for remaining loads. */
break;
case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: {
IRIns *ira = sink_checkalloc(J, ir);
if (!ira || (irt_isphi(ira->t) && !sink_checkphi(J, ira, ir->op2)))
irt_setmark(IR(ir->op1)->t); /* Mark ineligible ref. */
irt_setmark(IR(ir->op2)->t); /* Mark stored value. */
break;
}
#if LJ_HASFFI
case IR_CNEWI:
/* On 32 bit targets a 64 bit CNEWI payload has a trailing HIOP
** carrying the high word; both halves must pass the PHI check.
*/
if (irt_isphi(ir->t) &&
(!sink_checkphi(J, ir, ir->op2) ||
(LJ_32 && ir+1 < irlast && (ir+1)->o == IR_HIOP &&
!sink_checkphi(J, ir, (ir+1)->op2))))
irt_setmark(ir->t); /* Mark ineligible allocation. */
/* fallthrough */
#endif
case IR_USTORE:
irt_setmark(IR(ir->op2)->t); /* Mark stored value. */
break;
#if LJ_HASFFI
case IR_CALLXS:
#endif
case IR_CALLS:
irt_setmark(IR(ir->op1)->t); /* Mark (potentially) stored values. */
break;
case IR_PHI: {
IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
irl->prev = irr->prev = 0; /* Clear PHI value counts. */
/* A PHI over two like-kind allocations may still be sinkable;
** anything else forces both operands to be materialized.
*/
if (irl->o == irr->o &&
(irl->o == IR_TNEW || irl->o == IR_TDUP ||
(LJ_HASFFI && (irl->o == IR_CNEW || irl->o == IR_CNEWI))))
break;
irt_setmark(irl->t);
irt_setmark(irr->t);
break;
}
default:
if (irt_ismarked(ir->t) || irt_isguard(ir->t)) { /* Propagate mark. */
if (ir->op1 >= REF_FIRST) irt_setmark(IR(ir->op1)->t);
if (ir->op2 >= REF_FIRST) irt_setmark(IR(ir->op2)->t);
}
break;
}
}
}
/* Mark all instructions referenced by a snapshot.
** Every non-constant IR ref appearing in the snapshot's entry map must be
** materialized, so its type gets the IRT_MARK bit set.
*/
static void sink_mark_snap(jit_State *J, SnapShot *snap)
{
SnapEntry *ent = &J->cur.snapmap[snap->mapofs];
SnapEntry *end = ent + snap->nent;
while (ent < end) {
IRRef ref = snap_ref(*ent);
if (!irref_isk(ref))
irt_setmark(IR(ref)->t); /* Constants never need marking. */
ent++;
}
}
/* Iteratively remark PHI refs with differing marks or PHI value counts.
**
** Runs the PHI list to a fixpoint: whenever the two operands of a PHI
** disagree (one marked, one not, or unequal sinkable-PHI value counts in
** ->prev), both operands are marked non-sinkable. Marking may invalidate
** earlier decisions, so iterate until no new mark is set.
**
** Fix: the original only set `remark` when the operand marks already
** differed. When both operands were UNMARKED but their value counts
** differed, they were freshly marked without requesting another pass, so
** the new marks could fail to propagate to other PHIs and the loop could
** stop before reaching the fixpoint. Now `remark` is also set whenever at
** least one operand was not previously marked.
*/
static void sink_remark_phi(jit_State *J)
{
IRIns *ir;
int remark;
do {
remark = 0;
for (ir = IR(J->cur.nins-1); ir->o == IR_PHI; ir--) {
IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
if (!((irl->t.irt ^ irr->t.irt) & IRT_MARK) && irl->prev == irr->prev)
continue; /* Operands agree: nothing to do for this PHI. */
/* Request another pass if this marks a previously unmarked operand. */
remark |= (~(irl->t.irt & irr->t.irt) & IRT_MARK);
irt_setmark(IR(ir->op1)->t);
irt_setmark(IR(ir->op2)->t);
}
} while (remark);
}
/* Sweep instructions and mark sunken allocations and stores.
**
** Forward result encoding in ->prev (consumed by the assembler and by
** lj_snap/dump as a RegSP):
** - Sunk store: REGSP(RID_SINK, distance-to-allocation) — the distance
** fits 8 bits, guaranteed by the range check in sink_checkalloc.
** - Sunk NEWREF/allocation/PHI: REGSP(RID_SINK, 0).
** - Everything else: REGSP_INIT.
** Also clears all IRT_MARK bits and drops IRT_GUARD from sunk allocations.
*/
static void sink_sweep_ins(jit_State *J)
{
IRIns *ir, *irfirst = IR(J->cur.nk);
for (ir = IR(J->cur.nins-1) ; ir >= irfirst; ir--) {
switch (ir->o) {
case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: {
IRIns *ira = sink_checkalloc(J, ir);
if (ira && !irt_ismarked(ira->t))
ir->prev = REGSP(RID_SINK, (int)(ir - ira));
else
ir->prev = REGSP_INIT;
break;
}
case IR_NEWREF:
if (!irt_ismarked(ir->t)) {
ir->prev = REGSP(RID_SINK, 0);
} else {
irt_clearmark(ir->t);
ir->prev = REGSP_INIT;
}
break;
#if LJ_HASFFI
case IR_CNEW: case IR_CNEWI:
#endif
case IR_TNEW: case IR_TDUP:
if (!irt_ismarked(ir->t)) {
/* Sunk allocations can't fail, so the guard bit is dropped. */
ir->t.irt &= ~IRT_GUARD;
ir->prev = REGSP(RID_SINK, 0);
} else {
irt_clearmark(ir->t);
ir->prev = REGSP_INIT;
}
break;
case IR_PHI: {
IRIns *ira = IR(ir->op2);
/* A PHI is sunk iff its (right) allocation operand stayed unmarked. */
if (!irt_ismarked(ira->t) &&
(ira->o == IR_TNEW || ira->o == IR_TDUP ||
(LJ_HASFFI && (ira->o == IR_CNEW || ira->o == IR_CNEWI)))) {
ir->prev = REGSP(RID_SINK, 0);
} else {
ir->prev = REGSP_INIT;
}
break;
}
default:
irt_clearmark(ir->t);
ir->prev = REGSP_INIT;
break;
}
}
/* The assembler tests IR(REF_BASE)->prev == 1 to enable sink handling. */
IR(REF_BASE)->prev = 1; /* Signal SINK flags to assembler. */
}
/* Allocation sinking and store sinking.
**
** 1. Mark all non-sinkable allocations.
** 2. Then sink all remaining allocations and the related stores.
**
** Entry point, called once per trace. Runs only when SINK plus its
** prerequisite optimizations (FWD/DCE/CSE/FOLD) are all enabled and the
** trace actually contains at least one sinkable allocation opcode.
*/
void lj_opt_sink(jit_State *J)
{
const uint32_t need = (JIT_F_OPT_SINK|JIT_F_OPT_FWD|
JIT_F_OPT_DCE|JIT_F_OPT_CSE|JIT_F_OPT_FOLD);
if ((J->flags & need) == need &&
(J->chain[IR_TNEW] || J->chain[IR_TDUP] ||
(LJ_HASFFI && (J->chain[IR_CNEW] || J->chain[IR_CNEWI])))) {
/* Non-looping traces root the marking in the final snapshot; looping
** traces instead need the iterative PHI remark pass afterwards.
*/
if (!J->loopref)
sink_mark_snap(J, &J->cur.snap[J->cur.nsnap-1]);
sink_mark_ins(J);
if (J->loopref)
sink_remark_phi(J);
sink_sweep_ins(J);
}
}
#undef IR
#endif

View File

@ -11,6 +11,7 @@
#if LJ_HASJIT #if LJ_HASJIT
#include "lj_gc.h" #include "lj_gc.h"
#include "lj_tab.h"
#include "lj_state.h" #include "lj_state.h"
#include "lj_frame.h" #include "lj_frame.h"
#include "lj_bc.h" #include "lj_bc.h"
@ -20,10 +21,17 @@
#include "lj_trace.h" #include "lj_trace.h"
#include "lj_snap.h" #include "lj_snap.h"
#include "lj_target.h" #include "lj_target.h"
#if LJ_HASFFI
#include "lj_ctype.h"
#include "lj_cdata.h"
#endif
/* Some local macros to save typing. Undef'd at the end. */ /* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)]) #define IR(ref) (&J->cur.ir[(ref)])
/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
/* Emit raw IR without passing through optimizations. */ /* Emit raw IR without passing through optimizations. */
#define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J)) #define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
@ -370,6 +378,31 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir)
} }
} }
/* De-duplicate parent reference. */
static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
{
MSize j;
for (j = 0; j < nmax; j++)
if (snap_ref(map[j]) == ref)
return J->slot[snap_slot(map[j])];
return 0;
}
/* Emit parent reference with de-duplication. */
static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
BloomFilter seen, IRRef ref)
{
IRIns *ir = &T->ir[ref];
TRef tr;
if (irref_isk(ref))
tr = snap_replay_const(J, ir);
else if (!regsp_used(ir->prev))
tr = 0;
else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
return tr;
}
/* Replay snapshot state to setup side trace. */ /* Replay snapshot state to setup side trace. */
void lj_snap_replay(jit_State *J, GCtrace *T) void lj_snap_replay(jit_State *J, GCtrace *T)
{ {
@ -377,6 +410,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
SnapEntry *map = &T->snapmap[snap->mapofs]; SnapEntry *map = &T->snapmap[snap->mapofs];
MSize n, nent = snap->nent; MSize n, nent = snap->nent;
BloomFilter seen = 0; BloomFilter seen = 0;
int pass23 = 0;
J->framedepth = 0; J->framedepth = 0;
/* Emit IR for slots inherited from parent snapshot. */ /* Emit IR for slots inherited from parent snapshot. */
for (n = 0; n < nent; n++) { for (n = 0; n < nent; n++) {
@ -386,21 +420,18 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
IRIns *ir = &T->ir[ref]; IRIns *ir = &T->ir[ref];
TRef tr; TRef tr;
/* The bloom filter avoids O(nent^2) overhead for de-duping slots. */ /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
if (bloomtest(seen, ref)) { if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
MSize j; goto setslot;
for (j = 0; j < n; j++)
if (snap_ref(map[j]) == ref) {
tr = J->slot[snap_slot(map[j])];
goto setslot;
}
}
bloomset(seen, ref); bloomset(seen, ref);
if (irref_isk(ref)) { if (irref_isk(ref)) {
tr = snap_replay_const(J, ir); tr = snap_replay_const(J, ir);
} else if (!regsp_used(ir->prev)) {
pass23 = 1;
lua_assert(s != 0);
tr = s;
} else { } else {
IRType t = irt_type(ir->t); IRType t = irt_type(ir->t);
uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
lua_assert(regsp_used(ir->prev));
if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
@ -411,13 +442,126 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
if ((sn & SNAP_FRAME)) if ((sn & SNAP_FRAME))
J->baseslot = s+1; J->baseslot = s+1;
} }
if (pass23) {
IRIns *irlast = &T->ir[(snap+1)->ref];
lua_assert(J->exitno+1 < T->nsnap);
pass23 = 0;
/* Emit dependent PVALs. */
for (n = 0; n < nent; n++) {
SnapEntry sn = map[n];
IRRef refp = snap_ref(sn);
IRIns *ir = &T->ir[refp];
if (regsp_reg(ir->r) == RID_SUNK) {
if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
pass23 = 1;
lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
ir->o == IR_CNEW || ir->o == IR_CNEWI);
if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
if (LJ_HASFFI && ir->o == IR_CNEWI) {
if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
snap_pref(J, T, map, nent, seen, (ir+1)->op2);
} else {
IRIns *irs;
for (irs = ir+1; irs < irlast; irs++)
if (irs->r == RID_SINK && ir + irs->s == irs) {
if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
irs+1 < irlast && (irs+1)->o == IR_HIOP)
snap_pref(J, T, map, nent, seen, (irs+1)->op2);
}
}
} else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
}
}
/* Replay sunk instructions. */
for (n = 0; pass23 && n < nent; n++) {
SnapEntry sn = map[n];
IRRef refp = snap_ref(sn);
IRIns *ir = &T->ir[refp];
if (regsp_reg(ir->r) == RID_SUNK) {
TRef op1, op2;
if (J->slot[snap_slot(sn)] != snap_slot(sn)) { /* De-dup allocs. */
J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
continue;
}
op1 = ir->op1;
if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
op2 = ir->op2;
if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
if (LJ_HASFFI && ir->o == IR_CNEWI) {
if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
lj_needsplit(J); /* Emit joining HIOP. */
op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
snap_pref(J, T, map, nent, seen, (ir+1)->op2));
}
J->slot[snap_slot(sn)] = emitir(ir->ot, op1, op2);
} else {
IRIns *irs;
TRef tr = emitir(ir->ot, op1, op2);
J->slot[snap_slot(sn)] = tr;
for (irs = ir+1; irs < irlast; irs++)
if (irs->r == RID_SINK && ir + irs->s == irs) {
IRIns *irr = &T->ir[irs->op1];
TRef val, key = irr->op2, tmp = tr;
if (irr->o != IR_FREF) {
IRIns *irk = &T->ir[key];
if (irr->o == IR_HREFK)
key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
irk->op2);
else
key = snap_replay_const(J, irk);
if (irr->o == IR_HREFK || irr->o == IR_AREF) {
IRIns *irf = &T->ir[irr->op1];
tmp = emitir(irf->ot, tmp, irf->op2);
}
}
tmp = emitir(irr->ot, tmp, key);
val = snap_pref(J, T, map, nent, seen, irs->op2);
if (val == 0) {
IRIns *irc = &T->ir[irs->op2];
lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
val = snap_pref(J, T, map, nent, seen, irc->op1);
val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
} else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
irs+1 < irlast && (irs+1)->o == IR_HIOP) {
IRType t = IRT_I64;
if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP)
t = IRT_NUM;
if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
uint64_t k = (uint32_t)T->ir[irs->op2].i +
((uint64_t)T->ir[(irs+1)->op2].i << 32);
val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM,
lj_ir_k64_find(J, k));
} else {
val = emitir_raw(IRT(IR_HIOP, t), val,
snap_pref(J, T, map, nent, seen, (irs+1)->op2));
}
tmp = emitir(IRT(irs->o, t), tmp, val);
continue;
}
tmp = emitir(irs->ot, tmp, val);
}
}
}
}
}
J->base = J->slot + J->baseslot; J->base = J->slot + J->baseslot;
J->maxslot = snap->nslots - J->baseslot; J->maxslot = snap->nslots - J->baseslot;
lj_snap_add(J); lj_snap_add(J);
if (pass23) /* Need explicit GC step _after_ initial snapshot. */
emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
} }
/* -- Snapshot restore ---------------------------------------------------- */ /* -- Snapshot restore ---------------------------------------------------- */
static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
SnapNo snapno, BloomFilter rfilt,
IRIns *ir, TValue *o);
/* Restore a value from the trace exit state. */ /* Restore a value from the trace exit state. */
static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
SnapNo snapno, BloomFilter rfilt, SnapNo snapno, BloomFilter rfilt,
@ -450,8 +594,12 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
} }
} else { /* Restore from register. */ } else { /* Restore from register. */
Reg r = regsp_reg(rs); Reg r = regsp_reg(rs);
lua_assert(ra_hasreg(r)); if (ra_noreg(r)) {
if (irt_isinteger(t)) { lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
return;
} else if (irt_isinteger(t)) {
setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]); setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
#if !LJ_SOFTFP #if !LJ_SOFTFP
} else if (irt_isnum(t)) { } else if (irt_isnum(t)) {
@ -468,6 +616,148 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
} }
} }
#if LJ_HASFFI
/* Restore raw data from the trace exit state. */
static void snap_restoredata(GCtrace *T, ExitState *ex,
SnapNo snapno, BloomFilter rfilt,
IRRef ref, void *dst, CTSize sz)
{
IRIns *ir = &T->ir[ref];
RegSP rs = ir->prev;
int32_t *src;
union { uint64_t u64; float f; } tmp;
if (irref_isk(ref)) {
if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
src = mref(ir->ptr, int32_t);
} else if (sz == 8) {
tmp.u64 = (uint64_t)(uint32_t)ir->i;
src = (int32_t *)&tmp.u64;
} else {
src = &ir->i;
}
} else {
if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
rs = snap_renameref(T, snapno, ref, rs);
if (ra_hasspill(regsp_spill(rs))) {
src = &ex->spill[regsp_spill(rs)];
} else {
Reg r = regsp_reg(rs);
if (ra_noreg(r)) {
/* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4);
*(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
return;
}
src = (int32_t *)&ex->gpr[r-RID_MIN_GPR];
#if !LJ_SOFTFP
if (r >= RID_MAX_GPR) {
src = (int32_t *)&ex->fpr[r-RID_MIN_FPR];
#if LJ_TARGET_PPC
if (sz == 4) { /* PPC FPRs are always doubles. */
tmp.f = (float)*(double *)src;
src = (int32_t *)&tmp.f;
}
#else
if (LJ_BE && sz == 4) src++;
#endif
}
#endif
}
}
lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
if (sz == 4) *(int32_t *)dst = *src;
else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
else *(int16_t *)dst = (int16_t)*src;
}
#endif
/* Unsink allocation from the trace exit state. Unsink sunk stores. */
static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
SnapNo snapno, BloomFilter rfilt,
IRIns *ir, TValue *o)
{
lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
ir->o == IR_CNEW || ir->o == IR_CNEWI);
#if LJ_HASFFI
if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
CTState *cts = ctype_ctsG(J2G(J));
CTypeID id = (CTypeID)T->ir[ir->op1].i;
CTSize sz = lj_ctype_size(cts, id);
GCcdata *cd = lj_cdata_new(cts, id, sz);
setcdataV(J->L, o, cd);
if (ir->o == IR_CNEWI) {
uint8_t *p = (uint8_t *)cdataptr(cd);
lua_assert(sz == 4 || sz == 8);
if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4);
if (LJ_BE) p += 4;
sz = 4;
}
snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz);
} else {
IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
for (irs = ir+1; irs < irlast; irs++)
if (irs->r == RID_SINK && ir + irs->s == irs) {
IRIns *iro = &T->ir[T->ir[irs->op1].op2];
uint8_t *p = (uint8_t *)cd;
CTSize szs;
lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD);
lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64);
if (irt_is64(irs->t)) szs = 8;
else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
else szs = 4;
if (LJ_64 && iro->o == IR_KINT64)
p += (int64_t)ir_k64(iro)->u64;
else
p += iro->i;
lua_assert(p >= (uint8_t *)cdataptr(cd) &&
p + szs <= (uint8_t *)cdataptr(cd) + sz);
if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
lua_assert(szs == 4);
snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4);
if (LJ_BE) p += 4;
}
snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs);
}
}
} else
#endif
{
IRIns *irs, *irlast;
GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) :
lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
settabV(J->L, o, t);
irlast = &T->ir[T->snap[snapno].ref];
for (irs = ir+1; irs < irlast; irs++)
if (irs->r == RID_SINK && ir + irs->s == irs) {
IRIns *irk = &T->ir[irs->op1];
TValue tmp, *val;
lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
irs->o == IR_FSTORE);
if (irk->o == IR_FREF) {
lua_assert(irk->op2 == IRFL_TAB_META);
snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
/* NOBARRIER: The table is new (marked white). */
setgcref(t->metatable, obj2gco(tabV(&tmp)));
} else {
irk = &T->ir[irk->op2];
if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
lj_ir_kvalue(J->L, &tmp, irk);
val = lj_tab_set(J->L, t, &tmp);
/* NOBARRIER: The table is new (marked white). */
snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
val->u32.hi = tmp.u32.lo;
}
}
}
}
}
/* Restore interpreter state from exit state with the help of a snapshot. */ /* Restore interpreter state from exit state with the help of a snapshot. */
const BCIns *lj_snap_restore(jit_State *J, void *exptr) const BCIns *lj_snap_restore(jit_State *J, void *exptr)
{ {
@ -500,10 +790,23 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
SnapEntry sn = map[n]; SnapEntry sn = map[n];
if (!(sn & SNAP_NORESTORE)) { if (!(sn & SNAP_NORESTORE)) {
TValue *o = &frame[snap_slot(sn)]; TValue *o = &frame[snap_slot(sn)];
snap_restoreval(J, T, ex, snapno, rfilt, snap_ref(sn), o); IRRef ref = snap_ref(sn);
IRIns *ir = &T->ir[ref];
if (ir->r == RID_SUNK) {
MSize j;
for (j = 0; j < n; j++)
if (snap_ref(map[j]) == ref) { /* De-duplicate sunk allocations. */
copyTV(L, o, &frame[snap_slot(map[j])]);
goto dupslot;
}
snap_unsink(J, T, ex, snapno, rfilt, ir, o);
dupslot:
continue;
}
snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
TValue tmp; TValue tmp;
snap_restoreval(J, T, ex, snapno, rfilt, snap_ref(sn)+1, &tmp); snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
o->u32.hi = tmp.u32.lo; o->u32.hi = tmp.u32.lo;
} else if ((sn & (SNAP_CONT|SNAP_FRAME))) { } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
/* Overwrite tag with frame link. */ /* Overwrite tag with frame link. */
@ -528,5 +831,6 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
#undef IR #undef IR
#undef emitir_raw #undef emitir_raw
#undef emitir
#endif #endif

View File

@ -16,17 +16,19 @@ typedef uint32_t Reg;
/* The hi-bit is NOT set for an allocated register. This means the value /* The hi-bit is NOT set for an allocated register. This means the value
** can be directly used without masking. The hi-bit is set for a register ** can be directly used without masking. The hi-bit is set for a register
** allocation hint or for RID_INIT. ** allocation hint or for RID_INIT, RID_SINK or RID_SUNK.
*/ */
#define RID_NONE 0x80 #define RID_NONE 0x80
#define RID_MASK 0x7f #define RID_MASK 0x7f
#define RID_INIT (RID_NONE|RID_MASK) #define RID_INIT (RID_NONE|RID_MASK)
#define RID_SINK (RID_INIT-1)
#define RID_SUNK (RID_INIT-2)
#define ra_noreg(r) ((r) & RID_NONE) #define ra_noreg(r) ((r) & RID_NONE)
#define ra_hasreg(r) (!((r) & RID_NONE)) #define ra_hasreg(r) (!((r) & RID_NONE))
/* The ra_hashint() macro assumes a previous test for ra_noreg(). */ /* The ra_hashint() macro assumes a previous test for ra_noreg(). */
#define ra_hashint(r) ((r) != RID_INIT) #define ra_hashint(r) ((r) < RID_SUNK)
#define ra_gethint(r) ((Reg)((r) & RID_MASK)) #define ra_gethint(r) ((Reg)((r) & RID_MASK))
#define ra_sethint(rr, r) rr = (uint8_t)((r)|RID_NONE) #define ra_sethint(rr, r) rr = (uint8_t)((r)|RID_NONE)
#define ra_samehint(r1, r2) (ra_gethint((r1)^(r2)) == 0) #define ra_samehint(r1, r2) (ra_gethint((r1)^(r2)) == 0)

View File

@ -606,6 +606,7 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud)
J->loopref = J->chain[IR_LOOP]; /* Needed by assembler. */ J->loopref = J->chain[IR_LOOP]; /* Needed by assembler. */
} }
lj_opt_split(J); lj_opt_split(J);
lj_opt_sink(J);
J->state = LJ_TRACE_ASM; J->state = LJ_TRACE_ASM;
break; break;

View File

@ -64,6 +64,7 @@
#include "lj_opt_dce.c" #include "lj_opt_dce.c"
#include "lj_opt_loop.c" #include "lj_opt_loop.c"
#include "lj_opt_split.c" #include "lj_opt_split.c"
#include "lj_opt_sink.c"
#include "lj_mcode.c" #include "lj_mcode.c"
#include "lj_snap.c" #include "lj_snap.c"
#include "lj_record.c" #include "lj_record.c"