From 847b9cf2534233198e0229c209ab4c4040f5166e Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 13 Sep 2010 01:17:38 +0200 Subject: [PATCH] Record vararg expressions with varargs defined off-trace. Add SLOAD variant to access the frame type/size. --- lib/dump.lua | 5 ++- src/lj_asm.c | 47 +++++++++++++++++----------- src/lj_ir.h | 9 +++--- src/lj_opt_fold.c | 9 ++++-- src/lj_record.c | 80 +++++++++++++++++++++++++++++++++-------------- 5 files changed, 102 insertions(+), 48 deletions(-) diff --git a/lib/dump.lua b/lib/dump.lua index 82a6d964..a00862d0 100644 --- a/lib/dump.lua +++ b/lib/dump.lua @@ -210,7 +210,10 @@ local colorize, irtype -- Lookup table to convert some literals into names. local litname = { ["SLOAD "] = { [0] = "", "I", "R", "RI", "P", "PI", "PR", "PRI", - "T", "IT", "RT", "RIT", "PT", "PIT", "PRT", "PRIT", }, + "T", "IT", "RT", "RIT", "PT", "PIT", "PRT", "PRIT", + "F", "IF", "RF", "RIF", "PF", "PIF", "PRF", "PRIF", + "TF", "ITF", "RTF", "RITF", "PTF", "PITF", "PRTF", "PRITF", + }, ["XLOAD "] = { [0] = "", "R", "U", "RU", }, ["TOINT "] = { [0] = "check", "index", "", }, ["FLOAD "] = vmdef.irfield, diff --git a/src/lj_asm.c b/src/lj_asm.c index 7773abe1..d26d0b4b 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -1110,23 +1110,34 @@ static int noconflict(ASMState *as, IRRef ref, IROp conflict) return 1; /* Ok, no conflict. */ } +/* Fuse array base into memory operand. */ +static IRRef asm_fuseabase(ASMState *as, IRRef ref) +{ + IRIns *irb = IR(ref); + as->mrm.ofs = 0; + if (irb->o == IR_FLOAD) { + IRIns *ira = IR(irb->op1); + lua_assert(irb->op2 == IRFL_TAB_ARRAY); + /* We can avoid the FLOAD of t->array for colocated arrays. */ + if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE && + noconflict(as, irb->op1, IR_NEWREF)) { + as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */ + return irb->op1; /* Table obj. */ + } + } else if (irb->o == IR_ADD && irref_isk(irb->op2)) { + /* Fuse base offset (vararg load). */ + as->mrm.ofs = IR(irb->op2)->i; + return irb->op1; + } + return ref; /* Otherwise use the given array base. */ +} + /* Fuse array reference into memory operand. */ static void asm_fusearef(ASMState *as, IRIns *ir, RegSet allow) { - IRIns *irb = IR(ir->op1); - IRIns *ira, *irx; + IRIns *irx; lua_assert(ir->o == IR_AREF); - lua_assert(irb->o == IR_FLOAD && irb->op2 == IRFL_TAB_ARRAY); - ira = IR(irb->op1); - if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE && - noconflict(as, irb->op1, IR_NEWREF)) { - /* We can avoid the FLOAD of t->array for colocated arrays. */ - as->mrm.base = (uint8_t)ra_alloc1(as, irb->op1, allow); /* Table obj. */ - as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */ - } else { - as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); /* Array base. */ - as->mrm.ofs = 0; - } + as->mrm.base = (uint8_t)ra_alloc1(as, asm_fuseabase(as, ir->op1), allow); irx = IR(ir->op2); if (irref_isk(ir->op2)) { as->mrm.ofs += 8*irx->i; @@ -1277,10 +1288,10 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) } else if (mayfuse(as, ref)) { RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; if (ir->o == IR_SLOAD) { - if (!irt_isint(ir->t) && !(ir->op2 & IRSLOAD_PARENT) && - noconflict(as, ref, IR_RETF)) { + if ((!irt_isint(ir->t) || (ir->op2 & IRSLOAD_FRAME)) && + !(ir->op2 & IRSLOAD_PARENT) && noconflict(as, ref, IR_RETF)) { as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); - as->mrm.ofs = 8*((int32_t)ir->op1-1); + as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0); as->mrm.idx = RID_NONE; return RID_MRM; } @@ -2031,7 +2042,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir) static void asm_sload(ASMState *as, IRIns *ir) { - int32_t ofs = 8*((int32_t)ir->op1-1); + int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); IRType1 t = ir->t; Reg base; lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ @@ -2056,7 +2067,7 @@ static void asm_sload(ASMState *as, IRIns *ir) Reg dest = ra_dest(as, ir, allow); base = ra_alloc1(as, REF_BASE, RSET_GPR); lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); - if (irt_isint(t)) + if (irt_isint(t) && !(ir->op2 & IRSLOAD_FRAME)) emit_rmro(as, XO_CVTSD2SI, dest, base, ofs); else if (irt_isnum(t)) emit_rmro(as, XMM_MOVRM(as), dest, base, ofs); diff --git a/src/lj_ir.h b/src/lj_ir.h index 22127806..cc57560d 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -189,10 +189,11 @@ IRFLDEF(FLENUM) } IRFieldID; /* SLOAD mode bits, stored in op2. */ -#define IRSLOAD_INHERIT 1 /* Inherited by exits/side traces. */ -#define IRSLOAD_READONLY 2 /* Read-only, omit slot store. */ -#define IRSLOAD_PARENT 4 /* Coalesce with parent trace. */ -#define IRSLOAD_TYPECHECK 8 /* Needs type check. */ +#define IRSLOAD_INHERIT 0x01 /* Inherited by exits/side traces. */ +#define IRSLOAD_READONLY 0x02 /* Read-only, omit slot store. */ +#define IRSLOAD_PARENT 0x04 /* Coalesce with parent trace. */ +#define IRSLOAD_TYPECHECK 0x08 /* Needs type check. */ +#define IRSLOAD_FRAME 0x10 /* Load hiword of frame. */ /* XLOAD mode, stored in op2. */ #define IRXLOAD_READONLY 1 /* Load from read-only data. */ diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 312c9cf0..90520d8c 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -1318,8 +1318,13 @@ LJFOLDX(lj_opt_fwd_fload) LJFOLD(SLOAD any any) LJFOLDF(fwd_sload) { - lua_assert(J->slot[fins->op1] != 0); - return J->slot[fins->op1]; + if ((fins->op2 & IRSLOAD_FRAME)) { + TRef tr = lj_opt_cse(J); + return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr; + } else { + lua_assert(J->slot[fins->op1] != 0); + return J->slot[fins->op1]; + } } LJFOLD(XLOAD KPTR any) diff --git a/src/lj_record.c b/src/lj_record.c index 15d72440..739279ad 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -650,29 +650,6 @@ static void rec_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) lua_assert(J->baseslot >= 1); } -/* -- Vararg handling ----------------------------------------------------- */ - -/* Record vararg instruction. */ -static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) -{ - ptrdiff_t nvararg = frame_delta(J->L->base-1) - J->pt->numparams - 1; - lua_assert(frame_isvarg(J->L->base-1)); - if (J->framedepth == 0) { /* NYI: unknown number of varargs. */ - setintV(&J->errinfo, BC_VARG); - lj_trace_err_info(J, LJ_TRERR_NYIBC); - } else { /* Simple case: known fixed number of varargs defined on-trace. */ - ptrdiff_t i; - if (nresults == -1) { - nresults = nvararg; - J->maxslot = dst + nvararg; - } else if (dst + nresults > J->maxslot) { - J->maxslot = dst + nresults; - } - for (i = 0; i < nresults; i++) - J->base[dst+i] = i < nvararg ? J->base[i - nvararg - 1] : TREF_NIL; - } -} - /* -- Metamethod handling ------------------------------------------------- */ /* Prepare to record call to metamethod. */ @@ -1928,6 +1905,63 @@ static void rec_func_jit(jit_State *J, TraceNo lnk) rec_stop(J, lnk); /* Link to the function. */ } +/* -- Vararg handling ----------------------------------------------------- */ + +/* Record vararg instruction. */ +static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) +{ + int32_t numparams = J->pt->numparams; + ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1; + lua_assert(frame_isvarg(J->L->base-1)); + if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ + ptrdiff_t i; + if (nvararg < 0) nvararg = 0; + if (nresults == -1) { + nresults = nvararg; + J->maxslot = dst + (BCReg)nvararg; + } else if (dst + nresults > J->maxslot) { + J->maxslot = dst + (BCReg)nresults; + } + for (i = 0; i < nresults; i++) { + J->base[dst+i] = i < nvararg ? J->base[i - nvararg - 1] : TREF_NIL; + lua_assert(J->base[dst+i] != 0); + } + } else { /* Unknown number of varargs passed to trace. */ + TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME); + int32_t frofs = 8*(1+numparams)+FRAME_VARG; + if (nresults >= 0) { /* Known fixed number of results. */ + ptrdiff_t i; + if (nvararg > 0) { + TRef vbase; + if (nvararg >= nresults) + emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults)); + else + emitir(IRTGI(IR_EQ), fr, lj_ir_kint(J, frame_ftsz(J->L->base-1))); + vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); + vbase = emitir(IRT(IR_ADD, IRT_PTR), vbase, lj_ir_kint(J, frofs-8)); + for (i = 0; i < nvararg; i++) { + IRType t = itype2irt(&J->L->base[i-1-nvararg]); + TRef aref = emitir(IRT(IR_AREF, IRT_PTR), + vbase, lj_ir_kint(J, (int32_t)i)); + TRef tr = emitir(IRTG(IR_ALOAD, t), aref, 0); + if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ + J->base[dst+i] = tr; + } + } else { + emitir(IRTGI(IR_LE), fr, lj_ir_kint(J, frofs)); + nvararg = 0; + } + for (i = nvararg; i < nresults; i++) + J->base[dst+i] = TREF_NIL; + if (dst + (BCReg)nresults > J->maxslot) + J->maxslot = dst + (BCReg)nresults; + } else { + setintV(&J->errinfo, BC_VARG); + lj_trace_err_info(J, LJ_TRERR_NYIBC); + } + } +} + /* -- Record allocations -------------------------------------------------- */ static TRef rec_tnew(jit_State *J, uint32_t ah)