From 47fa9a8d8ff7123e160abfc18c88589bbb7f4b58 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Fri, 26 Apr 2013 13:47:41 +0200 Subject: [PATCH] Change semantics of buffer ops to simplify CSE and DCE. --- src/lj_asm.c | 27 +++++++++---------- src/lj_ffrecord.c | 4 +-- src/lj_ir.h | 4 +-- src/lj_ircall.h | 14 +++++----- src/lj_opt_fold.c | 67 +++++++++++++++++++++++++---------------------- src/lj_record.c | 2 +- 6 files changed, 60 insertions(+), 58 deletions(-) diff --git a/src/lj_asm.c b/src/lj_asm.c index 6ff32940..73df6850 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -1039,20 +1039,18 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref); static void asm_bufhdr(ASMState *as, IRIns *ir) { - if (ra_used(ir)) { - Reg sb = ra_dest(as, ir, RSET_GPR); - if (!(ir->op2 & IRBUFHDR_APPEND)) { - Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); - /* Passing ir isn't strictly correct, but it's an IRT_P32, too. */ - emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p)); - emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b)); - } -#if LJ_TARGET_X86ORX64 - ra_left(as, sb, ir->op1); -#else - ra_leftov(as, sb, ir->op1); -#endif + Reg sb = ra_dest(as, ir, RSET_GPR); + if (!(ir->op2 & IRBUFHDR_APPEND)) { + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); + /* Passing ir isn't strictly correct, but it's an IRT_P32, too. 
*/ + emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p)); + emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b)); } +#if LJ_TARGET_X86ORX64 + ra_left(as, sb, ir->op1); +#else + ra_leftov(as, sb, ir->op1); +#endif } static void asm_bufput(ASMState *as, IRIns *ir) @@ -1061,7 +1059,6 @@ static void asm_bufput(ASMState *as, IRIns *ir) IRRef args[3]; IRIns *irs; int kchar = -1; - if (!ra_used(ir)) return; args[0] = ir->op1; /* SBuf * */ args[1] = ir->op2; /* GCstr * */ irs = IR(ir->op2); @@ -1107,7 +1104,7 @@ static void asm_bufstr(ASMState *as, IRIns *ir) { const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr]; IRRef args[1]; - args[0] = ir->op2; /* SBuf *sb */ + args[0] = ir->op1; /* SBuf *sb */ as->gcsteps++; asm_setupresult(as, ir, ci); /* GCstr * */ asm_gencall(as, ci, args); diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 69423760..a6ce2df7 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -766,7 +766,7 @@ static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd) TRef tr = hdr; for (i = 0; J->base[i] != 0; i++) tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, J->base[i]); - J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), hdr, tr); + J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); } UNUSED(rd); } @@ -777,7 +777,7 @@ static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd) TRef hdr = emitir(IRT(IR_BUFHDR, IRT_P32), lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); TRef tr = lj_ir_call(J, rd->data, hdr, str); - J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), hdr, tr); + J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); } /* -- Table library fast functions ---------------------------------------- */ diff --git a/src/lj_ir.h b/src/lj_ir.h index 7ab8ab12..841153d8 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -121,8 +121,8 @@ _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \ \ /* Buffer operations. 
*/ \ - _(BUFHDR, S , ref, lit) \ - _(BUFPUT, S , ref, ref) \ + _(BUFHDR, L , ref, lit) \ + _(BUFPUT, L , ref, ref) \ _(BUFSTR, A , ref, ref) \ \ /* Barriers. */ \ diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 35c063c4..3e190c80 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h @@ -107,13 +107,13 @@ typedef struct CCallInfo { _(ANY, lj_str_fromnum, 2, FN, STR, CCI_L) \ _(ANY, lj_str_fromchar, 2, FN, STR, CCI_L) \ _(ANY, lj_buf_putmem, 3, S, P32, 0) \ - _(ANY, lj_buf_putstr, 2, FS, P32, 0) \ - _(ANY, lj_buf_putchar, 2, FS, P32, 0) \ - _(ANY, lj_buf_putint, 2, FS, P32, 0) \ - _(ANY, lj_buf_putnum, 2, FS, P32, 0) \ - _(ANY, lj_buf_putstr_reverse, 2, FS, P32, 0) \ - _(ANY, lj_buf_putstr_lower, 2, FS, P32, 0) \ - _(ANY, lj_buf_putstr_upper, 2, FS, P32, 0) \ + _(ANY, lj_buf_putstr, 2, FL, P32, 0) \ + _(ANY, lj_buf_putchar, 2, FL, P32, 0) \ + _(ANY, lj_buf_putint, 2, FL, P32, 0) \ + _(ANY, lj_buf_putnum, 2, FL, P32, 0) \ + _(ANY, lj_buf_putstr_reverse, 2, FL, P32, 0) \ + _(ANY, lj_buf_putstr_lower, 2, FL, P32, 0) \ + _(ANY, lj_buf_putstr_upper, 2, FL, P32, 0) \ _(ANY, lj_buf_tostr, 1, FL, STR, 0) \ _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \ diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 9c751d98..e9f873b7 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -520,9 +520,23 @@ LJFOLDF(kfold_strcmp) /* -- Constant folding and forwarding for buffers ------------------------- */ -/* Note: buffer ops are not CSEd until the BUFSTR. It's ok to modify them. */ +/* +** Buffer ops perform stores, but their effect is limited to the buffer +** itself. Also, buffer ops are chained: a use of an op implies a use of +** all other ops up the chain. Conversely, if an op is unused, all ops +** up the chain can go unused. This largely eliminates the need to treat +** them as stores. +** +** Alas, treating them as normal (IRM_N) ops doesn't work, because they +** cannot be CSEd in isolation. 
CSE for IRM_N is implicitly done in LOOP +** or if FOLD is disabled. +** +** The compromise is to declare them as loads, emit them like stores and +** CSE whole chains manually when the BUFSTR is to be emitted. Any chain +** fragments left over from CSE are eliminated by DCE. +*/ -/* BUFHDR is treated like a store, see below. */ +/* BUFHDR is emitted like a store, see below. */ LJFOLD(BUFPUT BUFHDR BUFSTR) LJFOLDF(bufput_append) @@ -530,14 +544,14 @@ LJFOLDF(bufput_append) /* New buffer, no other buffer op inbetween and same buffer? */ if ((J->flags & JIT_F_OPT_FWD) && !(fleft->op2 & IRBUFHDR_APPEND) && - fleft->prev == fright->op1 && - fleft->op1 == IR(fright->op1)->op1) { + fleft->prev == fright->op2 && + fleft->op1 == IR(fright->op2)->op1) { IRRef ref = fins->op1; IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND); /* Modify BUFHDR. */ - IR(ref)->op1 = fright->op2; + IR(ref)->op1 = fright->op1; return ref; } - return EMITFOLD; /* This is a store and always emitted. */ + return EMITFOLD; /* Always emit, CSE later. */ } LJFOLD(BUFPUT any any) @@ -565,45 +579,36 @@ LJFOLDF(bufput_kgc) } } } - return EMITFOLD; /* This is a store and always emitted. */ + return EMITFOLD; /* Always emit, CSE later. */ } LJFOLD(BUFSTR any any) LJFOLDF(bufstr_kfold_cse) { - lua_assert(fright->o == IR_BUFHDR || fright->o == IR_BUFPUT || - fright->o == IR_CALLS); + lua_assert(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT || + fleft->o == IR_CALLL); if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) { - if (fright->o == IR_BUFHDR) { /* No put operations? */ - if (!(fright->op2 & IRBUFHDR_APPEND)) { /* Empty buffer? */ - lj_ir_rollback(J, fins->op1); /* Eliminate the current chain. */ + if (fleft->o == IR_BUFHDR) { /* No put operations? */ + if (!(fleft->op2 & IRBUFHDR_APPEND)) /* Empty buffer? */ return lj_ir_kstr(J, &J2G(J)->strempty); - } - fins->op2 = fright->prev; /* Relies on checks in bufput_append. */ + fins->op1 = fleft->prev; /* Relies on checks in bufput_append. 
*/ return CSEFOLD; - } else if (fright->o == IR_BUFPUT) { - IRIns *irb = IR(fright->op1); - if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND)) { - lj_ir_rollback(J, fins->op1); /* Eliminate the current chain. */ - return fright->op2; /* Shortcut for a single put operation. */ - } + } else if (fleft->o == IR_BUFPUT) { + IRIns *irb = IR(fleft->op1); + if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND)) + return fleft->op2; /* Shortcut for a single put operation. */ } } /* Try to CSE the whole chain. */ if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { IRRef ref = J->chain[IR_BUFSTR]; while (ref) { - IRRef last = fins->op2; - IRIns *irs = IR(ref), *ira = fright, *irb = IR(irs->op2); + IRRef last = fins->op1; + IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1); while (ira->o == irb->o && ira->op2 == irb->op2) { if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND)) { - IRIns *irh; - for (irh = IR(ira->prev); irh != irb; irh = IR(irh->prev)) - if (irh->op1 == irs->op2) - return ref; /* Do CSE, but avoid rollback if append follows. */ - lj_ir_rollback(J, last); /* Eliminate the current chain. */ return ref; /* CSE succeeded. */ - } else if (ira->o == IR_CALLS) { + } else if (ira->o == IR_CALLL) { ira = IR(ira->op1); irb = IR(irb->op1); lua_assert(ira->o == IR_CARG && irb->o == IR_CARG); if (ira->op2 != irb->op2) break; @@ -618,9 +623,9 @@ LJFOLDF(bufstr_kfold_cse) return EMITFOLD; /* No CSE possible. 
*/ } -LJFOLD(CALLS CARG IRCALL_lj_buf_putstr_reverse) -LJFOLD(CALLS CARG IRCALL_lj_buf_putstr_upper) -LJFOLD(CALLS CARG IRCALL_lj_buf_putstr_lower) +LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse) +LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper) +LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower) LJFOLDF(bufput_kfold_op) { if (irref_isk(fleft->op2)) { diff --git a/src/lj_record.c b/src/lj_record.c index 1beaa75f..8dc102e9 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -1622,7 +1622,7 @@ static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot) do { tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, *trp++); } while (trp <= top); - tr = emitir(IRT(IR_BUFSTR, IRT_STR), hdr, tr); + tr = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); J->maxslot = (BCReg)(xbase - J->base); if (xbase == base) return tr; }