From c4727220e889dccf06427fd6e473741c0829e344 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 15 Mar 2010 17:02:53 +0100 Subject: [PATCH] Add array bounds check elimination (-Oabc, on by default). --- doc/running.html | 2 ++ src/lj_asm.c | 2 +- src/lj_jit.h | 10 +++++--- src/lj_opt_fold.c | 65 +++++++++++++++++++++++++++++++++++++---------- src/lj_record.c | 40 ++++++++++++++++++++++++++++- 5 files changed, 100 insertions(+), 19 deletions(-) diff --git a/doc/running.html b/doc/running.html index 96f1fa09..18e0f30f 100644 --- a/doc/running.html +++ b/doc/running.html @@ -181,6 +181,8 @@ are enabled: dse  •Dead-Store Elimination +abc  •Array Bounds Check Elimination + fuse  •Fusion of operands into instructions

diff --git a/src/lj_asm.c b/src/lj_asm.c index c749ada0..9e8f1fc0 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -2571,7 +2571,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc) } else { IRRef lref = ir->op1, rref = ir->op2; IROp leftop = (IROp)(IR(lref)->o); - lua_assert(irt_isint(ir->t) || (irt_isaddr(ir->t) && (cc & 0xe) == CC_E)); + lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t)); /* Swap constants (only for ABC) and fusable loads to the right. */ if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) { if ((cc & 0xc) == 0xc) cc ^= 3; /* L <-> G, LE <-> GE */ diff --git a/src/lj_jit.h b/src/lj_jit.h index bd7c7577..76d7942b 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -30,7 +30,7 @@ #endif /* Optimization flags. */ -#define JIT_F_OPT_MASK 0x00ff0000 +#define JIT_F_OPT_MASK 0x0fff0000 #define JIT_F_OPT_FOLD 0x00010000 #define JIT_F_OPT_CSE 0x00020000 @@ -39,18 +39,20 @@ #define JIT_F_OPT_DSE 0x00100000 #define JIT_F_OPT_NARROW 0x00200000 #define JIT_F_OPT_LOOP 0x00400000 -#define JIT_F_OPT_FUSE 0x00800000 +#define JIT_F_OPT_ABC 0x00800000 +#define JIT_F_OPT_FUSE 0x01000000 /* Optimizations names for -O. Must match the order above. */ #define JIT_F_OPT_FIRST JIT_F_OPT_FOLD #define JIT_F_OPTSTRING \ - "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\4fuse" + "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4fuse" /* Optimization levels set a fixed combination of flags. */ #define JIT_F_OPT_0 0 #define JIT_F_OPT_1 (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE) #define JIT_F_OPT_2 (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP) -#define JIT_F_OPT_3 (JIT_F_OPT_2|JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_FUSE) +#define JIT_F_OPT_3 \ + (JIT_F_OPT_2|JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_FUSE) #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 #if defined(LUA_USE_WIN) || LJ_64 diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index a85b49bc..5eeffae3 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -1,5 +1,6 @@ /* ** FOLD: Constant Folding, Algebraic Simplifications and Reassociation. +** ABCelim: Array Bounds Check Elimination. ** CSE: Common-Subexpression Elimination. ** Copyright (C) 2005-2010 Mike Pall. See Copyright Notice in luajit.h */ @@ -949,31 +950,69 @@ LJFOLDF(reassoc_minmax_right) return NEXTFOLD; } +/* -- Array bounds check elimination -------------------------------------- */ + /* Eliminate ABC across PHIs to handle t[i-1] forwarding case. ** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists. ** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig. */ LJFOLD(ABC any ADD) -LJFOLDF(reassoc_abc) +LJFOLDF(abc_fwd) { - if (irref_isk(fright->op2)) { - IRIns *add2 = IR(fright->op1); - if (add2->o == IR_ADD && irref_isk(add2->op2) && - IR(fright->op2)->i == -IR(add2->op2)->i) { - IRRef ref = J->chain[IR_ABC]; - IRRef lim = add2->op1; - if (fins->op1 > lim) lim = fins->op1; - while (ref > lim) { - IRIns *ir = IR(ref); - if (ir->op1 == fins->op1 && ir->op2 == add2->op1) - return DROPFOLD; - ref = ir->prev; + if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) { + if (irref_isk(fright->op2)) { + IRIns *add2 = IR(fright->op1); + if (add2->o == IR_ADD && irref_isk(add2->op2) && + IR(fright->op2)->i == -IR(add2->op2)->i) { + IRRef ref = J->chain[IR_ABC]; + IRRef lim = add2->op1; + if (fins->op1 > lim) lim = fins->op1; + while (ref > lim) { + IRIns *ir = IR(ref); + if (ir->op1 == fins->op1 && ir->op2 == add2->op1) + return DROPFOLD; + ref = ir->prev; + } } } } return NEXTFOLD; } +/* Eliminate ABC for constants. +** ABC(asize, k1), ABC(asize k2) ==> ABC(asize, max(k1, k2)) +** Drop second ABC if k2 is lower. Otherwise patch first ABC with k2. +*/ +LJFOLD(ABC any KINT) +LJFOLDF(abc_k) +{ + if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) { + IRRef ref = J->chain[IR_ABC]; + IRRef asize = fins->op1; + while (ref > asize) { + IRIns *ir = IR(ref); + if (ir->op1 == asize && irref_isk(ir->op2)) { + int32_t k = IR(ir->op2)->i; + if (fright->i > k) + ir->op2 = fins->op2; + return DROPFOLD; + } + ref = ir->prev; + } + return EMITFOLD; /* Already performed CSE. */ + } + return NEXTFOLD; +} + +/* Eliminate invariant ABC inside loop. */ +LJFOLD(ABC any any) +LJFOLDF(abc_invar) +{ + if (!irt_isint(fins->t) && J->chain[IR_LOOP]) /* Currently marked as PTR. */ + return DROPFOLD; + return NEXTFOLD; +} + /* -- Commutativity ------------------------------------------------------- */ /* The refs of commutative ops are canonicalized. Lower refs go to the right. diff --git a/src/lj_record.c b/src/lj_record.c index 0b0768d6..33272316 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -806,6 +806,44 @@ static void rec_mm_comp(jit_State *J, RecordIndex *ix, int op) /* -- Indexed access ------------------------------------------------------ */ +/* Record bounds-check. */ +static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) +{ + /* Try to emit invariant bounds checks. */ + if ((J->flags & (JIT_F_OPT_LOOP|JIT_F_OPT_ABC)) == + (JIT_F_OPT_LOOP|JIT_F_OPT_ABC)) { + IRRef ref = tref_ref(ikey); + IRIns *ir = IR(ref); + int32_t ofs = 0; + IRRef ofsref = 0; + /* Handle constant offsets. */ + if (ir->o == IR_ADD && irref_isk(ir->op2)) { + ofsref = ir->op2; + ofs = IR(ofsref)->i; + ref = ir->op1; + ir = IR(ref); + } + /* Got scalar evolution analysis results for this reference? */ + if (ref == J->scev.idx) { + int32_t stop; + lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD); + stop = lj_num2int(numV(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP])); + /* Runtime value for stop of loop is within bounds? */ + if ((int64_t)stop + ofs < (int64_t)asize) { + /* Emit invariant bounds check for stop. */ + emitir(IRTG(IR_ABC, IRT_PTR), asizeref, ofs == 0 ? J->scev.stop : + emitir(IRTI(IR_ADD), J->scev.stop, ofsref)); + /* Emit invariant bounds check for start, if not const or negative. */ + if (!(J->scev.dir && J->scev.start && + (int64_t)IR(J->scev.start)->i + ofs >= 0)) + emitir(IRTG(IR_ABC, IRT_PTR), asizeref, ikey); + return; + } + } + } + emitir(IRTGI(IR_ABC), asizeref, ikey); /* Emit regular bounds check. */ +} + /* Record indexed key lookup. */ static TRef rec_idx_key(jit_State *J, RecordIndex *ix) { @@ -827,7 +865,7 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix) asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); if ((MSize)k < t->asize) { /* Currently an array key? */ TRef arrayref; - emitir(IRTGI(IR_ABC), asizeref, ikey); /* Bounds check. */ + rec_idx_abc(J, asizeref, ikey, t->asize); arrayref = emitir(IRT(IR_FLOAD, IRT_PTR), ix->tab, IRFL_TAB_ARRAY); return emitir(IRT(IR_AREF, IRT_PTR), arrayref, ikey); } else { /* Currently not in array (may be an array extension)? */