Optimize table length computation with hinting.

10x faster on loop with t[#t+1] = x idiom. Also used by table.insert.
This commit is contained in:
Mike Pall 2020-05-27 19:20:44 +02:00
parent b2307c8ad8
commit 1a4ff13117
12 changed files with 135 additions and 82 deletions

View File

@ -1634,6 +1634,12 @@ static void asm_fuseequal(ASMState *as, IRIns *ir)
} }
} }
/* Emit the call for an ALEN instruction: lj_tab_len when no hint operand is
** present (op2 is REF_NIL), otherwise the hint-taking lj_tab_len_hint.
*/
static void asm_alen(ASMState *as, IRIns *ir)
{
  if (ir->op2 == REF_NIL) {
    asm_callid(as, ir, IRCALL_lj_tab_len);
  } else {
    asm_callid(as, ir, IRCALL_lj_tab_len_hint);
  }
}
/* -- Instruction dispatch ------------------------------------------------ */ /* -- Instruction dispatch ------------------------------------------------ */
/* Assemble a single instruction. */ /* Assemble a single instruction. */
@ -1716,6 +1722,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
case IR_FLOAD: asm_fload(as, ir); break; case IR_FLOAD: asm_fload(as, ir); break;
case IR_XLOAD: asm_xload(as, ir); break; case IR_XLOAD: asm_xload(as, ir); break;
case IR_SLOAD: asm_sload(as, ir); break; case IR_SLOAD: asm_sload(as, ir); break;
case IR_ALEN: asm_alen(as, ir); break;
case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
case IR_FSTORE: asm_fstore(as, ir); break; case IR_FSTORE: asm_fstore(as, ir); break;

View File

@ -281,7 +281,7 @@ static void LJ_FASTCALL recff_rawlen(jit_State *J, RecordFFData *rd)
if (tref_isstr(tr)) if (tref_isstr(tr))
J->base[0] = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN); J->base[0] = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN);
else if (tref_istab(tr)) else if (tref_istab(tr))
J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, tr); J->base[0] = emitir(IRTI(IR_ALEN), tr, TREF_NIL);
/* else: Interpreter will throw. */ /* else: Interpreter will throw. */
UNUSED(rd); UNUSED(rd);
} }
@ -1026,7 +1026,7 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
rd->nres = 0; rd->nres = 0;
if (tref_istab(ix.tab) && ix.val) { if (tref_istab(ix.tab) && ix.val) {
if (!J->base[2]) { /* Simple push: t[#t+1] = v */ if (!J->base[2]) { /* Simple push: t[#t+1] = v */
TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, ix.tab); TRef trlen = emitir(IRTI(IR_ALEN), ix.tab, TREF_NIL);
GCtab *t = tabV(&rd->argv[0]); GCtab *t = tabV(&rd->argv[0]);
ix.key = emitir(IRTI(IR_ADD), trlen, lj_ir_kint(J, 1)); ix.key = emitir(IRTI(IR_ADD), trlen, lj_ir_kint(J, 1));
settabV(J->L, &ix.tabv, t); settabV(J->L, &ix.tabv, t);
@ -1050,7 +1050,7 @@ static void LJ_FASTCALL recff_table_concat(jit_State *J, RecordFFData *rd)
lj_opt_narrow_toint(J, J->base[2]) : lj_ir_kint(J, 1); lj_opt_narrow_toint(J, J->base[2]) : lj_ir_kint(J, 1);
TRef tre = (J->base[1] && J->base[2] && !tref_isnil(J->base[3])) ? TRef tre = (J->base[1] && J->base[2] && !tref_isnil(J->base[3])) ?
lj_opt_narrow_toint(J, J->base[3]) : lj_opt_narrow_toint(J, J->base[3]) :
lj_ir_call(J, IRCALL_lj_tab_len, tab); emitir(IRTI(IR_ALEN), tab, TREF_NIL);
TRef hdr = recff_bufhdr(J); TRef hdr = recff_bufhdr(J);
TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre); TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre);
emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL)); emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL));

View File

@ -106,6 +106,7 @@
_(XLOAD, L , ref, lit) \ _(XLOAD, L , ref, lit) \
_(SLOAD, L , lit, lit) \ _(SLOAD, L , lit, lit) \
_(VLOAD, L , ref, ___) \ _(VLOAD, L , ref, ___) \
_(ALEN, L , ref, ref) \
\ \
_(ASTORE, S , ref, ref) \ _(ASTORE, S , ref, ref) \
_(HSTORE, S , ref, ref) \ _(HSTORE, S , ref, ref) \

View File

@ -168,6 +168,7 @@ typedef struct CCallInfo {
_(ANY, lj_tab_clear, 1, FS, NIL, 0) \ _(ANY, lj_tab_clear, 1, FS, NIL, 0) \
_(ANY, lj_tab_newkey, 3, S, PGC, CCI_L) \ _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L) \
_(ANY, lj_tab_len, 1, FL, INT, 0) \ _(ANY, lj_tab_len, 1, FL, INT, 0) \
_(ANY, lj_tab_len_hint, 2, FL, INT, 0) \
_(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \
_(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \ _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \
_(ANY, lj_mem_newgco, 2, FS, PGC, CCI_L) \ _(ANY, lj_mem_newgco, 2, FS, PGC, CCI_L) \

View File

@ -120,7 +120,7 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J);
LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J); LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J);
LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim); LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim);

View File

@ -2132,8 +2132,8 @@ LJFOLDX(lj_opt_fwd_hload)
LJFOLD(ULOAD any) LJFOLD(ULOAD any)
LJFOLDX(lj_opt_fwd_uload) LJFOLDX(lj_opt_fwd_uload)
LJFOLD(CALLL any IRCALL_lj_tab_len) LJFOLD(ALEN any any)
LJFOLDX(lj_opt_fwd_tab_len) LJFOLDX(lj_opt_fwd_alen)
/* Upvalue refs are really loads, but there are no corresponding stores. /* Upvalue refs are really loads, but there are no corresponding stores.
** So CSE is ok for them, except for UREFO across a GC step (see below). ** So CSE is ok for them, except for UREFO across a GC step (see below).

View File

@ -352,10 +352,12 @@ static void loop_unroll(LoopState *lps)
irr = IR(ref); irr = IR(ref);
goto phiconv; goto phiconv;
} }
} else if (ref != REF_DROP && irr->o == IR_CONV && } else if (ref != REF_DROP && ref > invar &&
ref > invar && irr->op1 < invar) { ((irr->o == IR_CONV && irr->op1 < invar) ||
/* May need an extra PHI for a CONV. */ (irr->o == IR_ALEN && irr->op2 < invar &&
ref = irr->op1; irr->op2 != REF_NIL))) {
/* May need an extra PHI for a CONV or ALEN hint. */
ref = irr->o == IR_CONV ? irr->op1 : irr->op2;
irr = IR(ref); irr = IR(ref);
phiconv: phiconv:
if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) { if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) {

View File

@ -363,7 +363,7 @@ TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J)
IRIns *ir; IRIns *ir;
/* Check for any intervening guards (includes conflicting loads). */ /* Check for any intervening guards (includes conflicting loads). */
for (ir = IR(J->cur.nins-1); ir > store; ir--) for (ir = IR(J->cur.nins-1); ir > store; ir--)
if (irt_isguard(ir->t) || ir->o == IR_CALLL) if (irt_isguard(ir->t) || ir->o == IR_ALEN)
goto doemit; /* No elimination possible. */ goto doemit; /* No elimination possible. */
/* Remove redundant store from chain and replace with NOP. */ /* Remove redundant store from chain and replace with NOP. */
*refp = store->prev; *refp = store->prev;
@ -381,6 +381,67 @@ doemit:
return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
} }
/* ALEN forwarding.
**
** Tries to replace the ALEN instruction currently being folded (fins) with
** an earlier ALEN of the same table. Returns the reference of that earlier
** ALEN if no intervening instruction can have changed the table length.
** Otherwise returns EMITFOLD (no forwarding), after possibly recording a
** length hint in fins->op2 for the t[#t+1] = x push idiom.
*/
TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J)
{
  IRRef tab = fins->op1;  /* Table reference. */
  IRRef lim = tab;  /* Search limit. */
  IRRef ref;

  /* Search for conflicting HSTORE with numeric key. */
  ref = J->chain[IR_HSTORE];
  while (ref > lim) {
    IRIns *store = IR(ref);
    IRIns *href = IR(store->op1);
    IRIns *key = IR(href->op2);
    /* For a KSLOT key the type is taken from the key it refers to (op1). */
    if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) {
      lim = ref;  /* Conflicting store found, limits search for ALEN. */
      break;
    }
    ref = store->prev;
  }

  /* Try to find a matching ALEN. */
  ref = J->chain[IR_ALEN];
  while (ref > lim) {
    /* CSE for ALEN only depends on the table, not the hint. */
    if (IR(ref)->op1 == tab) {
      IRRef sref;

      /* Search for aliasing table.clear. */
      if (!fwd_aa_tab_clear(J, ref, tab))
        break;

      /* Search for hint-forwarding or conflicting store. Only ASTOREs
      ** newer than the candidate ALEN matter.
      */
      sref = J->chain[IR_ASTORE];
      while (sref > ref) {
        IRIns *store = IR(sref);
        IRIns *aref = IR(store->op1);
        IRIns *fref = IR(aref->op1);
        if (tab == fref->op1) {  /* ASTORE to the same table. */
          /* Detect t[#t+1] = x idiom for push: a non-nil store whose index
          ** is the candidate ALEN plus the constant 1.
          */
          IRIns *idx = IR(aref->op2);
          if (!irt_isnil(store->t) &&
              idx->o == IR_ADD && idx->op1 == ref &&
              IR(idx->op2)->o == IR_KINT && IR(idx->op2)->i == 1) {
            /* Note: this requires an extra PHI check in loop unroll. */
            fins->op2 = aref->op2;  /* Set ALEN hint. */
          }
          goto doemit;  /* Conflicting store, possibly giving a hint. */
        } else if (aa_table(J, tab, fref->op1) != ALIAS_NO) {
          /* A store to another table conflicts only if it may alias this
          ** one. Stores proven not to alias (ALIAS_NO) are skipped so the
          ** scan can continue.
          */
          goto doemit;  /* Conflicting store. */
        }
        sref = store->prev;
      }

      return ref;  /* Plain ALEN forwarding. */
    }
    ref = IR(ref)->prev;
  }
doemit:
  return EMITFOLD;
}
/* -- ULOAD forwarding ---------------------------------------------------- */ /* -- ULOAD forwarding ---------------------------------------------------- */
/* The current alias analysis for upvalues is very simplistic. It only /* The current alias analysis for upvalues is very simplistic. It only
@ -430,7 +491,6 @@ TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J)
cselim: cselim:
/* Try to find a matching load. Below the conflicting store, if any. */ /* Try to find a matching load. Below the conflicting store, if any. */
ref = J->chain[IR_ULOAD]; ref = J->chain[IR_ULOAD];
while (ref > lim) { while (ref > lim) {
IRIns *ir = IR(ref); IRIns *ir = IR(ref);
@ -845,39 +905,6 @@ doemit:
return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
} }
/* -- Forwarding of lj_tab_len -------------------------------------------- */

/* Simplistic forwarding of an earlier table length result. The search limit
** is raised to the newest ASTORE and to the newest HSTORE with a numeric
** key; below that limit the result is looked up via CSE, unless the
** table.clear alias check fails, in which case the instruction is emitted.
*/
TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J)
{
  IRRef tab = fins->op1;  /* Table reference. */
  IRRef lim = J->chain[IR_ASTORE];  /* Any ASTORE is a conflict. */
  IRRef ref;

  /* The search never goes below the table reference itself. */
  if (lim < tab) lim = tab;

  /* Look for a conflicting HSTORE with a numeric key above the limit. */
  for (ref = J->chain[IR_HSTORE]; ref > lim; ref = IR(ref)->prev) {
    IRIns *key = IR(IR(IR(ref)->op1)->op2);
    if (key->o == IR_KSLOT) key = IR(key->op1);
    if (irt_isnum(key->t)) {
      lim = ref;  /* Conflicting store found, limits search for TLEN. */
      break;
    }
  }

  /* With no aliasing table.clear, try to find a matching earlier result
  ** below the conflicting store, if any. Otherwise emit the instruction.
  */
  if (fwd_aa_tab_clear(J, lim, tab))
    return lj_opt_cselim(J, lim);
  return lj_ir_emit(J);
}
/* -- ASTORE/HSTORE previous type analysis -------------------------------- */ /* -- ASTORE/HSTORE previous type analysis -------------------------------- */
/* Check whether the previous value for a table store is non-nil. /* Check whether the previous value for a table store is non-nil.

View File

@ -78,8 +78,7 @@ static void sink_mark_ins(jit_State *J)
switch (ir->o) { switch (ir->o) {
case IR_BASE: case IR_BASE:
return; /* Finished. */ return; /* Finished. */
case IR_CALLL: /* IRCALL_lj_tab_len */ case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR: case IR_ALEN:
case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR:
irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */ irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */
break; break;
case IR_FLOAD: case IR_FLOAD:

View File

@ -1058,7 +1058,7 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
lj_record_call(J, func, 2); lj_record_call(J, func, 2);
} else { } else {
if (LJ_52 && tref_istab(tr)) if (LJ_52 && tref_istab(tr))
return lj_ir_call(J, IRCALL_lj_tab_len, tr); return emitir(IRTI(IR_ALEN), tr, TREF_NIL);
lj_trace_err(J, LJ_TRERR_NOMM); lj_trace_err(J, LJ_TRERR_NOMM);
} }
return 0; /* No result yet. */ return 0; /* No result yet. */
@ -2191,7 +2191,7 @@ void lj_record_ins(jit_State *J)
if (tref_isstr(rc)) if (tref_isstr(rc))
rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN);
else if (!LJ_52 && tref_istab(rc)) else if (!LJ_52 && tref_istab(rc))
rc = lj_ir_call(J, IRCALL_lj_tab_len, rc); rc = emitir(IRTI(IR_ALEN), rc, TREF_NIL);
else else
rc = rec_mm_len(J, rc, rcv); rc = rec_mm_len(J, rc, rcv);
break; break;

View File

@ -639,49 +639,62 @@ int lj_tab_next(lua_State *L, GCtab *t, TValue *key)
/* -- Table length calculation -------------------------------------------- */ /* -- Table length calculation -------------------------------------------- */
static MSize unbound_search(GCtab *t, MSize j) /* Compute table length. Slow path with mixed array/hash lookups. */
LJ_NOINLINE static MSize tab_len_slow(GCtab *t, size_t hi)
{ {
cTValue *tv; cTValue *tv;
MSize i = j; /* i is zero or a present index */ size_t lo = hi;
j++; hi++;
/* find `i' and `j' such that i is present and j is not */ /* Widening search for an upper bound. */
while ((tv = lj_tab_getint(t, (int32_t)j)) && !tvisnil(tv)) { while ((tv = lj_tab_getint(t, (int32_t)hi)) && !tvisnil(tv)) {
i = j; lo = hi;
j *= 2; hi += hi;
if (j > (MSize)(INT_MAX-2)) { /* overflow? */ if (hi > (size_t)(INT_MAX-2)) { /* Punt and do a linear search. */
/* table was built with bad purposes: resort to linear search */ lo = 1;
i = 1; while ((tv = lj_tab_getint(t, (int32_t)lo)) && !tvisnil(tv)) lo++;
while ((tv = lj_tab_getint(t, (int32_t)i)) && !tvisnil(tv)) i++; return (MSize)(lo - 1);
return i - 1;
} }
} }
/* now do a binary search between them */ /* Binary search to find a non-nil to nil transition. */
while (j - i > 1) { while (hi - lo > 1) {
MSize m = (i+j)/2; size_t mid = (lo+hi) >> 1;
cTValue *tvb = lj_tab_getint(t, (int32_t)m); cTValue *tvb = lj_tab_getint(t, (int32_t)mid);
if (tvb && !tvisnil(tvb)) i = m; else j = m; if (tvb && !tvisnil(tvb)) lo = mid; else hi = mid;
} }
return i; return (MSize)lo;
} }
/* /* Compute table length. Fast path. */
** Try to find a boundary in table `t'. A `boundary' is an integer index
** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil).
*/
MSize LJ_FASTCALL lj_tab_len(GCtab *t) MSize LJ_FASTCALL lj_tab_len(GCtab *t)
{ {
MSize j = (MSize)t->asize; size_t hi = (size_t)t->asize;
if (j > 1 && tvisnil(arrayslot(t, j-1))) { if (hi) hi--;
MSize i = 1; /* In a growing array the last array element is very likely nil. */
while (j - i > 1) { if (hi > 0 && LJ_LIKELY(tvisnil(arrayslot(t, hi)))) {
MSize m = (i+j)/2; /* Binary search to find a non-nil to nil transition in the array. */
if (tvisnil(arrayslot(t, m-1))) j = m; else i = m; size_t lo = 0;
while (hi - lo > 1) {
size_t mid = (lo+hi) >> 1;
if (tvisnil(arrayslot(t, mid))) hi = mid; else lo = mid;
} }
return i-1; return (MSize)lo;
} }
if (j) j--; /* Without a hash part, there's an implicit nil after the last element. */
if (t->hmask <= 0) return t->hmask ? tab_len_slow(t, hi) : (MSize)hi;
return j;
return unbound_search(t, j);
} }
#if LJ_HASJIT
/* Verify hinted table length or compute it.
**
** The hint is accepted and returned when either
** - slots hint and hint+1 both lie inside the array part, slot hint is
**   non-nil and slot hint+1 is nil, or
** - hint is the last array slot, the table has hmask == 0 and slot hint
**   is non-nil.
** In every other case the length is computed by lj_tab_len().
*/
MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint)
{
  size_t asize = (size_t)t->asize;
  if (LJ_LIKELY(hint+1 < asize)) {
    /* The slot pointer is formed only once hint is known to be in range. */
    cTValue *slot = arrayslot(t, hint);
    if (LJ_LIKELY(!tvisnil(slot) && tvisnil(slot+1)))
      return (MSize)hint;
  } else if (hint+1 <= asize && LJ_LIKELY(t->hmask == 0) &&
             !tvisnil(arrayslot(t, hint))) {
    return (MSize)hint;
  }
  return lj_tab_len(t);
}
#endif

View File

@ -69,5 +69,8 @@ LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key); LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key);
LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t); LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t);
#if LJ_HASJIT
/* Verify a hinted table length or compute it. Only declared when LJ_HASJIT
** is set.
*/
LJ_FUNC MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint);
#endif
#endif #endif