Escape from colliding strings

- detect when a lot of collisions are generated:
  - if two full collisions are found (i.e. hash value and length are equal)
  - if a collision chain is longer than 18
    (the "average maximum" chain length with fill factor 1.0 is about 7)
- calculate a "full" hash for strings in a long collision chain
- use a "bloom" filter to keep track of the existence of strings with a "full" hash
- refill the "bloom" filter on string sweeping.
This commit is contained in:
Sokolov Yura aka funny_falcon 2016-06-28 11:57:47 +03:00
parent 33af3ea00e
commit ff7e514cfe
8 changed files with 181 additions and 10 deletions

View File

@ -146,6 +146,18 @@ XCFLAGS=
#XCFLAGS+= -DLUA_USE_ASSERT
#
##############################################################################
############################ STRING INTERNING #############################
##############################################################################
# Define LuaJIT string interning behaviour
#
# commented or LUAJIT_SMART_STRINGS=0 - no attempt to recover from collisions,
# only "classic LuaJIT" hashing is used - at most 16 bytes of a string are hashed.
# LUAJIT_SMART_STRINGS=1 - use full string hashing for colliding strings.
# If a collision chain is longer than 10 and the string is longer than 12 bytes,
# then a "fast and dumb" whole-string hash function is used.
XCFLAGS+= -DLUAJIT_SMART_STRINGS=1
#
##############################################################################
# You probably don't need to change anything below this line!
##############################################################################

View File

@ -727,7 +727,7 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.)
{
GCstr *s = lj_lib_checkstr(L, 1);
int b = 0;
switch (s->hash) {
switch (lj_str_fast_hash(s)) {
#if LJ_64
case H_(849858eb,ad35fd06): b = 1; break; /* 64bit */
#else

View File

@ -1059,7 +1059,7 @@ static void cp_decl_gccattribute(CPState *cp, CPDecl *decl)
if (cp->tok == CTOK_IDENT) {
GCstr *attrstr = cp->str;
cp_next(cp);
switch (attrstr->hash) {
switch (lj_str_fast_hash(attrstr)) {
case H_(64a9208e,8ce14319): case H_(8e6331b2,95a282af): /* aligned */
cp_decl_align(cp, decl);
break;
@ -1718,16 +1718,16 @@ static void cp_pragma(CPState *cp, BCLine pragmaline)
{
cp_next(cp);
if (cp->tok == CTOK_IDENT &&
cp->str->hash == H_(e79b999f,42ca3e85)) { /* pack */
!strcmp(strdata(cp->str), "pack")) { /* pack */
cp_next(cp);
cp_check(cp, '(');
if (cp->tok == CTOK_IDENT) {
if (cp->str->hash == H_(738e923c,a1b65954)) { /* push */
if (!strcmp(strdata(cp->str), "push")) { /* push */
if (cp->curpack < CPARSE_MAX_PACKSTACK) {
cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack];
cp->curpack++;
}
} else if (cp->str->hash == H_(6c71cf27,6c71cf27)) { /* pop */
} else if (!strcmp(strdata(cp->str), "pop")) { /* pop */
if (cp->curpack > 0) cp->curpack--;
} else {
cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL);

View File

@ -121,6 +121,7 @@ typedef unsigned int uintptr_t;
/* A really naive Bloom filter. But sufficient for our needs. */
/* The filter is a single machine word; each key sets or tests exactly one bit. */
typedef uintptr_t BloomFilter;
/* Mask that reduces a key to a valid bit index within one BloomFilter word. */
#define BLOOM_MASK (8*sizeof(BloomFilter) - 1)
/* Number of key bits that select a filter bit: 5 for a 4-byte word, else 6. */
/* NOTE(review): the "else" case presumably means an 8-byte word -- confirm. */
#define BLOOM_LOG (sizeof(BloomFilter)==4?5:6)
/* Single-bit mask for key x; only the bits of x covered by BLOOM_MASK are used. */
#define bloombit(x) ((uintptr_t)1 << ((x) & BLOOM_MASK))
/* Record key x in filter b (modifies b in place). */
#define bloomset(b, x) ((b) |= bloombit((x)))
/* Non-zero if the bit for key x is set in filter b. */
#define bloomtest(b, x) ((b) & bloombit((x)))

View File

@ -402,6 +402,33 @@ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim)
return p;
}
/*
** Sweep one string hash chain, starting at anchor p and running to its end.
** Surviving strings are changed to the current white; with
** LUAJIT_SMART_STRINGS, survivors carrying the smart bit are re-registered
** in the "new" Bloom filters under their fast hash. Dead strings are
** unlinked from the chain and freed.
** Returns the address of the link that terminates the chain.
*/
static GCRef *gc_sweep_str_chain(global_State *g, GCRef *p)
{
  /* Mask with other white and LJ_GC_FIXED. Or LJ_GC_SFIXED on shutdown. */
  int deadmask = otherwhite(g);
  GCobj *obj;
  for (obj = gcref(*p); obj != NULL; obj = gcref(*p)) {
    if (!((obj->gch.marked ^ LJ_GC_WHITES) & deadmask)) {
      /* Neither black nor current white: the string is dead, free it. */
      lua_assert(isdead(g, obj) || deadmask == LJ_GC_SFIXED);
      setgcrefr(*p, obj->gch.nextgc);
      lj_str_free(g, &obj->str);
      continue;  /* *p now refers to the successor; do not advance p. */
    }
    /* Black or current white: the string is alive. */
    lua_assert(!isdead(g, obj) || (obj->gch.marked & LJ_GC_FIXED));
    makewhite(g, obj);  /* Change to the current white. */
#if LUAJIT_SMART_STRINGS
    if (strsmart(&obj->str)) {
      /* Rebuild the next-cycle Bloom filters from the surviving strings. */
      MSize fast = lj_str_fast_hash(&obj->str);
      bloomset(g->strbloom.new[0], strbloombits0(fast));
      bloomset(g->strbloom.new[1], strbloombits1(fast));
    }
#endif
    p = &obj->gch.nextgc;
  }
  return p;
}
/* Check whether we can clear a key or a value slot from a table. */
static int gc_mayclear(cTValue *o, int val)
{
@ -555,7 +582,13 @@ void lj_gc_freeall(global_State *g)
gc_fullsweep(g, &g->gc.root);
strmask = g->strmask;
for (i = 0; i <= strmask; i++) /* Free all string hash chains. */
gc_fullsweep(g, &g->strhash[i]);
gc_sweep_str_chain(g, &g->strhash[i]);
#if LUAJIT_SMART_STRINGS
g->strbloom.cur[0] = g->strbloom.new[0];
g->strbloom.cur[1] = g->strbloom.new[1];
g->strbloom.new[0] = 0;
g->strbloom.new[1] = 0;
#endif
}
/* -- Collector ----------------------------------------------------------- */
@ -618,9 +651,16 @@ static size_t gc_onestep(lua_State *L)
return 0;
case GCSsweepstring: {
GCSize old = g->gc.total;
gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */
if (g->gc.sweepstr > g->strmask)
gc_sweep_str_chain(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */
if (g->gc.sweepstr > g->strmask) {
g->gc.state = GCSsweep; /* All string hash chains sweeped. */
#if LUAJIT_SMART_STRINGS
g->strbloom.cur[0] = g->strbloom.new[0];
g->strbloom.cur[1] = g->strbloom.new[1];
g->strbloom.new[0] = 0;
g->strbloom.new[1] = 0;
#endif
}
lua_assert(old >= g->gc.total);
g->gc.estimate -= old - g->gc.total;
return GCSWEEPCOST;

View File

@ -595,6 +595,12 @@ typedef struct global_State {
GCRef *strhash; /* String hash table (hash chain anchors). */
MSize strmask; /* String hash mask (size of hash table - 1). */
MSize strnum; /* Number of strings in hash table. */
#if LUAJIT_SMART_STRINGS
struct {
BloomFilter cur[2];
BloomFilter new[2];
} strbloom;
#endif
lua_Alloc allocf; /* Memory allocator. */
void *allocd; /* Memory allocator data. */
GCState gc; /* Garbage collector. */

View File

@ -126,6 +126,9 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
GCobj *o;
MSize len = (MSize)lenx;
MSize a, b, h = len;
#if LUAJIT_SMART_STRINGS
int collisions = 0;
#endif
if (lenx >= LJ_MAX_STR)
lj_err_msg(L, LJ_ERR_STROV);
g = G(L);
@ -147,29 +150,103 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
a ^= h; a -= lj_rol(h, 11);
b ^= a; b -= lj_rol(a, 25);
h ^= b; h -= lj_rol(b, 16);
#if LUAJIT_SMART_STRINGS
h &= ~strsmartbit;
#endif
/* Check if the string has already been interned. */
o = gcref(g->strhash[h & g->strmask]);
if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
#if LUAJIT_SMART_STRINGS
#define inc_collision_hard() (collisions+=8, 1)
#define inc_collision_soft() (collisions++)
#define max_collisions 17
#else
#define inc_collision_hard() (1)
#define inc_collision_soft()
#endif
while (o != NULL) {
GCstr *sx = gco2str(o);
if (sx->hash == h && sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
if (sx->hash == h && sx->len == len && inc_collision_hard() &&
str_fastcmp(str, strdata(sx), len) == 0) {
/* Resurrect if dead. Can only happen with fixstring() (keywords). */
if (isdead(g, o)) flipwhite(o);
return sx; /* Return existing string. */
}
o = gcnext(o);
inc_collision_soft();
}
} else { /* Slow path: end of string is too close to a page boundary. */
while (o != NULL) {
GCstr *sx = gco2str(o);
if (sx->hash == h && sx->len == len && memcmp(str, strdata(sx), len) == 0) {
if (sx->hash == h && sx->len == len && inc_collision_hard() &&
memcmp(str, strdata(sx), len) == 0) {
/* Resurrect if dead. Can only happen with fixstring() (keywords). */
if (isdead(g, o)) flipwhite(o);
return sx; /* Return existing string. */
}
o = gcnext(o);
inc_collision_soft();
}
}
#if LUAJIT_SMART_STRINGS
if (len > 12)
{
int need_fullh = 0, search_fullh = 0;
search_fullh = bloomtest(g->strbloom.cur[0], strbloombits0(h)) &&
bloomtest(g->strbloom.cur[1], strbloombits1(h));
need_fullh = search_fullh || collisions > max_collisions;
if (LJ_UNLIKELY(need_fullh)) {
MSize fh;
const char *ss = str;
MSize i = (len-1)/8;
fh = h ^ len;
a = lj_getu32(str + len - 4);
b = lj_getu32(str + len - 8);
for (; i; i--, ss+=8) {
fh = lj_rol(fh ^ a, 17) + (b ^ 0xdeadbeef);
a = lj_rol(a, 13); a -= lj_getu32(ss);
b = lj_rol(a, 11); b -= lj_getu32(ss+4);
}
fh = lj_rol(fh ^ a, 17) + (b ^ 0xdeadbeef);
a ^= fh; a -= lj_rol(fh, 11);
b ^= a; b -= lj_rol(a, 25);
fh ^= b; fh -= lj_rol(b, 16);
fh |= strsmartbit;
if (search_fullh) {
/* Recheck if the string has already been interned with "harder" hash. */
o = gcref(g->strhash[fh & g->strmask]);
if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
while (o != NULL) {
GCstr *sx = gco2str(o);
if (sx->hash == fh && sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
/* Resurrect if dead. Can only happen with fixstring() (keywords). */
if (isdead(g, o)) flipwhite(o);
return sx; /* Return existing string. */
}
o = gcnext(o);
}
} else { /* Slow path: end of string is too close to a page boundary. */
while (o != NULL) {
GCstr *sx = gco2str(o);
if (sx->hash == fh && sx->len == len && memcmp(str, strdata(sx), len) == 0) {
/* Resurrect if dead. Can only happen with fixstring() (keywords). */
if (isdead(g, o)) flipwhite(o);
return sx; /* Return existing string. */
}
o = gcnext(o);
}
}
}
if (collisions > 10) {
bloomset(g->strbloom.cur[0], strbloombits0(h));
bloomset(g->strbloom.new[0], strbloombits0(h));
bloomset(g->strbloom.cur[1], strbloombits1(h));
bloomset(g->strbloom.new[1], strbloombits1(h));
h = fh;
}
}
}
#endif
/* Nope, create a new string. */
s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
newwhite(g, s);

View File

@ -24,4 +24,39 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
#define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s)))
#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
#if LUAJIT_SMART_STRINGS
/*
** The top bit of the stored string hash tags strings whose stored hash is
** the "full" hash. The mask is built from an MSize-typed 1, because shifting
** a plain (signed) int 1 into the sign bit is undefined behavior in C.
** NOTE(review): assumes MSize is an unsigned integer type -- confirm.
*/
#define strsmartbit (((MSize)1)<<(sizeof(MSize)*8-1))
/* Non-zero if string s carries the smart tag in its stored hash. */
#define strsmart(s) ((s)->hash & strsmartbit)
/*
** Bloom filter keys derived from a fast hash h. strbloombits1 shifts the
** BLOOM_LOG bits directly below the tag bit down to the low end of the
** value; strbloombits0 does the same for the BLOOM_LOG bits below those.
*/
#define strbloombits0(h) ((h)>>(sizeof(h)*8-1-BLOOM_LOG*2))
#define strbloombits1(h) ((h)>>(sizeof(h)*8-1-BLOOM_LOG))

/*
** Return the fast hash of string s, always with the smart tag bit cleared.
** Strings without the smart tag store the fast hash directly, so it is
** returned as is. For tagged strings the stored value is the full hash, so
** the fast hash is recomputed from the length and a few samples of the
** string data.
** NOTE(review): the sampling presumably has to stay in sync with the hash
** computed in lj_str_new() -- verify when changing either one.
*/
static LJ_AINLINE MSize lj_str_fast_hash(GCstr *s)
{
  const char *str = strdata(s);
  MSize len = s->len;
  MSize a, b, h = len;
  if (!strsmart(s)) {
    return s->hash;
  }
  if (len >= 4) {  /* Caveat: unaligned access! */
    a = lj_getu32(str);
    h ^= lj_getu32(str+len-4);
    b = lj_getu32(str+(len>>1)-2);
    h ^= b; h -= lj_rol(b, 14);
    b += lj_getu32(str+(len>>2)-1);
  } else if (len > 0) {
    a = *(const uint8_t *)str;
    h ^= *(const uint8_t *)(str+len-1);
    b = *(const uint8_t *)(str+(len>>1));
    h ^= b; h -= lj_rol(b, 14);
  } else {
    /* Empty string: no data to sample, fall back to the stored hash. */
    return s->hash;
  }
  /* Final mixing of the three accumulators. */
  a ^= h; a -= lj_rol(h, 11);
  b ^= a; b -= lj_rol(a, 25);
  h ^= b; h -= lj_rol(b, 16);
  return h & ~strsmartbit;
}
#else
/* Without smart strings the stored hash always is the fast hash. */
#define lj_str_fast_hash(s) ((s)->hash)
#endif
#endif