From ff7e514cfed5d91db3f0a42eaa64c63d4f090c1f Mon Sep 17 00:00:00 2001 From: Sokolov Yura aka funny_falcon Date: Tue, 28 Jun 2016 11:57:47 +0300 Subject: [PATCH] escape from collisioned strings - detect when a lot of collisions are generated - if two full collisions found (i.e. hash value and len are equal) - if collision chain is longer than 18 ("average maximum" chain with fillfactor 1.0 is near 7) - calculate "full" hash for strings in long collision chain - use "bloom" filter to track the existence of strings with "full" hash - refill "bloom" on string sweeping. --- src/Makefile | 12 ++++++++ src/lib_ffi.c | 2 +- src/lj_cparse.c | 8 ++--- src/lj_def.h | 1 + src/lj_gc.c | 46 ++++++++++++++++++++++++++-- src/lj_obj.h | 6 ++++ src/lj_str.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++-- src/lj_str.h | 35 +++++++++++++++++++++ 8 files changed, 181 insertions(+), 10 deletions(-) diff --git a/src/Makefile b/src/Makefile index fb5fdcb7..fd786bf4 100644 --- a/src/Makefile +++ b/src/Makefile @@ -146,6 +146,18 @@ XCFLAGS= #XCFLAGS+= -DLUA_USE_ASSERT # ############################################################################## +############################ STRING INTERNING ############################# +############################################################################## +# Define LuaJIT string interning behaviour +# +# commented or LUAJIT_SMART_STRINGS=0 - no attempt to recover from collisions, +# only "classic LuaJIT" hashing used - max 16 bytes from string is hashed. +# LUAJIT_SMART_STRINGS=1 - use full string hashing for collisioned strings. +# if collision chain is longer than 10 and string is longer than 12 bytes, +# then "fast and dumb" whole string hash function used. +XCFLAGS+= -DLUAJIT_SMART_STRINGS=1 +# +############################################################################## # You probably don't need to change anything below this line! 
############################################################################## diff --git a/src/lib_ffi.c b/src/lib_ffi.c index 1feee215..a27ac9df 100644 --- a/src/lib_ffi.c +++ b/src/lib_ffi.c @@ -727,7 +727,7 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.) { GCstr *s = lj_lib_checkstr(L, 1); int b = 0; - switch (s->hash) { + switch (lj_str_fast_hash(s)) { /* Dispatch on the fast hash: s->hash holds the full hash for smart strings. */ #if LJ_64 case H_(849858eb,ad35fd06): b = 1; break; /* 64bit */ #else diff --git a/src/lj_cparse.c b/src/lj_cparse.c index 16d2cb65..f09846de 100644 --- a/src/lj_cparse.c +++ b/src/lj_cparse.c @@ -1059,7 +1059,7 @@ static void cp_decl_gccattribute(CPState *cp, CPDecl *decl) if (cp->tok == CTOK_IDENT) { GCstr *attrstr = cp->str; cp_next(cp); - switch (attrstr->hash) { + switch (lj_str_fast_hash(attrstr)) { /* Fast hash, not attrstr->hash: smart strings store the full hash there. */ case H_(64a9208e,8ce14319): case H_(8e6331b2,95a282af): /* aligned */ cp_decl_align(cp, decl); break; @@ -1718,16 +1718,16 @@ static void cp_pragma(CPState *cp, BCLine pragmaline) { cp_next(cp); if (cp->tok == CTOK_IDENT && - cp->str->hash == H_(e79b999f,42ca3e85)) { /* pack */ + !strcmp(strdata(cp->str), "pack")) { /* pack */ cp_next(cp); cp_check(cp, '('); if (cp->tok == CTOK_IDENT) { - if (cp->str->hash == H_(738e923c,a1b65954)) { /* push */ + if (!strcmp(strdata(cp->str), "push")) { /* push */ if (cp->curpack < CPARSE_MAX_PACKSTACK) { cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack]; cp->curpack++; } - } else if (cp->str->hash == H_(6c71cf27,6c71cf27)) { /* pop */ + } else if (!strcmp(strdata(cp->str), "pop")) { /* pop */ if (cp->curpack > 0) cp->curpack--; } else { cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL); diff --git a/src/lj_def.h b/src/lj_def.h index 9413399d..e6490548 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -121,6 +121,7 @@ typedef unsigned int uintptr_t; /* A really naive Bloom filter. But sufficient for our needs. 
*/ typedef uintptr_t BloomFilter; #define BLOOM_MASK (8*sizeof(BloomFilter) - 1) +#define BLOOM_LOG (sizeof(BloomFilter)==4?5:6) /* log2 of the filter width in bits: 5 for a 4 byte word, otherwise 6. */ #define bloombit(x) ((uintptr_t)1 << ((x) & BLOOM_MASK)) #define bloomset(b, x) ((b) |= bloombit((x))) #define bloomtest(b, x) ((b) & bloombit((x))) diff --git a/src/lj_gc.c b/src/lj_gc.c index 7c707462..bd9b8d3f 100644 --- a/src/lj_gc.c +++ b/src/lj_gc.c @@ -402,6 +402,33 @@ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim) return p; } +/* Full sweep of one string hash chain. Re-registers surviving smart strings in the bloom filter being rebuilt. */ +static GCRef *gc_sweep_str_chain(global_State *g, GCRef *p) +{ + /* Mask with other white and LJ_GC_FIXED. Or LJ_GC_SFIXED on shutdown. */ + int ow = otherwhite(g); + GCobj *o; + while ((o = gcref(*p)) != NULL) { + if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */ + lua_assert(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED)); + makewhite(g, o); /* Value is alive, change to the current white. */ +#if LUAJIT_SMART_STRINGS + if (strsmart(&o->str)) { /* Live string stored under its full hash: keep its bloom bits. */ + MSize h = lj_str_fast_hash(&o->str); /* Bloom bits are derived from the fast hash. */ + bloomset(g->strbloom.new[0], strbloombits0(h)); + bloomset(g->strbloom.new[1], strbloombits1(h)); + } +#endif + p = &o->gch.nextgc; + } else { /* Otherwise value is dead, free it. */ + lua_assert(isdead(g, o) || ow == LJ_GC_SFIXED); + setgcrefr(*p, o->gch.nextgc); + lj_str_free(g, &o->str); + } + } + return p; +} + /* Check whether we can clear a key or a value slot from a table. */ static int gc_mayclear(cTValue *o, int val) { @@ -555,7 +582,13 @@ void lj_gc_freeall(global_State *g) gc_fullsweep(g, &g->gc.root); strmask = g->strmask; for (i = 0; i <= strmask; i++) /* Free all string hash chains. 
*/ - gc_fullsweep(g, &g->strhash[i]); + gc_sweep_str_chain(g, &g->strhash[i]); +#if LUAJIT_SMART_STRINGS + g->strbloom.cur[0] = g->strbloom.new[0]; /* All chains swept: the rebuilt filter becomes the current one. */ + g->strbloom.cur[1] = g->strbloom.new[1]; + g->strbloom.new[0] = 0; /* Start the next rebuild from an empty filter. */ + g->strbloom.new[1] = 0; +#endif } /* -- Collector ----------------------------------------------------------- */ @@ -618,9 +651,16 @@ static size_t gc_onestep(lua_State *L) return 0; case GCSsweepstring: { GCSize old = g->gc.total; - gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */ - if (g->gc.sweepstr > g->strmask) + gc_sweep_str_chain(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */ + if (g->gc.sweepstr > g->strmask) { g->gc.state = GCSsweep; /* All string hash chains sweeped. */ +#if LUAJIT_SMART_STRINGS + g->strbloom.cur[0] = g->strbloom.new[0]; /* Publish the filter rebuilt from the surviving smart strings. */ + g->strbloom.cur[1] = g->strbloom.new[1]; + g->strbloom.new[0] = 0; /* Start the next rebuild from an empty filter. */ + g->strbloom.new[1] = 0; +#endif + } lua_assert(old >= g->gc.total); g->gc.estimate -= old - g->gc.total; return GCSWEEPCOST; diff --git a/src/lj_obj.h b/src/lj_obj.h index 25da9455..a08ede77 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h @@ -595,6 +595,12 @@ typedef struct global_State { GCRef *strhash; /* String hash table (hash chain anchors). */ MSize strmask; /* String hash mask (size of hash table - 1). */ MSize strnum; /* Number of strings in hash table. */ +#if LUAJIT_SMART_STRINGS + struct { + BloomFilter cur[2]; /* Tested by lj_str_new before probing with the full hash. */ + BloomFilter new[2]; /* Refilled while the string chains are swept. */ + } strbloom; +#endif lua_Alloc allocf; /* Memory allocator. */ void *allocd; /* Memory allocator data. */ GCState gc; /* Garbage collector. 
*/ diff --git a/src/lj_str.c b/src/lj_str.c index 014665b6..b984da90 100644 --- a/src/lj_str.c +++ b/src/lj_str.c @@ -126,6 +126,9 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) GCobj *o; MSize len = (MSize)lenx; MSize a, b, h = len; +#if LUAJIT_SMART_STRINGS + int collisions = 0; /* Weighted chain-walk counter: +1 per visited node, +8 per hash+len match. */ +#endif if (lenx >= LJ_MAX_STR) lj_err_msg(L, LJ_ERR_STROV); g = G(L); @@ -147,29 +150,103 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) a ^= h; a -= lj_rol(h, 11); b ^= a; b -= lj_rol(a, 25); h ^= b; h -= lj_rol(b, 16); +#if LUAJIT_SMART_STRINGS + h &= ~strsmartbit; /* Fast hashes never carry the smart bit. */ +#endif /* Check if the string has already been interned. */ o = gcref(g->strhash[h & g->strmask]); if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) { +#if LUAJIT_SMART_STRINGS +#define inc_collision_hard() (collisions+=8, 1) +#define inc_collision_soft() (collisions++) +#define max_collisions 17 /* Above this count the full hash is computed. */ +#else +#define inc_collision_hard() (1) +#define inc_collision_soft() +#endif while (o != NULL) { GCstr *sx = gco2str(o); - if (sx->hash == h && sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) { + if (sx->hash == h && sx->len == len && inc_collision_hard() && + str_fastcmp(str, strdata(sx), len) == 0) { /* Resurrect if dead. Can only happen with fixstring() (keywords). */ if (isdead(g, o)) flipwhite(o); return sx; /* Return existing string. */ } o = gcnext(o); + inc_collision_soft(); } } else { /* Slow path: end of string is too close to a page boundary. */ while (o != NULL) { GCstr *sx = gco2str(o); - if (sx->hash == h && sx->len == len && memcmp(str, strdata(sx), len) == 0) { + if (sx->hash == h && sx->len == len && inc_collision_hard() && + memcmp(str, strdata(sx), len) == 0) { /* Resurrect if dead. Can only happen with fixstring() (keywords). */ if (isdead(g, o)) flipwhite(o); return sx; /* Return existing string. 
*/ } o = gcnext(o); + inc_collision_soft(); } } +#if LUAJIT_SMART_STRINGS + if (len > 12) /* Only strings longer than 12 bytes are ever stored under the full hash. */ + { + int need_fullh = 0, search_fullh = 0; + search_fullh = bloomtest(g->strbloom.cur[0], strbloombits0(h)) && + bloomtest(g->strbloom.cur[1], strbloombits1(h)); /* Both bloom bits set: a string with this fast hash may live under its full hash. */ + need_fullh = search_fullh || collisions > max_collisions; /* Bloom hit, or the chain walk was too expensive. */ + if (LJ_UNLIKELY(need_fullh)) { + MSize fh; + const char *ss = str; + MSize i = (len-1)/8; /* Number of 8 byte steps in the loop below. */ + fh = h ^ len; + a = lj_getu32(str + len - 4); /* Seed with the last 8 bytes of the string. */ + b = lj_getu32(str + len - 8); + for (; i; i--, ss+=8) { + fh = lj_rol(fh ^ a, 17) + (b ^ 0xdeadbeef); + a = lj_rol(a, 13); a -= lj_getu32(ss); + b = lj_rol(a, 11); b -= lj_getu32(ss+4); /* NOTE(review): rotates a, not b, so the previous b is dropped here -- confirm this is intended. */ + } + fh = lj_rol(fh ^ a, 17) + (b ^ 0xdeadbeef); + a ^= fh; a -= lj_rol(fh, 11); + b ^= a; b -= lj_rol(a, 25); + fh ^= b; fh -= lj_rol(b, 16); + fh |= strsmartbit; /* Mark the value as a full hash. */ + if (search_fullh) { + /* Recheck if the string has already been interned with "harder" hash. */ + o = gcref(g->strhash[fh & g->strmask]); + if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) { + while (o != NULL) { + GCstr *sx = gco2str(o); + if (sx->hash == fh && sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) { + /* Resurrect if dead. Can only happen with fixstring() (keywords). */ + if (isdead(g, o)) flipwhite(o); + return sx; /* Return existing string. */ + } + o = gcnext(o); + } + } else { /* Slow path: end of string is too close to a page boundary. */ + while (o != NULL) { + GCstr *sx = gco2str(o); + if (sx->hash == fh && sx->len == len && memcmp(str, strdata(sx), len) == 0) { + /* Resurrect if dead. Can only happen with fixstring() (keywords). */ + if (isdead(g, o)) flipwhite(o); + return sx; /* Return existing string. 
*/ + } + o = gcnext(o); + } + } + } + if (collisions > 10) { + bloomset(g->strbloom.cur[0], strbloombits0(h)); /* Flag the fast hash in the current filter and in the one being rebuilt. */ + bloomset(g->strbloom.new[0], strbloombits0(h)); + bloomset(g->strbloom.cur[1], strbloombits1(h)); + bloomset(g->strbloom.new[1], strbloombits1(h)); + h = fh; /* Intern the new string under the full hash (smart bit set). */ + } + } + } +#endif /* Nope, create a new string. */ s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr); newwhite(g, s); diff --git a/src/lj_str.h b/src/lj_str.h index bb306c14..763ccd23 100644 --- a/src/lj_str.h +++ b/src/lj_str.h @@ -24,4 +24,39 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); #define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s))) #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) +#if LUAJIT_SMART_STRINGS +#define strsmartbit ((MSize)1<<(sizeof(MSize)*8-1)) /* Top bit of MSize. The unsigned operand avoids shifting into the sign bit of int. */ +#define strsmart(s) ((s)->hash & strsmartbit) /* Nonzero if s->hash is a full hash. */ +#define strbloombits0(h) ((h)>>(sizeof(h)*8-1-BLOOM_LOG*2)) /* Two bloom indexes taken from the upper bits of the fast hash. */ +#define strbloombits1(h) ((h)>>(sizeof(h)*8-1-BLOOM_LOG)) +static LJ_AINLINE MSize lj_str_fast_hash(GCstr* s) /* Returns the fast (sampled) hash of s with the smart bit clear, recomputing it if s->hash is a full hash. */ +{ + const char *str = strdata(s); + MSize len = s->len; + MSize a, b, h = len; + if (!strsmart(s)) { + return s->hash; /* Stored hash already is the fast hash. */ + } + if (len >= 4) { /* Caveat: unaligned access! */ + a = lj_getu32(str); + h ^= lj_getu32(str+len-4); + b = lj_getu32(str+(len>>1)-2); + h ^= b; h -= lj_rol(b, 14); + b += lj_getu32(str+(len>>2)-1); + } else if (len > 0) { /* NOTE(review): lj_str_new only sets the smart bit for len > 12, so the short-string branches look unreachable -- confirm. */ + a = *(const uint8_t *)str; + h ^= *(const uint8_t *)(str+len-1); + b = *(const uint8_t *)(str+(len>>1)); + h ^= b; h -= lj_rol(b, 14); + } else { + return s->hash; + } + a ^= h; a -= lj_rol(h, 11); + b ^= a; b -= lj_rol(a, 25); + h ^= b; h -= lj_rol(b, 16); + return h & ~strsmartbit; /* Same masking as lj_str_new applies to fast hashes. */ +} +#else +#define lj_str_fast_hash(s) ((s)->hash) +#endif #endif