strings: strong string hash for LUAJIT_SMART_STRINGS==2

This commit is contained in:
Sokolov Yura aka funny_falcon 2016-06-28 11:59:19 +03:00
parent ff7e514cfe
commit f24deca48a
3 changed files with 109 additions and 0 deletions

View File

@ -155,6 +155,9 @@ XCFLAGS=
# LUAJIT_SMART_STRINGS=1 - use full string hashing for collisioned strings. # LUAJIT_SMART_STRINGS=1 - use full string hashing for collisioned strings.
# if collision chain is longer than 10 and string is longer than 12bytes, # if collision chain is longer than 10 and string is longer than 12bytes,
# then "fast and dumb" whole string hash function used. # then "fast and dumb" whole string hash function used.
# LUAJIT_SMART_STRINGS=2 - use slow strong hashing for collisioned strings.
# if collision chain is longer than 10, then all new strings are hashed
# using 32bit cousine to SipHash
XCFLAGS+= -DLUAJIT_SMART_STRINGS=1 XCFLAGS+= -DLUAJIT_SMART_STRINGS=1
# #
############################################################################## ##############################################################################

View File

@ -600,6 +600,9 @@ typedef struct global_State {
BloomFilter cur[2]; BloomFilter cur[2];
BloomFilter new[2]; BloomFilter new[2];
} strbloom; } strbloom;
# if LUAJIT_SMART_STRINGS == 2
uint32_t str_rand_key[2];
# endif
#endif #endif
lua_Alloc allocf; /* Memory allocator. */ lua_Alloc allocf; /* Memory allocator. */
void *allocd; /* Memory allocator data. */ void *allocd; /* Memory allocator data. */

View File

@ -11,6 +11,18 @@
#include "lj_err.h" #include "lj_err.h"
#include "lj_str.h" #include "lj_str.h"
#include "lj_char.h" #include "lj_char.h"
#if LUAJIT_SMART_STRINGS == 2
#if LJ_TARGET_POSIX
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/time.h>
#elif LJ_TARGET_WINDOWS
#include <windows.h>
#pragma comment(lib, "advapi32.dll")
#endif
#endif
/* -- String helpers ------------------------------------------------------ */ /* -- String helpers ------------------------------------------------------ */
@ -118,6 +130,91 @@ void lj_str_resize(lua_State *L, MSize newmask)
g->strhash = newhash; g->strhash = newhash;
} }
#if LUAJIT_SMART_STRINGS==2
#if LJ_TARGET_WINDOWS
static void lj_init_strkey(struct global_State* g) {
FILETIME ft;
HCRYPTPROV hProvider = 0;
const DWORD dwLength = 8;
BYTE pbBuffer[dwLength] = {};
if (CryptAcquireContextW(&hProvider, 0, 0, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT | CRYPT_SILENT)) {
CryptGenRandom(hProvider, sizeof(g->str_rand_key), &g->str_rand_key);
CryptReleaseContext(hProvider, 0);
}
GetSystemTimeAsFileTime(&ft);
g->str_rand_key[0] ^= (uint32_t)ft.dwLowDateTime;
g->str_rand_key[1] ^= (uint32_t)ft.dwHighDateTime;
if (g->str_rand_key[0] == 0 && g->str_rand_key[1] == 0)
g->str_rand_key[0] = 1;
}
#else
static void lj_init_strkey(struct global_State* g) {
int fd = open("/dev/urandom", O_RDONLY);
if (fd != -1) {
(void)read(fd, &g->str_rand_key, sizeof(g->str_rand_key));
(void)close(fd);
}
struct timeval tv;
gettimeofday(&tv, NULL);
g->str_rand_key[0] ^= (uint32_t)tv.tv_sec;
g->str_rand_key[1] ^= (uint32_t)tv.tv_usec;
if (g->str_rand_key[0] == 0 && g->str_rand_key[1] == 0)
g->str_rand_key[0] = 1;
}
#endif
// 32bit cousin to SipHash
#define HALF_ROUND(a,b,c,d,s,t) \
a += b; c += d; \
b = lj_rol(b, s) ^ a; \
d = lj_rol(d, t) ^ c; \
a = lj_rol(a, 16);
#define ROUND(v0,v1,v2,v3) \
HALF_ROUND(v0,v1,v2,v3,5,8); \
HALF_ROUND(v2,v1,v0,v3,7,13);
static MSize lj_saphash(struct global_State* g, const char *str, MSize len)
{
uint32_t b = len << 24;
uint32_t v0, v1, v2, v3;
uint8_t* m;
if ((g->str_rand_key[0] | g->str_rand_key[1])== 0)
lj_init_strkey(g);
v0 = g->str_rand_key[0] ^ 0x736f6d65UL;
v1 = g->str_rand_key[1] ^ 0x326f7261UL;
v2 = g->str_rand_key[0] ^ 0x6c796765UL;
v3 = g->str_rand_key[1] ^ 0x74653262UL;
while (len >= 4) {
uint32_t mi = lj_getu32(str);
str += 4; len -= 4;
v3 ^= mi;
ROUND(v0,v1,v2,v3);
v0 ^= mi;
}
m = (uint8_t *)str;
switch (len) {
case 3: b |= m[2]<<16;
case 2: b |= m[1]<<8;
case 1: b |= m[0];
}
v3 ^= b;
ROUND(v0,v1,v2,v3);
v0 ^= b; v2 ^= 0xff;
ROUND(v0,v1,v2,v3);
ROUND(v0,v1,v2,v3);
ROUND(v0,v1,v2,v3);
return (v0 ^ v1) ^ (v2 ^ v3);
}
#endif
/* Intern a string and return string object. */ /* Intern a string and return string object. */
GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
{ {
@ -189,7 +286,9 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
} }
} }
#if LUAJIT_SMART_STRINGS #if LUAJIT_SMART_STRINGS
#if LUAJIT_SMART_STRINGS==1
if (len > 12) if (len > 12)
#endif
{ {
int need_fullh = 0, search_fullh = 0; int need_fullh = 0, search_fullh = 0;
search_fullh = bloomtest(g->strbloom.cur[0], strbloombits0(h)) && search_fullh = bloomtest(g->strbloom.cur[0], strbloombits0(h)) &&
@ -197,6 +296,7 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
need_fullh = search_fullh || collisions > max_collisions; need_fullh = search_fullh || collisions > max_collisions;
if (LJ_UNLIKELY(need_fullh)) { if (LJ_UNLIKELY(need_fullh)) {
MSize fh; MSize fh;
#if LUAJIT_SMART_STRINGS==1
const char *ss = str; const char *ss = str;
MSize i = (len-1)/8; MSize i = (len-1)/8;
fh = h ^ len; fh = h ^ len;
@ -211,6 +311,9 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
a ^= fh; a -= lj_rol(fh, 11); a ^= fh; a -= lj_rol(fh, 11);
b ^= a; b -= lj_rol(a, 25); b ^= a; b -= lj_rol(a, 25);
fh ^= b; fh -= lj_rol(b, 16); fh ^= b; fh -= lj_rol(b, 16);
#elif LUAJIT_SMART_STRINGS==2
fh = lj_saphash(g, str, len);
#endif
fh |= strsmartbit; fh |= strsmartbit;
if (search_fullh) { if (search_fullh) {
/* Recheck if the string has already been interned with "harder" hash. */ /* Recheck if the string has already been interned with "harder" hash. */