mirror of
https://github.com/LuaJIT/LuaJIT.git
synced 2025-02-08 07:34:07 +00:00
strings: strong string hash for LUAJIT_SMART_STRINGS==2
This commit is contained in:
parent
ff7e514cfe
commit
f24deca48a
@ -155,6 +155,9 @@ XCFLAGS=
|
|||||||
# LUAJIT_SMART_STRINGS=1 - use full string hashing for collisioned strings.
|
# LUAJIT_SMART_STRINGS=1 - use full string hashing for collisioned strings.
|
||||||
# if collision chain is longer than 10 and string is longer than 12bytes,
|
# if collision chain is longer than 10 and string is longer than 12bytes,
|
||||||
# then "fast and dumb" whole string hash function used.
|
# then "fast and dumb" whole string hash function used.
|
||||||
|
# LUAJIT_SMART_STRINGS=2 - use slow strong hashing for collisioned strings.
|
||||||
|
# if collision chain is longer than 10, then all new strings are hashed
|
||||||
|
# using a 32-bit cousin of SipHash.
|
||||||
XCFLAGS+= -DLUAJIT_SMART_STRINGS=1
|
XCFLAGS+= -DLUAJIT_SMART_STRINGS=1
|
||||||
#
|
#
|
||||||
##############################################################################
|
##############################################################################
|
||||||
|
@ -600,6 +600,9 @@ typedef struct global_State {
|
|||||||
BloomFilter cur[2];
|
BloomFilter cur[2];
|
||||||
BloomFilter new[2];
|
BloomFilter new[2];
|
||||||
} strbloom;
|
} strbloom;
|
||||||
|
# if LUAJIT_SMART_STRINGS == 2
|
||||||
|
uint32_t str_rand_key[2];
|
||||||
|
# endif
|
||||||
#endif
|
#endif
|
||||||
lua_Alloc allocf; /* Memory allocator. */
|
lua_Alloc allocf; /* Memory allocator. */
|
||||||
void *allocd; /* Memory allocator data. */
|
void *allocd; /* Memory allocator data. */
|
||||||
|
103
src/lj_str.c
103
src/lj_str.c
@ -11,6 +11,18 @@
|
|||||||
#include "lj_err.h"
|
#include "lj_err.h"
|
||||||
#include "lj_str.h"
|
#include "lj_str.h"
|
||||||
#include "lj_char.h"
|
#include "lj_char.h"
|
||||||
|
#if LUAJIT_SMART_STRINGS == 2
|
||||||
|
#if LJ_TARGET_POSIX
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <sys/time.h>
|
||||||
|
#elif LJ_TARGET_WINDOWS
|
||||||
|
#include <windows.h>
|
||||||
|
#pragma comment(lib, "advapi32.dll")
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
/* -- String helpers ------------------------------------------------------ */
|
/* -- String helpers ------------------------------------------------------ */
|
||||||
|
|
||||||
@ -118,6 +130,91 @@ void lj_str_resize(lua_State *L, MSize newmask)
|
|||||||
g->strhash = newhash;
|
g->strhash = newhash;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if LUAJIT_SMART_STRINGS==2
|
||||||
|
#if LJ_TARGET_WINDOWS
|
||||||
|
static void lj_init_strkey(struct global_State* g) {
|
||||||
|
FILETIME ft;
|
||||||
|
HCRYPTPROV hProvider = 0;
|
||||||
|
const DWORD dwLength = 8;
|
||||||
|
BYTE pbBuffer[dwLength] = {};
|
||||||
|
|
||||||
|
if (CryptAcquireContextW(&hProvider, 0, 0, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT | CRYPT_SILENT)) {
|
||||||
|
CryptGenRandom(hProvider, sizeof(g->str_rand_key), &g->str_rand_key);
|
||||||
|
CryptReleaseContext(hProvider, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
GetSystemTimeAsFileTime(&ft);
|
||||||
|
g->str_rand_key[0] ^= (uint32_t)ft.dwLowDateTime;
|
||||||
|
g->str_rand_key[1] ^= (uint32_t)ft.dwHighDateTime;
|
||||||
|
if (g->str_rand_key[0] == 0 && g->str_rand_key[1] == 0)
|
||||||
|
g->str_rand_key[0] = 1;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
static void lj_init_strkey(struct global_State* g) {
|
||||||
|
int fd = open("/dev/urandom", O_RDONLY);
|
||||||
|
if (fd != -1) {
|
||||||
|
(void)read(fd, &g->str_rand_key, sizeof(g->str_rand_key));
|
||||||
|
(void)close(fd);
|
||||||
|
}
|
||||||
|
struct timeval tv;
|
||||||
|
gettimeofday(&tv, NULL);
|
||||||
|
g->str_rand_key[0] ^= (uint32_t)tv.tv_sec;
|
||||||
|
g->str_rand_key[1] ^= (uint32_t)tv.tv_usec;
|
||||||
|
if (g->str_rand_key[0] == 0 && g->str_rand_key[1] == 0)
|
||||||
|
g->str_rand_key[0] = 1;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
** Mixing rounds for the 32 bit cousin to SipHash.
**
** Both macros modify their first four arguments in place, so these must be
** side-effect free lvalues (each one is evaluated more than once). They are
** wrapped in do { } while (0) so that each use behaves as one statement.
*/

/* Half round: two additions, two rotate-and-xor steps, then rotate a by 16. */
#define HALF_ROUND(a,b,c,d,s,t) \
  do { \
    (a) += (b); (c) += (d); \
    (b) = lj_rol((b), (s)) ^ (a); \
    (d) = lj_rol((d), (t)) ^ (c); \
    (a) = lj_rol((a), 16); \
  } while (0)

/* Full round: two half rounds, the second with v0 and v2 swapped. */
#define ROUND(v0,v1,v2,v3) \
  do { \
    HALF_ROUND(v0,v1,v2,v3,5,8); \
    HALF_ROUND(v2,v1,v0,v3,7,13); \
  } while (0)
|
||||||
|
|
||||||
|
/*
** Keyed hash over a whole string, using a 32 bit cousin to SipHash.
**
** g    Global state holding the two-word key str_rand_key. The key is
**      seeded on first use, when both words are still zero.
** str  Pointer to the string bytes (only read, never written).
** len  Number of bytes at str.
**
** Returns the xor of the four state words after the final rounds.
*/
static MSize lj_saphash(struct global_State *g, const char *str, MSize len)
{
  uint32_t b = len << 24;  /* Final word: length in the upper bits. */
  uint32_t v0, v1, v2, v3;
  const uint8_t *m;

  /* An all-zero key means it has not been seeded yet. */
  if ((g->str_rand_key[0] | g->str_rand_key[1]) == 0)
    lj_init_strkey(g);

  /* Initial state: each key word feeds two lanes, xored with constants. */
  v0 = g->str_rand_key[0] ^ 0x736f6d65UL;
  v1 = g->str_rand_key[1] ^ 0x326f7261UL;
  v2 = g->str_rand_key[0] ^ 0x6c796765UL;
  v3 = g->str_rand_key[1] ^ 0x74653262UL;

  /* Absorb the input one 32 bit word at a time. */
  while (len >= 4) {
    uint32_t mi = lj_getu32(str);
    str += 4;
    len -= 4;
    v3 ^= mi;
    ROUND(v0,v1,v2,v3);
    v0 ^= mi;
  }

  /* Fold the remaining 0..3 tail bytes into the final word. */
  m = (const uint8_t *)str;
  switch (len) {
  case 3: b |= (uint32_t)m[2] << 16;  /* fallthrough */
  case 2: b |= (uint32_t)m[1] << 8;  /* fallthrough */
  case 1: b |= (uint32_t)m[0]; break;
  default: break;  /* No tail bytes. */
  }

  v3 ^= b;
  ROUND(v0,v1,v2,v3);
  v0 ^= b;
  v2 ^= 0xff;

  /* Finalization: three more rounds before the lanes are combined. */
  ROUND(v0,v1,v2,v3);
  ROUND(v0,v1,v2,v3);
  ROUND(v0,v1,v2,v3);
  return (v0 ^ v1) ^ (v2 ^ v3);
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Intern a string and return string object. */
|
/* Intern a string and return string object. */
|
||||||
GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
|
GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
|
||||||
{
|
{
|
||||||
@ -189,7 +286,9 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#if LUAJIT_SMART_STRINGS
|
#if LUAJIT_SMART_STRINGS
|
||||||
|
#if LUAJIT_SMART_STRINGS==1
|
||||||
if (len > 12)
|
if (len > 12)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
int need_fullh = 0, search_fullh = 0;
|
int need_fullh = 0, search_fullh = 0;
|
||||||
search_fullh = bloomtest(g->strbloom.cur[0], strbloombits0(h)) &&
|
search_fullh = bloomtest(g->strbloom.cur[0], strbloombits0(h)) &&
|
||||||
@ -197,6 +296,7 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
|
|||||||
need_fullh = search_fullh || collisions > max_collisions;
|
need_fullh = search_fullh || collisions > max_collisions;
|
||||||
if (LJ_UNLIKELY(need_fullh)) {
|
if (LJ_UNLIKELY(need_fullh)) {
|
||||||
MSize fh;
|
MSize fh;
|
||||||
|
#if LUAJIT_SMART_STRINGS==1
|
||||||
const char *ss = str;
|
const char *ss = str;
|
||||||
MSize i = (len-1)/8;
|
MSize i = (len-1)/8;
|
||||||
fh = h ^ len;
|
fh = h ^ len;
|
||||||
@ -211,6 +311,9 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
|
|||||||
a ^= fh; a -= lj_rol(fh, 11);
|
a ^= fh; a -= lj_rol(fh, 11);
|
||||||
b ^= a; b -= lj_rol(a, 25);
|
b ^= a; b -= lj_rol(a, 25);
|
||||||
fh ^= b; fh -= lj_rol(b, 16);
|
fh ^= b; fh -= lj_rol(b, 16);
|
||||||
|
#elif LUAJIT_SMART_STRINGS==2
|
||||||
|
fh = lj_saphash(g, str, len);
|
||||||
|
#endif
|
||||||
fh |= strsmartbit;
|
fh |= strsmartbit;
|
||||||
if (search_fullh) {
|
if (search_fullh) {
|
||||||
/* Recheck if the string has already been interned with "harder" hash. */
|
/* Recheck if the string has already been interned with "harder" hash. */
|
||||||
|
Loading…
Reference in New Issue
Block a user