Albert 2024-01-18 20:49:50 -05:00
parent e403e4a1dc
commit 6b22c1df16
24 changed files with 1268 additions and 263 deletions

View File

@ -403,6 +403,7 @@ LUA_API lua_State *luaJIT_newstate(lua_Alloc f, void *ud,
luaJIT_allocpages allocp,
luaJIT_freepages freep,
luaJIT_reallochuge realloch,
luaJIT_reallocraw rawalloc,
void *page_ud)
{
lua_State *L;
@ -417,7 +418,7 @@ LUA_API lua_State *luaJIT_newstate(lua_Alloc f, void *ud,
ud = NULL;
}
#endif
L = lj_newstate(f, ud, allocp, freep, realloch, page_ud);
L = lj_newstate(f, ud, allocp, freep, realloch, rawalloc, page_ud);
if (L) {
G(L)->panic = panic;

View File

@ -416,7 +416,7 @@ LJLIB_CF(load)
SBufExt *sbx = bufV(L->base);
s = sbx->r;
len = sbufxlen(sbx);
if (!name) name = &G(L)->strempty; /* Buffers are not NUL-terminated. */
if (!name) name = G(L)->strempty; /* Buffers are not NUL-terminated. */
} else {
GCstr *str = lj_lib_checkstr(L, 1);
s = strdata(str);

View File

@ -860,7 +860,7 @@ LUALIB_API int luaopen_ffi(lua_State *L)
LJ_LIB_REG(L, NULL, ffi_clib);
LJ_LIB_REG(L, NULL, ffi_callback);
/* NOBARRIER: the key is new and lj_tab_newkey() handles the barrier. */
settabV(L, lj_tab_setstr(L, cts->miscmap, &cts->g->strempty), tabV(L->top-1));
settabV(L, lj_tab_setstr(L, cts->miscmap, cts->g->strempty), tabV(L->top-1));
L->top--;
lj_clib_default(L, tabV(L->top-1)); /* Create ffi.C default namespace. */
lua_pushliteral(L, LJ_OS_NAME);

View File

@ -185,7 +185,7 @@ static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
} else {
int c = getc(fp);
ungetc(c, fp);
setstrV(L, L->top++, &G(L)->strempty);
setstrV(L, L->top++, G(L)->strempty);
return (c != EOF);
}
}

View File

@ -221,7 +221,7 @@ LJLIB_CF(os_date)
sz += (sz|1);
}
} else {
setstrV(L, L->top++, &G(L)->strempty);
setstrV(L, L->top++, G(L)->strempty);
}
return 1;
}

View File

@ -800,7 +800,7 @@ LUA_API void lua_concat(lua_State *L, int n)
copyTV(L, L->top-1, L->top+LJ_FR2);
} while (--n > 0);
} else if (n == 0) { /* Push empty string. */
setstrV(L, L->top, &G(L)->strempty);
setstrV(L, L->top, G(L)->strempty);
incr_top(L);
}
/* else n == 1: nothing to do. */

View File

@ -265,13 +265,22 @@ static void *lj_arena_api_reallochuge(void *ud, void *p, size_t osz, size_t nsz)
return newp;
}
#define lj_arena_firstalloc(g, arena, id, atype, type) \
static void *lj_arena_api_rawalloc(void *ud, void *p, size_t osz, size_t nsz)
{
if (!p)
return RESERVE_AND_COMMIT_PAGES(nsz);
if (!nsz)
UNRESERVE_PAGES(p, osz);
return NULL;
}
#define lj_arena_firstalloc(g, arena, id, atype, type, init) \
{ \
atype *a = (atype *)lj_arena_alloc(&g->gc.ctx); \
if (!a) \
return 0; \
arena = &a->hdr; \
do_arena_init(a, g, id, atype, type); \
init(a, g, id, atype, type); \
}
static int lj_blob_firstalloc(global_State *g, GCAblob **h)
@ -290,7 +299,7 @@ static int lj_blob_firstalloc(global_State *g, GCAblob **h)
int lj_arena_init(struct global_State *g, luaJIT_allocpages allocp,
luaJIT_freepages freep, luaJIT_reallochuge realloch,
void *page_ud)
luaJIT_reallocraw rawalloc, void *page_ud)
{
g->gc.bloblist_alloc = 32;
g->gc.bloblist =
@ -306,10 +315,11 @@ int lj_arena_init(struct global_State *g, luaJIT_allocpages allocp,
g->gc.bloblist_wr = 1;
/* All must be provided to override */
if (allocp && freep && realloch) {
if (allocp && freep && realloch && rawalloc) {
g->gc.ctx.allocpages = allocp;
g->gc.ctx.freepages = freep;
g->gc.ctx.reallochuge = realloch;
g->gc.ctx.rawalloc = rawalloc;
g->gc.ctx.pageud = page_ud;
} else {
arena_alloc *arenas = (arena_alloc*)g->allocf(g->allocd, NULL, 0, sizeof(arena_alloc));
@ -319,6 +329,7 @@ int lj_arena_init(struct global_State *g, luaJIT_allocpages allocp,
g->gc.ctx.allocpages = &lj_arena_api_allocpages;
g->gc.ctx.freepages = &lj_arena_api_freepages;
g->gc.ctx.reallochuge = &lj_arena_api_reallochuge;
g->gc.ctx.rawalloc = &lj_arena_api_rawalloc;
g->gc.ctx.pageud = arenas;
arenas->g = g;
@ -334,11 +345,13 @@ int lj_arena_init(struct global_State *g, luaJIT_allocpages allocp,
}
/* Allocate all arenas */
lj_arena_firstalloc(g, g->gc.tab, ~LJ_TTAB, GCAtab, GCtab);
lj_arena_firstalloc(g, g->gc.fintab, ~LJ_TTAB, GCAtab, GCtab);
lj_arena_firstalloc(g, g->gc.uv, ~LJ_TUPVAL, GCAupval, GCupval);
lj_arena_firstalloc(g, g->gc.func, ~LJ_TFUNC, GCAfunc, GCfunc);
lj_arena_firstalloc(g, g->gc.udata, ~LJ_TUDATA, GCAudata, GCudata);
lj_arena_firstalloc(g, g->gc.tab, ~LJ_TTAB, GCAtab, GCtab, do_arena_init);
lj_arena_firstalloc(g, g->gc.fintab, ~LJ_TTAB, GCAtab, GCtab, do_arena_init);
lj_arena_firstalloc(g, g->gc.uv, ~LJ_TUPVAL, GCAupval, GCupval, do_arena_init);
lj_arena_firstalloc(g, g->gc.func, ~LJ_TFUNC, GCAfunc, GCfunc, do_arena_init);
lj_arena_firstalloc(g, g->gc.udata, ~LJ_TUDATA, GCAudata, GCudata, do_arena_init);
lj_arena_firstalloc(g, g->gc.str_small, ~LJ_TSTR, GCAstr, GCstr, do_smallstr_arena_init);
g->gc.str = &lj_arena_str_med(g)->hdr;
((GCAudata *)g->gc.udata)->free4_h = ((GCAudata *)g->gc.udata)->free_h;
@ -359,6 +372,36 @@ static void release_chain_arena(struct global_State *g, GCArenaHdr *a)
}
}
static void release_string_table(struct global_State *g)
{
/* The string table is trickier. Entries are either valid pointers
* or on the freelist. Start by zeroing all freelist entries.
*/
int32_t at = g->str.secondary_slot_free_head;
while (at != -1) {
int32_t n = (int32_t)mrefu(g->str.secondary_list[at]);
setmrefu(g->str.secondary_list[at], 0);
at = n;
}
/* Remaining nonzero entries are valid pointers. */
for (uint32_t i = 0; i < g->str.secondary_list_capacity; i++) {
void *a = mref(g->str.secondary_list[i], void);
if (a)
release_one_arena(g, a);
}
}
static void release_huge_strings(struct global_State *g)
{
GCArenaHdr *a = g->gc.str_huge;
while (a) {
GCAstr *s = (GCAstr *)a;
a = a->gray;
lj_arena_freehuge(&g->gc.ctx, s, s->free_h);
}
}
static void release_all_arenas(struct global_State *g)
{
release_chain_arena(g, g->gc.tab);
@ -366,6 +409,10 @@ static void release_all_arenas(struct global_State *g)
release_chain_arena(g, g->gc.uv);
release_chain_arena(g, g->gc.func);
release_chain_arena(g, g->gc.udata);
release_chain_arena(g, g->gc.str_small);
release_chain_arena(g, g->gc.str);
release_huge_strings(g);
release_string_table(g);
for (uint32_t i = 0; i < g->gc.bloblist_wr; i++) {
GCAblob *a = g->gc.bloblist[i];
if (a->flags & GCA_BLOB_HUGE)

View File

@ -11,6 +11,7 @@
typedef unsigned (*luaJIT_allocpages)(void *ud, void **pages, unsigned n);
typedef void (*luaJIT_freepages)(void *ud, void **pages, unsigned n);
typedef void *(*luaJIT_reallochuge)(void *ud, void *p, size_t osz, size_t nsz);
typedef void *(*luaJIT_reallocraw)(void *ud, void *p, size_t osz, size_t nsz);
#define ARENA_SHIFT 16
#define ARENA_SIZE (1u << ARENA_SHIFT)
@ -65,12 +66,13 @@ typedef struct arena_context {
luaJIT_allocpages allocpages;
luaJIT_freepages freepages;
luaJIT_reallochuge reallochuge;
luaJIT_reallocraw rawalloc;
void *pageud;
} arena_context;
int lj_arena_init(struct global_State *g, luaJIT_allocpages allocp,
luaJIT_freepages freep, luaJIT_reallochuge realloch,
void *page_ud);
luaJIT_reallocraw rawalloc, void *page_ud);
void lj_arena_cleanup(struct global_State *g);
/* Add ARENA_FREELIST_CHUNK free arenas */

View File

@ -364,7 +364,7 @@ cTValue *lj_ctype_meta(CTState *cts, CTypeID id, MMS mm)
}
if (ctype_isptr(ct->info) &&
ctype_isfunc(ctype_get(cts, ctype_cid(ct->info))->info))
tv = lj_tab_getstr(cts->miscmap, &cts->g->strempty);
tv = lj_tab_getstr(cts->miscmap, cts->g->strempty);
else
tv = lj_tab_getinth(cts->miscmap, -(int32_t)id);
if (tv && tvistab(tv) &&

View File

@ -55,12 +55,13 @@ typedef unsigned int uintptr_t;
#define LJ_MAX_BUF LJ_MAX_MEM32 /* Max. buffer length. */
#define LJ_MAX_UDATA LJ_MAX_MEM32 /* Max. userdata length. */
#define LJ_MAX_STRTAB (1<<26) /* Max. string table size. */
#define LJ_MAX_STRTAB (1<<22) /* Max. string table size. */
#define LJ_MAX_HBITS 26 /* Max. hash bits. */
#define LJ_MAX_ABITS 28 /* Max. bits of array key. */
#define LJ_MAX_ASIZE ((1<<(LJ_MAX_ABITS-1))+1) /* Max. array part size. */
#define LJ_MAX_COLOSIZE 0 /* Max. elems for colocated array. */
#define LJ_COLO_ENABLED 1
#define LJ_HUGE_STR_THRESHOLD 4000
#define LJ_MAX_LINE LJ_MAX_MEM32 /* Max. source code line number. */
#define LJ_MAX_XLEVEL 200 /* Max. syntactic nesting level. */
@ -77,7 +78,7 @@ typedef unsigned int uintptr_t;
/* Minimum table/buffer sizes. */
#define LJ_MIN_GLOBAL 6 /* Min. global table size (hbits). */
#define LJ_MIN_REGISTRY 2 /* Min. registry size (hbits). */
#define LJ_MIN_STRTAB 256 /* Min. string table size (pow2). */
#define LJ_MIN_STRTAB 64 /* Min. string table size (pow2). */
#define LJ_MIN_SBUF 32 /* Min. string buffer length. */
#define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */
#define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */

View File

@ -849,7 +849,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen);
} else { /* Range underflow: return empty string. */
emitir(IRTGI(IR_LT), trend, trstart);
J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty);
J->base[0] = lj_ir_kstr(J, J2G(J)->strempty);
}
} else { /* Return string.byte result(s). */
ptrdiff_t i, len = end - start;
@ -886,7 +886,7 @@ static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd)
tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, J->base[i]);
J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
} else if (i == 0) {
J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty);
J->base[0] = lj_ir_kstr(J, J2G(J)->strempty);
}
UNUSED(rd);
}

File diff suppressed because it is too large

View File

@ -12,9 +12,12 @@
/* Garbage collector states. Order matters. */
enum {
GCSpause, GCSpropagate, GCSatomic, GCSsweepstring, GCSsweep,
GCSsweep_blob, GCSsweep_func, GCSsweep_tab, GCSsweep_fintab,
GCSsweep_uv, GCSsweep_udata, GCSfinalize_arena, GCSfinalize
GCSpause, GCSpropagate, GCSatomic, GCSsweep, GCSsweep_blob,
GCSsweep_smallstring, GCSsweep_string, GCSsweep_hugestring,
GCSsweep_func, GCSsweep_tab, GCSsweep_fintab,
GCSsweep_uv, GCSsweep_udata, GCSfinalize_arena, GCSfinalize,
/* These last states are optional */
GCSclean_smallstr, GCScompact_strchain, GCScompact_strtab,
};
/* Bitmasks for marked field of GCobj. */
@ -25,7 +28,6 @@ enum {
#define LJ_GC_WEAKKEY 0x10
#define LJ_GC_WEAKVAL 0x08
#define LJ_GC_CDATA_FIN 0x10
#define LJ_GC_FIXED 0x20
#define LJ_GC_SFIXED 0x40
#define LJ_GC_MARK_MASK 0xE0
@ -43,6 +45,12 @@ enum {
#define LJ_GC_SWEEP0 0x01
#define LJ_GC_SWEEP1 0x02
#define LJ_GC_SWEEPS (LJ_GC_SWEEP0 | LJ_GC_SWEEP1)
/* If set, this arena has new free elements and must be rescanned. */
#define LJ_GC_ON_FREE_LIST 0x8
/* If set, lazy sweeping knows this arena is dirty. */
#define LJ_GC_SWEEP_DIRTY 0x10
#define LJ_STR_SECONDARY 0x10
/* Macros to test and set GCobj colors. */
#define iswhite(g, x) (!((x)->gch.gcflags & (g)->gc.currentblackgray))
@ -58,16 +66,30 @@ LJ_FUNC int checkdead(global_State *g, GCobj *o);
#define makewhite(x) \
((x)->gch.gcflags = ((x)->gch.gcflags & (uint8_t)~LJ_GC_COLORS))
#define black2gray(x) ((x)->gch.gcflags |= (uint8_t)LJ_GC_GRAY)
#define fixstring(s) ((s)->gcflags |= LJ_GC_FIXED)
#define fixstring(s) \
{ \
GCstr *str = (s); \
uint32_t idx = aidx(str); \
str->gcflags = LJ_GC_GRAY; \
if (str->len > LJ_HUGE_STR_THRESHOLD) \
gcat(str, GCAstr)->free_start = 1; \
else \
gcat(str, GCAstr)->fixed[aidxh(idx)] |= abit(aidxl(idx)); \
}
#define markfinalized(x) ((x)->gch.gcflags |= LJ_GC_FINALIZED)
#define maybe_resurrect_str(g, s) \
if (LJ_UNLIKELY(iswhite(g, obj2gco(s)) && (g)->gc.state == GCSsweepstring && \
((s)->hash & (g)->str.mask) >= (g)->gc.sweepstr)) \
{ \
(s)->gcflags |= (g)->gc.currentblack; \
GCAstr *a = gcat(s, GCAstr); \
uint32_t idx = aidx(s); \
uint64_t bit = abit(aidxl(idx)); \
a->mark[aidxh(idx)] |= bit; \
/* If this is a small string then we may need to clear the free bit */ \
if ((s)->len <= 15) a->free[aidxh(idx)] &= ~bit; \
}
#define isminor(g) (g->gc.gcmode & LJ_GCMODE_MINORSWEEP)
/* Collector. */
@ -163,8 +185,23 @@ static LJ_AINLINE void lj_mem_free(global_State *g, void *p, size_t osize)
#define lj_mem_newt(L, s, t) ((t *)lj_mem_new(L, (s)))
#define lj_mem_freet(g, p) lj_mem_free(g, (p), sizeof(*(p)))
void *lj_mem_newpages(global_State *g, size_t sz);
void lj_mem_freepages(global_State *g, void *ptr, size_t sz);
/* New GC */
#define st_ref(o) ((GCstr *)(gcrefu(o) & ~(uintptr_t)1))
#define st_alg(o) ((gcrefu(o) & 1))
typedef struct StrTab {
StrHash hashes[15];
uint32_t prev_len;
GCRef strs[15];
struct StrTab *next;
} StrTab;
LJ_STATIC_ASSERT(sizeof(StrTab) % 64 == 0);
LJ_FUNC GCtab *lj_mem_alloctab(lua_State *L, uint32_t asize);
LJ_FUNC GCtab *lj_mem_alloctabempty_gc(lua_State *L);
LJ_FUNC GCstr *lj_mem_allocstr(lua_State *L, MSize len);
@ -173,6 +210,9 @@ LJ_FUNC GCupval *lj_mem_allocuv(lua_State *L);
LJ_FUNC GCudata *lj_mem_allocudata(lua_State *L, MSize bytes);
LJ_FUNC GCfunc *lj_mem_allocfunc(lua_State *L, MSize bytes);
LJ_FUNC StrTab* lj_mem_allocstrtab(lua_State *L, uint32_t *id);
LJ_FUNC void lj_mem_freestrtab(global_State *g, StrTab *st);
LJ_FUNC void lj_mem_freechainedstrtab(global_State *g, StrTab *st);
LJ_FUNC void *lj_mem_newblob(lua_State *L, MSize sz);
LJ_FUNC void *lj_mem_reallocblob(lua_State *L, void *p, MSize osz, MSize nsz);
@ -211,7 +251,8 @@ typedef uint64_t bitmap_t;
#define FREE_EXTRA_MASK(type) (~0ull >> (WORD_BITS - WORDS_FOR_TYPE(type)))
#define FREE_MASK(type) (~0ull >> (WORD_BITS - WORDS_FOR_TYPE_UNROUNDED(type)))
#define FREE_LOW(atype, type) ~0ull << ELEMENTS_OCCUPIED(atype, type)
#define FREE_LOW(atype, type) (~0ull << ELEMENTS_OCCUPIED(atype, type))
#define FREE_LOW2(atype, type) (~0ull << (ELEMENTS_OCCUPIED(atype, type) - 64))
/* The else branch of the ternary is incorrect and must be guarded against,
* but it eliminates UB and a warning. It should be resolved at compile time. */
#define FREE_HIGH(type) \
@ -254,6 +295,24 @@ LJ_STATIC_ASSERT(MAX_BMARRAY_SIZE <= WORD_BITS);
if (HIGH_ELEMENTS_OCCUPIED(otype) != 0) \
a->free[FREE_HIGH_INDEX(otype)] = FREE_HIGH(otype)
/* Small strings are 32-byte objects in 16-byte granularity so only every other
* object is valid.
*/
#define EVERY_OTHER_OBJECT 0x5555555555555555ull
#define do_smallstr_arena_init(a, g, id, atype, otype) \
memset(a, 0, sizeof(atype)); \
a->hdr.obj_type = id; \
a->hdr.flags = g->gc.currentsweep; \
a->free_h = FREE_MASK(otype) ^ 1; \
for (uint32_t i = 0; i < WORDS_FOR_TYPE_UNROUNDED(otype); i++) { \
a->free[i] = EVERY_OTHER_OBJECT; \
} \
a->free[0] = 0; \
a->free[1] = FREE_LOW2(atype, otype) & EVERY_OTHER_OBJECT; \
if (HIGH_ELEMENTS_OCCUPIED(otype) != 0) \
a->free[FREE_HIGH_INDEX(otype)] = FREE_HIGH(otype) & EVERY_OTHER_OBJECT
typedef struct GCAcommon {
GCArenaHdr hdr;
bitmap_t unspecified;
@ -318,6 +377,40 @@ typedef struct GCAfunc {
bitmap_t gray[WORDS_FOR_TYPE(GCfunc)];
} GCAfunc;
/* This is designed to overlay on sid & len which are not useful for freeing */
typedef struct FreeBlock {
uint32_t gc_hdr;
uint32_t next;
uint32_t gcstr_hid;
uint32_t size; /* This is # of 16-byte chunks */
} FreeBlock;
/* Ensure the overlay does not clobber hid in the string */
LJ_STATIC_ASSERT(offsetof(FreeBlock, gcstr_hid) == offsetof(GCstr, hid));
LJ_STATIC_ASSERT(offsetof(FreeBlock, size) == offsetof(GCstr, len));
/* For the general purpose allocator:
* (free mark)
* 0 0 - Extent
* 0 1 - Free block
* 1 0 - In use, white
* 1 1 - In use, black
*
* Allocation uses a simple linked-list chain of FreeBlock entries.
*/
typedef struct GCAstr {
GCArenaHdr hdr;
bitmap_t free_h;
uint32_t in_use;
uint32_t free_start;
bitmap_t mark[64];
/* No gray bitmap, padding not required.
* fixed replaces the old LJ_GC_FIXED flag and serves as a permanent mark.
*/
bitmap_t fixed[64];
bitmap_t free[64];
} GCAstr;
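
A reading aid (editor's sketch, not part of the patch): combining the free and mark bitmaps recovers the per-chunk state from the (free, mark) table above; the enum and helper name are illustrative.

/* Editor's sketch: per-chunk state of a medium-string arena, following the
 * (free, mark) encoding documented above. idx is a 16-byte chunk index. */
enum StrChunkState { CHUNK_EXTENT = 0, CHUNK_FREE = 1, CHUNK_WHITE = 2, CHUNK_BLACK = 3 };
static enum StrChunkState str_chunk_state(const GCAstr *a, uint32_t idx)
{
  uint64_t f = (a->free[idx >> 6] >> (idx & 63)) & 1;  /* free bit */
  uint64_t m = (a->mark[idx >> 6] >> (idx & 63)) & 1;  /* mark bit */
  return (enum StrChunkState)((f << 1) | m);  /* 00 extent, 01 free, 10 white, 11 black */
}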
/* All offsets must match the common arena */
LJ_STATIC_ASSERT(offsetof(GCAtab, gray) == offsetof(GCAcommon, gray));
LJ_STATIC_ASSERT(offsetof(GCAtab, mark) == offsetof(GCAcommon, mark));
@ -327,6 +420,7 @@ LJ_STATIC_ASSERT(offsetof(GCAupval, gray) == offsetof(GCAcommon, gray));
LJ_STATIC_ASSERT(offsetof(GCAupval, mark) == offsetof(GCAcommon, mark));
LJ_STATIC_ASSERT(offsetof(GCAfunc, gray) == offsetof(GCAcommon, gray));
LJ_STATIC_ASSERT(offsetof(GCAfunc, mark) == offsetof(GCAcommon, mark));
LJ_STATIC_ASSERT(offsetof(GCAstr, mark) == offsetof(GCAcommon, mark));
#if LJ_HASJIT
typedef struct GCAtrace {
@ -342,4 +436,67 @@ LJ_STATIC_ASSERT(offsetof(GCAtrace, gray) == offsetof(GCAcommon, gray));
LJ_STATIC_ASSERT(offsetof(GCAtrace, mark) == offsetof(GCAcommon, mark));
#endif
/* Strings & the String Table
*
* Strings are arena allocated like other objects. Strings are assumed to have
* 16-byte granularity in all cases, which is the smallest permitted size.
*
* Strings are classified into small, medium or huge.
* Small strings have a (NUL-inclusive) payload of <= 16 bytes and are bitmap
* allocated with every other entry being ignored by the sweep code.
* Huge strings have a payload > 3000 bytes and get a dedicated allocation.
* Medium strings use a scanning allocator in a custom arena.
*
* The string table consists of two areas, the primary and secondary areas.
* Each area consists of some number of StrTab objects, each containing up to
* 15 (hash, GCstr*) mappings.
* The primary area is an array of up to LJ_MAX_STRTAB entries.
* The secondary area is an array of arenas each split into
* STRTAB_ENTRIES_PER_ARENA entries.
*
* Each string holds a reference to where it lives in the string table.
* For primary:
* 111111, (22-bit array index), (4-bit entry index)
* For secondary:
* (19-bit array index), (9-bit arena index), (4-bit entry index)
*
* This implies that the theoretical maximum number of strings allowed is
* 15 * LJ_MAX_STRTAB + 15 * STRTAB_ENTRIES_PER_ARENA * 0x7DFFF
* At present for 64-bit this is 62914560 + 2639825925 = 2702740485
*
* Lazy string collection
* Small strings are collected normally but are removed from the string
* table lazily. This allows string sweep to be extremely fast.
*
* Full collection is done when a new string is allocated (we are
* touching the memory anyway) or when the entire arena is being freed.
*/
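
An editor's sketch (not part of the patch) of how the hid layout described above decodes, mirroring the strtab_primary/strtab_secondary macros below; the helper names are illustrative:

/* Editor's sketch: split a string's hid per the layout described above.
 * A 0x3F (binary 111111) prefix marks a primary-area entry. */
static int hid_is_primary(uint32_t hid) { return (hid >> 26) == 0x3Fu; }
static uint32_t hid_entry(uint32_t hid) { return hid & 0xFu;  /* 4-bit entry index */ }
static uint32_t hid_primary_index(uint32_t hid) { return (hid >> 4) & 0x3FFFFFu;  /* 22-bit array index */ }
static uint32_t hid_secondary_strtab(uint32_t hid) { return (hid >> 4) & 0x1FFu;  /* 9-bit index within the arena */ }
static uint32_t hid_secondary_list(uint32_t hid) { return hid >> 13;  /* 19-bit array index */ }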
#define STRTAB_ENTRIES_PER_ARENA ((ARENA_SIZE - 64) / sizeof(StrTab))
#define STRTAB_UPPER_SHIFT ((ARENA_SIZE - 64) / sizeof(StrTab))
#define STRING_SECONDARY_MAXIMUM_SIZE 0x7DFFF
/* */
typedef struct GCAstrtab {
int32_t next;
int32_t prev;
int32_t index;
uint16_t count;
uint16_t free_h;
/* TODO: layout for 32-bit mode */
uint64_t free[6];
StrTab entries[STRTAB_ENTRIES_PER_ARENA];
} GCAstrtab;
LJ_STATIC_ASSERT(sizeof(GCAstrtab) == ARENA_SIZE);
#define strtab_primary(g, id) \
&mref((g)->str.tab, StrTab)[((id) >> 4) & 0x3FFFFF]
#define strtab_secondary(g, id) \
&mref((g)->str.secondary_list[(id) >> 13], GCAstrtab) \
->entries[((id) >> 4) & 0x1FF]
GCAstr *lj_arena_str_med(global_State *g);
#endif

View File

@ -15,8 +15,18 @@
#define tzcount64(x) (unsigned)_tzcnt_u64(x)
/* x & (x - 1) */
#define reset_lowest32(x) _blsr_u32(x)
#define reset_lowest64(x) _blsr_u64(x)
#define reset_lowest32(x) (uint32_t)_blsr_u32(x)
#define reset_lowest64(x) (uint64_t)_blsr_u64(x)
/* x ^ (x - 1) */
#define mask_lowest32(x) (uint32_t)_blsmsk_u32(x)
#define mask_lowest64(x) (uint64_t)_blsmsk_u64(x)
/* x & ~y */
#define and_not32(x, y) (uint32_t)_andn_u32(y, x)
#define and_not64(x, y) (uint64_t)_andn_u64(y, x)
#define popcount64(x) (unsigned)_mm_popcnt_u64(x)
/* 256 bit SIMD */
#define LJ_SIMD_256 1
@ -24,12 +34,21 @@
#define I256_ZERO(o) o = _mm256_setzero_si256()
/* vpxor a, a, a; vpcmpeqq a, a, a sets all bits to 1 */
#define I256_ONES(o) o = _mm256_cmpeq_epi64(_mm256_setzero_si256(), _mm256_setzero_si256())
#define I256_BCAST_8(o, v) o = _mm256_set1_epi8((char)v)
#define I256_BCAST_32(o, v) o = _mm256_set1_epi32((int)v)
#define I256_NEQ_64_MASK(x, y) ((uint64_t)_mm256_movemask_pd(_mm256_castsi256_pd(_mm256_cmpeq_epi64(x, y))) ^ 0xF)
#define I256_EQ_64_MASK(x, y) (uint64_t)_mm256_movemask_pd(_mm256_castsi256_pd(_mm256_cmpeq_epi64(x, y)))
#define I256_EQ_32_MASK(x, y) (uint64_t)_mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpeq_epi32(x, y)))
#define I256_AND(o, x, y) o = _mm256_and_si256(x, y)
#define I256_XOR(o, x, y) o = _mm256_xor_si256(x, y)
#define I256_OR(o, x, y) o = _mm256_or_si256(x, y)
#define I256_LOADA(o, ptr) o = _mm256_load_si256((__m256i *)ptr)
#define I256_STOREA(ptr, v) _mm256_store_si256((__m256i *)ptr, v)
#define I256_ANDNOT(o, x, y) o = _mm256_andnot_si256(y, x) /* x & ~y */
#define I256_SHL_64(o, x, n) o = _mm256_slli_epi64(x, n)
#define I256_SHUFFLE_64(o, x, mask) \
o = _mm256_castpd_si256(_mm256_permute_pd(_mm256_castsi256_pd(x), mask))
#define I256_LOADA(o, ptr) o = _mm256_load_si256((__m256i *)(ptr))
#define I256_STOREA(ptr, v) _mm256_store_si256((__m256i *)(ptr), v)
#define I256_EXTRACT(x, n) (uint64_t)_mm256_extract_epi64(x, n)
#else
#error "No intrinsics defined for arch"

View File

@ -34,12 +34,19 @@ void lj_meta_init(lua_State *L)
global_State *g = G(L);
const char *p, *q;
uint32_t mm;
GCstr *prev = NULL;
for (mm = 0, p = metanames; *p; mm++, p = q) {
GCstr *s;
for (q = p+2; *q && *q != '_'; q++) ;
s = lj_str_new(L, p, (size_t)(q-p));
/* NOBARRIER: g->gcroot[] is a GC root. */
setgcref(g->gcroot[GCROOT_MMNAME+mm], obj2gco(s));
lj_assertX(q-p <= 15, "metamethod %d not short string", mm);
lj_assertX(!prev || prev + 2 == s, "unexpected string ordering");
prev = s;
(void)prev;
fixstring(s);
if (mm == 0) {
setgcrefp(g->meta_root, s);
}
}
}

View File

@ -62,7 +62,7 @@ typedef struct GCRef {
/* Common GC header for all collectable objects. */
#define GCHeader uint8_t gcflags; uint8_t gct
/* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */
/* This occupies 2 bytes, so use the next 2 bytes for non-32 bit fields. */
#if LJ_GC64
#define gcref(r) ((GCobj *)(r).gcptr64)
@ -108,6 +108,7 @@ typedef struct GCRef {
** sure nothing invokes the GC inbetween.
** - The target and the source are the same object (self-reference).
** - The target already contains the object (e.g. moving elements around).
** - The target is a fixed string (uncollectible).
**
** The most common case is a store to a stack slot. All other cases where
** a barrier has been omitted are annotated with a NOBARRIER comment.
@ -249,6 +250,7 @@ typedef const TValue cTValue;
** ------MSW------.------LSW------
** primitive types |1..1|itype|1..................1|
** GC objects |1..1|itype|-------GCRef--------|
** 32bit GC objects |1..1|itype|ptyp|--GCRef ofs----|
** lightuserdata |1..1|itype|seg|------ofs-------|
** int (LJ_DUALNUM) |1..1|itype|0..0|-----int-------|
** number ------------double-------------
@ -307,13 +309,15 @@ typedef uint32_t StrID; /* String ID. */
typedef struct GCstr {
GCHeader;
uint8_t reserved; /* Used by lexer for fast lookup of reserved words. */
uint8_t hashalg; /* Hash algorithm. */
GCRef nextgc;
uint8_t unused;
StrID sid; /* Interned string ID. */
StrHash hash; /* Hash of string. */
uint32_t hid; /* Location of hash table entry. */
MSize len; /* Size of string. */
} GCstr;
/* String payloads can be assumed to be 16-byte aligned. */
LJ_STATIC_ASSERT(sizeof(GCstr) == 16);
#define strref(r) (&gcref((r))->str)
#define strdata(s) ((const char *)((s)+1))
#define strdatawr(s) ((char *)((s)+1))
@ -584,8 +588,6 @@ MMDEF(MMENUM)
/* GC root IDs. */
typedef enum {
GCROOT_MMNAME, /* Metamethod names. */
GCROOT_MMNAME_LAST = GCROOT_MMNAME + MM__MAX-1,
GCROOT_BASEMT, /* Metatables for base types. */
GCROOT_BASEMT_NUM = GCROOT_BASEMT + ~LJ_TNUMX,
GCROOT_IO_INPUT, /* Userdata for default I/O input file. */
@ -595,7 +597,7 @@ typedef enum {
#define basemt_it(g, it) ((g)->gcroot[GCROOT_BASEMT+~(it)])
#define basemt_obj(g, o) ((g)->gcroot[GCROOT_BASEMT+itypemap(o)])
#define mmname_str(g, mm) (strref((g)->gcroot[GCROOT_MMNAME+(mm)]))
#define mmname_str(g, mm) (strref((g)->meta_root) + 2*(mm))
/* Garbage collector state. */
typedef struct GCState {
@ -613,7 +615,6 @@ typedef struct GCState {
#else
uint8_t unused1;
#endif
MSize sweepstr; /* Sweep position in string table. */
GCRef root; /* List of all collectable objects. */
MRef sweep; /* Sweep position in root list. */
GCRef gray; /* List of gray objects. */
@ -627,6 +628,8 @@ typedef struct GCState {
GCSize estimate; /* Estimate of memory actually in use. */
GCSize accum; /* Accumulated memory marked. */
GCSize malloc; /* Malloced memory. */
GCSize strings; /* String memory */
GCSize old_strings;
MSize stepmul; /* Incremental GC step granularity. */
MSize pause; /* Pause between successive GC cycles. */
#if LJ_64
@ -641,6 +644,8 @@ typedef struct GCState {
GCArenaHdr *uv;
GCArenaHdr *func;
GCArenaHdr *udata;
GCArenaHdr *str_small;
GCArenaHdr *str;
/* This is the allocated-from blob arena. Never NULL */
GCAblob *blob_generic;
@ -663,18 +668,30 @@ typedef struct GCState {
GCArenaHdr *free_uv;
GCArenaHdr *free_func;
GCArenaHdr *free_udata;
GCArenaHdr *free_str_small;
GCArenaHdr *free_str;
/* Huge string list. Chains with 'gray' */
GCArenaHdr *str_huge;
} GCState;
/* String interning state. */
typedef struct StrInternState {
GCRef *tab; /* String hash table anchors. */
MRef tab; /* String hash table. */
MSize mask; /* String hash mask (size of hash table - 1). */
MSize num; /* Number of strings in hash table. */
MSize num_small;
MSize num_dead;
StrID id; /* Next string ID. */
uint8_t idreseed; /* String ID reseed counter. */
uint8_t second; /* String interning table uses secondary hashing. */
uint8_t unused1;
uint8_t unused2;
uint32_t secondary_list_capacity;
int32_t secondary_slot_free_head;
int32_t secondary_arena_free_head;
/* Each entry is either a GCAstrtab* or a freelist chain entry */
MRef *secondary_list;
LJ_ALIGN(8) uint64_t seed; /* Random string seed. */
} StrInternState;
@ -683,8 +700,8 @@ typedef struct global_State {
lua_Alloc allocf; /* Memory allocator. */
void *allocd; /* Memory allocator data. */
GCState gc; /* Garbage collector. */
GCstr strempty; /* Empty string. */
uint8_t stremptyz; /* Zero terminator of empty string. */
GCstr *strempty; /* Empty string. */
uint8_t unused;
uint8_t hookmask; /* Hook mask. */
uint8_t dispatchmode; /* Dispatch mode. */
uint8_t vmevmask; /* VM event mask. */
@ -706,6 +723,7 @@ typedef struct global_State {
MRef jit_base; /* Current JIT code L->base or NULL. */
MRef ctype_state; /* Pointer to C type state. */
PRNGState prng; /* Global PRNG state. */
GCRef meta_root; /* Root of metatable strings */
GCRef gcroot[GCROOT_MAX]; /* GC roots. */
} global_State;
@ -801,7 +819,6 @@ LJ_STATIC_ASSERT(offsetof(GChead, gclist) == offsetof(GCproto, gclist));
LJ_STATIC_ASSERT(offsetof(GChead, nextgc) == offsetof(lua_State, nextgc));
LJ_STATIC_ASSERT(offsetof(GChead, nextgc) == offsetof(GCproto, nextgc));
LJ_STATIC_ASSERT(offsetof(GChead, nextgc) == offsetof(GCcdata, nextgc));
LJ_STATIC_ASSERT(offsetof(GChead, nextgc) == offsetof(GCstr, nextgc));
typedef union GCobj {
GChead gch;

View File

@ -514,7 +514,7 @@ LJFOLD(XSNEW any KINT)
LJFOLDF(kfold_snew_empty)
{
if (fright->i == 0)
return lj_ir_kstr(J, &J2G(J)->strempty);
return lj_ir_kstr(J, J2G(J)->strempty);
return NEXTFOLD;
}
@ -653,7 +653,7 @@ LJFOLDF(bufstr_kfold_cse)
if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
if (fleft->o == IR_BUFHDR) { /* No put operations? */
if (fleft->op2 == IRBUFHDR_RESET) /* Empty buffer? */
return lj_ir_kstr(J, &J2G(J)->strempty);
return lj_ir_kstr(J, J2G(J)->strempty);
fins->op1 = fleft->op1;
fins->op2 = fleft->prev; /* Relies on checks in bufput_append. */
return CSEFOLD;

View File

@ -2101,7 +2101,7 @@ static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot)
topslot = J->maxslot--;
*xbase = tr;
top = xbase;
setstrV(J->L, &ix.keyv, &J2G(J)->strempty); /* Simulate string result. */
setstrV(J->L, &ix.keyv, J2G(J)->strempty); /* Simulate string result. */
} else {
J->maxslot = topslot-1;
copyTV(J->L, &ix.keyv, &J->L->base[topslot]);

View File

@ -187,7 +187,6 @@ static void close_state(lua_State *L)
lj_gc_freeall(g);
lj_assertG(gcref(g->gc.root) == obj2gco(L),
"main thread is not first GC object");
lj_assertG(g->str.num == 0, "leaked %d strings", g->str.num);
lj_trace_freestate(g);
#if LJ_HASFFI
lj_ctype_freestate(g);
@ -202,6 +201,8 @@ static void close_state(lua_State *L)
}
#endif
lj_arena_cleanup(g);
lj_mem_freevec(g, g->str.secondary_list, g->str.secondary_list_capacity,
MRef);
lj_assertG(g->gc.ctx.mem_commit == 0, "memory leak of %u arenas",
g->gc.ctx.mem_commit);
lj_assertG(g->gc.ctx.mem_huge == 0, "memory leak of %llu huge arena bytes",
@ -222,7 +223,7 @@ lua_State *lj_state_newstate(lua_Alloc allocf, void *allocd)
#else
LUA_API lua_State *lua_newstate(lua_Alloc allocf, void *allocd)
{
return lj_newstate(allocf, allocd, NULL, NULL, NULL, NULL);
return lj_newstate(allocf, allocd, NULL, NULL, NULL, NULL, NULL);
}
#endif
@ -230,6 +231,7 @@ lua_State *lj_newstate(lua_Alloc allocf, void *allocd,
luaJIT_allocpages allocp,
luaJIT_freepages freep,
luaJIT_reallochuge realloch,
luaJIT_reallocraw rawalloc,
void *page_ud)
{
PRNGState prng;
@ -257,19 +259,17 @@ lua_State *lj_newstate(lua_Alloc allocf, void *allocd,
g->allocf = allocf;
g->allocd = allocd;
g->gc.currentsweep = LJ_GC_SWEEP0;
if (!lj_arena_init(g, allocp, freep, realloch, page_ud)) {
if (!lj_arena_init(g, allocp, freep, realloch, rawalloc, page_ud)) {
close_state(L);
return NULL;
}
L->gct = ~LJ_TTHREAD;
L->gcflags = LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. */
L->gcflags = LJ_GC_SFIXED; /* Prevent free. */
L->dummy_ffid = FF_C;
setmref(L->glref, g);
g->gc.currentblack = LJ_GC_BLACK0;
g->gc.currentblackgray = LJ_GC_BLACK0 | LJ_GC_GRAY;
g->gc.safecolor = 0;
g->strempty.gcflags = 0;
g->strempty.gct = ~LJ_TSTR;
g->prng = prng;
#ifndef LUAJIT_USE_SYSMALLOC
if (allocf == lj_alloc_f) {

View File

@ -39,6 +39,7 @@ LJ_FUNC lua_State *lj_newstate(lua_Alloc f, void *ud,
luaJIT_allocpages allocp,
luaJIT_freepages freep,
luaJIT_reallochuge realloch,
luaJIT_reallocraw rawalloc,
void *page_ud);

View File

@ -12,6 +12,7 @@
#include "lj_str.h"
#include "lj_char.h"
#include "lj_prng.h"
#include "lj_intrin.h"
/* -- String helpers ------------------------------------------------------ */
@ -124,94 +125,157 @@ static LJ_NOINLINE StrHash hash_dense(uint64_t seed, StrHash h,
/* -- String interning ---------------------------------------------------- */
#define LJ_STR_MAXCOLL 32
#define LJ_STR_MAXCHAIN 32
static uint32_t lj_str_get_free(StrTab *st)
{
#if LJ_64
I256 a, b, c, d, z;
uint32_t t1, t2;
I256_ZERO(z);
I256_LOADA(a, &st->strs[0]);
I256_LOADA(b, &st->strs[4]);
I256_LOADA(c, &st->strs[8]);
I256_LOADA(d, &st->strs[12]);
t1 = I256_EQ_64_MASK(a, z) | (I256_EQ_64_MASK(b, z) << 4);
t2 = (I256_EQ_64_MASK(c, z) << 8) | (I256_EQ_64_MASK(d, z) << 12);
return t1 | t2;
#else
I256 x, z;
uint32_t ret;
I256_ZERO(z);
I256_LOADA(x, &st->strs[0]);
ret = I256_EQ_64_MASK(x, z);
I256_LOADA(x, &st->strs[8]);
return ret | I256_EQ_64_MASK(x, z) << 8;
#endif
}
static void lj_str_insert(lua_State *L, GCstr *s, StrHash hash, int hashalg)
{
global_State *g = G(L);
uint32_t index = hash & g->str.mask;
uint32_t hid = (index << 4) | 0xFC000000;
StrTab *st = &mref(g->str.tab, StrTab)[index];
#if LUAJIT_SECURITY_STRHASH
/* Check for algorithm mismatch, sparse into dense list */
if ((st->prev_len & LJ_STR_SECONDARY) && !hashalg) {
hashalg = 1;
hash = hash_dense(g->str.seed, hash, strdata(s), s->len);
index = hash & g->str.mask;
hid = (index << 4) | 0xFC000000;
st = &mref(g->str.tab, StrTab)[index];
}
#endif
while (1) {
if ((st->prev_len & 0xF) < 15) {
uint32_t i = tzcount32(lj_str_get_free(st));
lj_assertG(!gcrefu(st->strs[i]), "bad stringtable index, occupied");
lj_assertG(i == 0 || gcrefu(st->strs[i-1]), "bad stringtable index, nonsequential");
st->hashes[i] = hash;
/* NOBARRIER: string table is cleared on demand */
setgcrefp(st->strs[i], (uintptr_t)s | hashalg);
st->prev_len++;
if (!hid) {
/* There are three options here
* 1. Directly compute it from the arena header
* 2. Find an existing string and reuse its hid
* 3. Use the prev_len value of ->next
* But since next isn't guaranteed to be populated and
* while we *should* have a valid entry we'd have to find it,
* it's best to just compute it directly.
*/
GCAstrtab *a = gcat(st, GCAstrtab);
hid =
((uint32_t)a->index << 13) | ((uint32_t)(st - &a->entries[0]) << 4);
}
s->hid = hid | i;
return;
}
if (!st->next) {
StrTab *next = lj_mem_allocstrtab(L, &s->hid);
st->next = next;
next->hashes[0] = hash;
/* NOBARRIER: string table is cleared on demand */
setgcrefp(next->strs[0], (uintptr_t)s | hashalg);
/* We know all strings are valid but we don't easily know the ID going
* forwards; however, every string will contain it in hid. String 1 is
* guaranteed to hold one with the correct value.
*/
next->prev_len = st_ref(st->strs[1])->hid;
return;
}
st = st->next;
hid = 0;
}
}
/* Resize the string interning hash table (grow and shrink). */
void lj_str_resize(lua_State *L, MSize newmask)
{
global_State *g = G(L);
GCRef *newtab, *oldtab = g->str.tab;
MSize i;
StrTab *newtab, *oldtab = mref(g->str.tab, StrTab);
MSize i, j;
MSize oldmask = g->str.mask;
StrTab *tab;
/* No resizing during GC traversal or if already too big. */
if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
/* No resizing if already too big. */
if (newmask >= LJ_MAX_STRTAB-1)
return;
newtab = lj_mem_newvec(L, newmask+1, GCRef);
memset(newtab, 0, (newmask+1)*sizeof(GCRef));
newtab = (StrTab *)lj_mem_newpages(g, (newmask + 1) * sizeof(StrTab));
/* Already zeroed */
#if LUAJIT_SECURITY_STRHASH
/* Check which chains need secondary hashes. */
if (g->str.second) {
int newsecond = 0;
uint32_t newsecond = 0;
/* Compute primary chain lengths. */
for (i = g->str.mask; i != ~(MSize)0; i--) {
GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1);
while (o) {
GCstr *s = gco2str(o);
MSize hash = s->hashalg ? hash_sparse(g->str.seed, strdata(s), s->len) :
s->hash;
hash &= newmask;
setgcrefp(newtab[hash], gcrefu(newtab[hash]) + 1);
o = gcnext(o);
for (i = oldmask; i != ~(MSize)0; i--) {
StrTab *tab = (StrTab *)&oldtab[i];
do {
for (j = 0; j < 15; j++) {
GCstr *s = st_ref(tab->strs[j]);
MSize hash = st_alg(tab->strs[j])
? hash_sparse(g->str.seed, strdata(s), s->len)
: tab->hashes[j];
newtab[hash & newmask].prev_len++;
}
tab = tab->next;
} while (tab);
}
/* Mark secondary chains. */
for (i = newmask; i != ~(MSize)0; i--) {
int secondary = gcrefu(newtab[i]) > LJ_STR_MAXCOLL;
newsecond |= secondary;
setgcrefp(newtab[i], secondary);
StrTab *tab = (StrTab *)&newtab[i];
tab->prev_len =
(tab->prev_len > LJ_STR_MAXCOLL * 15) ? LJ_STR_SECONDARY : 0;
newsecond |= tab->prev_len;
}
g->str.second = newsecond;
}
#endif
/* Install new table */
setmref(g->str.tab, newtab);
g->str.mask = newmask;
/* Reinsert all strings from the old table into the new table. */
for (i = g->str.mask; i != ~(MSize)0; i--) {
GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1);
while (o) {
GCobj *next = gcnext(o);
GCstr *s = gco2str(o);
MSize hash = s->hash;
#if LUAJIT_SECURITY_STRHASH
uintptr_t u;
if (LJ_LIKELY(!s->hashalg)) { /* String hashed with primary hash. */
hash &= newmask;
u = gcrefu(newtab[hash]);
if (LJ_UNLIKELY(u & 1)) { /* Switch string to secondary hash. */
s->hash = hash = hash_dense(g->str.seed, s->hash, strdata(s), s->len);
s->hashalg = 1;
hash &= newmask;
u = gcrefu(newtab[hash]);
}
} else { /* String hashed with secondary hash. */
MSize shash = hash_sparse(g->str.seed, strdata(s), s->len);
u = gcrefu(newtab[shash & newmask]);
if (u & 1) {
hash &= newmask;
u = gcrefu(newtab[hash]);
} else { /* Revert string back to primary hash. */
s->hash = shash;
s->hashalg = 0;
hash = (shash & newmask);
for (i = 0; i < oldmask+1; i++) {
tab = &oldtab[i];
do {
StrTab *old = tab;
for (j = 0; j < 15; j++) {
GCstr *s = st_ref(tab->strs[j]);
if (s) {
lj_str_insert(L, s, tab->hashes[j], st_alg(tab->strs[j]));
}
}
/* NOBARRIER: The string table is a GC root. */
setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1));
setgcrefp(newtab[hash], ((uintptr_t)o | (u & 1)));
#else
hash &= newmask;
/* NOBARRIER: The string table is a GC root. */
setgcrefr(o->gch.nextgc, newtab[hash]);
setgcref(newtab[hash], o);
#endif
o = next;
}
tab = tab->next;
if (old != &oldtab[i])
lj_mem_freestrtab(g, old);
} while (tab);
}
/* Free old table and replace with new table. */
lj_str_freetab(g);
g->str.tab = newtab;
g->str.mask = newmask;
/* Free old table. */
lj_mem_freepages(g, oldtab, (oldmask + 1) * sizeof(StrTab));
}
#if LUAJIT_SECURITY_STRHASH
@ -220,45 +284,33 @@ static LJ_NOINLINE GCstr *lj_str_rehash_chain(lua_State *L, StrHash hashc,
const char *str, MSize len)
{
global_State *g = G(L);
int sweep =
g->gc.state == GCSsweepstring ? g->gc.safecolor : 0; /* Sweeping? */
uint8_t mask = isminor(g) ? 0xFF : ~LJ_GC_COLORS;
GCRef *strtab = g->str.tab;
MSize strmask = g->str.mask;
GCobj *o = gcref(strtab[hashc & strmask]);
setgcrefp(strtab[hashc & strmask], (void *)((uintptr_t)1));
StrTab *tab = &mref(g->str.tab, StrTab)[hashc & strmask];
StrTab *base = tab;
uint32_t i;
g->str.second = 1;
while (o) {
uintptr_t u;
GCobj *next = gcnext(o);
GCstr *s = gco2str(o);
StrHash hash;
if (sweep) { /* Must sweep while rechaining. */
if (o->gch.gcflags & sweep) { /* String alive? */
lj_assertG(!checkdead(g, o) || (o->gch.gcflags & LJ_GC_FIXED),
"sweep of undead string");
o->gch.gcflags &= mask;
} else { /* Free dead string. */
lj_assertG(checkdead(g, o) || (sweep & LJ_GC_SFIXED),
"sweep of unlive string");
lj_str_free(g, s);
o = next;
continue;
tab->prev_len |= LJ_STR_SECONDARY;
do {
StrTab *old = tab;
for (i = 0; i < 15; i++) {
if (!st_alg(tab->strs[i])) {
GCstr *s = st_ref(tab->strs[i]);
if (s) {
setgcrefnull(tab->strs[i]);
tab->prev_len--;
lj_str_insert(
L, s, hash_dense(g->str.seed, tab->hashes[i], strdata(s), s->len),
1);
}
}
hash = s->hash;
if (!s->hashalg) { /* Rehash with secondary hash. */
hash = hash_dense(g->str.seed, hash, strdata(s), s->len);
s->hash = hash;
s->hashalg = 1;
}
/* Rechain. */
hash &= strmask;
u = gcrefu(strtab[hash]);
setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1));
setgcrefp(strtab[hash], ((uintptr_t)o | (u & 1)));
o = next;
tab = tab->next;
if (old != base && !(old->prev_len & 0xF)) {
lj_mem_freechainedstrtab(g, old);
}
} while (tab);
/* Try to insert the pending string again. */
return lj_str_new(L, str, len);
}
@ -279,11 +331,9 @@ static GCstr *lj_str_alloc(lua_State *L, const char *str, MSize len,
{
GCstr *s = lj_mem_allocstr(L, len);
global_State *g = G(L);
uintptr_t u;
newwhite(s);
s->gct = ~LJ_TSTR;
s->len = len;
s->hash = hash;
#ifndef STRID_RESEED_INTERVAL
s->sid = g->str.id++;
#elif STRID_RESEED_INTERVAL
@ -297,17 +347,12 @@ static GCstr *lj_str_alloc(lua_State *L, const char *str, MSize len,
s->sid = (StrID)lj_prng_u64(&g->prng);
#endif
s->reserved = 0;
s->hashalg = (uint8_t)hashalg;
/* Clear last 4 bytes of allocated memory. Implies zero-termination, too. */
*(uint32_t *)(strdatawr(s)+(len & ~(MSize)3)) = 0;
memcpy(strdatawr(s), str, len);
/* Add to string hash table. */
hash &= g->str.mask;
u = gcrefu(g->str.tab[hash]);
setgcrefp(s->nextgc, (u & ~(uintptr_t)1));
/* NOBARRIER: The string table is a GC root. */
setgcrefp(g->str.tab[hash], ((uintptr_t)s | (u & 1)));
if (g->str.num++ > g->str.mask) /* Allow a 100% load factor. */
lj_str_insert(L, s, hash, hashalg);
if (g->str.num++ > g->str.mask * 15) /* Allow a 100% load factor. */
lj_str_resize(L, (g->str.mask << 1) + 1); /* Grow string table. */
return s; /* Return newly interned string. */
}
@ -317,31 +362,46 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
{
global_State *g = G(L);
if (lenx-1 < LJ_MAX_STR-1) {
I256 h0, h1, cmp;
MSize len = (MSize)lenx;
StrHash hash = hash_sparse(g->str.seed, str, len);
MSize coll = 0;
uint32_t chain = 0;
int hashalg = 0;
/* Check if the string has already been interned. */
GCobj *o = gcref(g->str.tab[hash & g->str.mask]);
StrTab *st = &mref(g->str.tab, StrTab)[hash & g->str.mask];
StrTab *root;
#if LUAJIT_SECURITY_STRHASH
if (LJ_UNLIKELY((uintptr_t)o & 1)) { /* Secondary hash for this chain? */
if (LJ_UNLIKELY(st->prev_len & LJ_STR_SECONDARY)) { /* Secondary hash for this chain? */
hashalg = 1;
hash = hash_dense(g->str.seed, hash, str, len);
o = (GCobj *)(gcrefu(g->str.tab[hash & g->str.mask]) & ~(uintptr_t)1);
st = &mref(g->str.tab, StrTab)[hash & g->str.mask];
}
#endif
while (o != NULL) {
GCstr *sx = gco2str(o);
if (sx->hash == hash && sx->len == len) {
if (memcmp(str, strdata(sx), len) == 0) {
root = st;
I256_BCAST_32(cmp, hash);
do {
I256_LOADA(h0, &st->hashes[0]);
I256_LOADA(h1, &st->hashes[8]);
uint32_t eq = (I256_EQ_32_MASK(h0, cmp) | ((I256_EQ_32_MASK(h1, cmp) & 0x7F) << 8));
while (eq != 0) {
GCstr *sx = st_ref(st->strs[tzcount32(eq)]);
eq = reset_lowest32(eq);
if (LJ_UNLIKELY(!sx))
continue;
if (len == sx->len && memcmp(str, strdata(sx), len) == 0) {
maybe_resurrect_str(g, sx);
return sx; /* Return existing string. */
}
coll++;
}
coll++;
o = gcnext(o);
}
chain++;
st = st->next;
} while (st != NULL);
if (LJ_UNLIKELY(chain > 0x3FFFFFF))
chain = 0x3FFFFFF;
root->prev_len = (root->prev_len & 0x1F) | (chain << 5);
#if LUAJIT_SECURITY_STRHASH
/* Rehash chain if there are too many collisions. */
if (LJ_UNLIKELY(coll > LJ_STR_MAXCOLL) && !hashalg) {
@ -349,24 +409,37 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
}
#endif
/* Otherwise allocate a new string. */
return lj_str_alloc(L, str, len, hash, hashalg);
} else {
if (lenx)
lj_err_msg(L, LJ_ERR_STROV);
return &g->strempty;
return g->strempty;
}
}
void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
{
g->str.num--;
lj_mem_free(g, s, lj_str_size(s->len));
}
void LJ_FASTCALL lj_str_init(lua_State *L)
{
global_State *g = G(L);
g->str.seed = lj_prng_u64(&g->prng);
lj_str_resize(L, LJ_MIN_STRTAB-1);
g->str.secondary_arena_free_head = -1;
g->str.secondary_list_capacity = 8;
g->str.secondary_slot_free_head = g->str.secondary_list_capacity - 1;
g->str.secondary_list = lj_mem_newvec(L, g->str.secondary_list_capacity, MRef);
for (uint32_t i = 1; i < g->str.secondary_list_capacity; i++)
setmrefu(g->str.secondary_list[i], i-1);
setmrefu(g->str.secondary_list[0], ~0ull);
g->strempty = lj_mem_allocstr(L, 0);
memset(g->strempty, 0, 32);
g->strempty->gct = ~LJ_TSTR;
fixstring(g->strempty);
g->str.mask = LJ_MIN_STRTAB - 1;
setmref(g->str.tab, lj_mem_newpages(g, LJ_MIN_STRTAB * sizeof(StrTab)));
}
void lj_str_freetab(global_State *g)
{
lj_mem_freepages(g, mref(g->str.tab, void), (g->str.mask + 1) * sizeof(StrTab));
}

View File

@ -19,13 +19,11 @@ LJ_FUNC int lj_str_haspattern(GCstr *s);
/* String interning. */
LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
LJ_FUNC void LJ_FASTCALL lj_str_init(lua_State *L);
#define lj_str_freetab(g) \
(lj_mem_freevec(g, g->str.tab, g->str.mask+1, GCRef))
LJ_FUNC void lj_str_freetab(global_State *g);
LJ_FUNC void lj_str_shrink(lua_State *L);
#define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s)))
#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
#define lj_str_size(len) (sizeof(GCstr) + (((len)+4) & ~(MSize)3))
#endif

View File

@ -77,6 +77,9 @@ typedef unsigned (*luaJIT_allocpages)(void *ud, void **pages, unsigned n);
/* Free is called one last time with NULL, 0 to indicate any state should be released */
typedef void (*luaJIT_freepages)(void *ud, void **pages, unsigned n);
typedef void* (*luaJIT_reallochuge)(void *ud, void *p, size_t osz, size_t nsz);
/* Raw page allocation, similar to huge but no space is reserved and
* there are no extra alignment requirements. Resize behaviour is not required. */
typedef void* (*luaJIT_reallocraw)(void *ud, void *p, size_t osz, size_t nsz);
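
By way of illustration, a minimal sketch of a user-supplied raw allocation callback matching this contract (malloc-backed; the function name is hypothetical and this is not part of the patch):

#include <stdlib.h>

/* Editor's sketch: a malloc-backed luaJIT_reallocraw callback. Only plain
 * allocate and free are handled; resize is not required by the contract
 * above and is reported as unsupported by returning NULL. */
static void *my_rawalloc(void *ud, void *p, size_t osz, size_t nsz)
{
  (void)ud; (void)osz;
  if (!p)
    return malloc(nsz);  /* fresh allocation */
  if (!nsz) {
    free(p);  /* release */
    return NULL;
  }
  return NULL;  /* resize not supported */
}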
/* This many bytes are reserved at the start of each huge arena for the allocator's use */
#define LUAJIT_HUGE_RESERVED_SPACE 20
@ -100,6 +103,7 @@ LUA_API lua_State *luaJIT_newstate(lua_Alloc f, void *ud,
luaJIT_allocpages allocp,
luaJIT_freepages freep,
luaJIT_reallochuge realloch,
luaJIT_reallocraw rawalloc,
void *page_ud);
/* As lua_createtable, but can be used with __gc */

View File

@ -1243,7 +1243,8 @@ static void build_subroutines(BuildCtx *ctx)
| jz ->fff_res1
| settp TAB:RC, TAB:RB, LJ_TTAB
| mov [BASE-16], TAB:RC // Store metatable as default result.
| mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)]
| mov STR:RC, [DISPATCH+DISPATCH_GL(meta_root)]
| add STR:RC, 32*MM_metatable
| mov RAd, TAB:RB->hmask
| and RAd, STR:RC->sid
| settp STR:RC, LJ_TSTR