String buffers, part 4a: Add metatable serialization dictionary.

Sponsored by fmad.io.
This commit is contained in:
Mike Pall 2021-08-12 21:10:13 +02:00
parent 983d66b8c5
commit 15ed84bd49
6 changed files with 116 additions and 41 deletions

View File

@ -127,7 +127,7 @@ space.
<p>
Buffers operate like a FIFO (first-in first-out) data structure. Data
can be appended (written) to the end of the buffer and consumed (read)
from the front of the buffer. These operations can be freely mixed.
from the front of the buffer. These operations may be freely mixed.
</p>
<p>
The buffer space that holds the characters is managed automatically
@ -199,7 +199,7 @@ may be reused.
<h3 id="buffer_free"><tt>buf = buf:free()</tt></h3>
<p>
The buffer space of the buffer object is freed. The object itself
remains intact, empty and it may be reused.
remains intact, empty and may be reused.
</p>
<p>
Note: you normally don't need to use this method. The garbage collector
@ -404,8 +404,8 @@ speed is mostly constrained by object creation cost.
</p>
<p>
The serializer handles most Lua types, common FFI number types and
nested structures. Functions, thread objects, other FFI cdata, full
userdata and associated metatables cannot be serialized (yet).
nested structures. Functions, thread objects, other FFI cdata and full
userdata cannot be serialized (yet).
</p>
<p>
The encoder serializes nested structures as trees. Multiple references
@ -461,21 +461,31 @@ commonly occur as table keys of objects you are serializing. These keys
are compactly encoded as indexes during serialization. A well chosen
dictionary saves space and improves serialization performance.
</li>
<li>
<tt>metatable</tt> is a Lua table holding a <b>dictionary of metatables</b>
for the table objects you are serializing.
</li>
</ul>
<p>
<tt>dict</tt> needs to be an array of strings, starting at index 1 and
without holes (no <tt>nil</tt> inbetween). The table is anchored in the
buffer object and internally modified into a two-way index (don't do
this yourself, just pass a plain array). The table must not be modified
after it has been passed to <tt>buffer.new()</tt>.
<tt>dict</tt> needs to be an array of strings and <tt>metatable</tt> needs
to be an array of tables. Both must start at index 1 and have no holes (no
<tt>nil</tt> in between). The tables are anchored in the buffer object and
internally modified into a two-way index (don't do this yourself, just pass
a plain array). The tables must not be modified after they have been passed
to <tt>buffer.new()</tt>.
</p>
<p>
The <tt>dict</tt> tables used by the encoder and decoder must be the
same. Put the most common entries at the front. Extend at the end to
ensure backwards-compatibility &mdash; older encodings can then still be
read. You may also set some indexes to <tt>false</tt> to explicitly drop
backwards-compatibility. Old encodings that use these indexes will throw
an error when decoded.
The <tt>dict</tt> and <tt>metatable</tt> tables used by the encoder and
decoder must be the same. Put the most common entries at the front. Extend
at the end to ensure backwards-compatibility &mdash; older encodings can
then still be read. You may also set some indexes to <tt>false</tt> to
explicitly drop backwards-compatibility. Old encodings that use these
indexes will throw an error when decoded.
</p>
<p>
Metatables that are not found in the <tt>metatable</tt> dictionary are
ignored when encoding. Decoding such a table returns it with a <tt>nil</tt>
metatable.
</p>
<p>
Note: parsing and preparation of the options table is somewhat
@ -564,7 +574,7 @@ suffix.
<pre>
object → nil | false | true
| null | lightud32 | lightud64
| int | num | tab
| int | num | tab | tab_mt
| int64 | uint64 | complex
| string
@ -585,13 +595,14 @@ tab → 0x08 // Empty table
| 0x0b a.U a*object h.U h*{object object} // Mixed
| 0x0c a.U (a-1)*object // 1-based array
| 0x0d a.U (a-1)*object h.U h*{object object} // Mixed
tab_mt → 0x0e (index-1).U tab // Metatable dict entry
int64 → 0x10 int.L // FFI int64_t
uint64 → 0x11 uint.L // FFI uint64_t
complex → 0x12 re.L im.L // FFI complex
string → (0x20+len).U len*char.B
| 0x0f (index-1).U // Dict entry
| 0x0f (index-1).U // String dict entry
.B = 8 bit
.I = 32 bit little-endian

View File

@ -288,7 +288,7 @@ LJLIB_CF(buffer_new)
{
MSize sz = 0;
int targ = 1;
GCtab *env, *dict = NULL;
GCtab *env, *dict_str = NULL, *dict_mt = NULL;
GCudata *ud;
SBufExt *sbx;
if (L->base < L->top && !tvistab(L->base)) {
@ -298,10 +298,16 @@ LJLIB_CF(buffer_new)
}
if (L->base+targ-1 < L->top) {
GCtab *options = lj_lib_checktab(L, targ);
cTValue *opt_dict = lj_tab_getstr(options, lj_str_newlit(L, "dict"));
cTValue *opt_dict, *opt_mt;
opt_dict = lj_tab_getstr(options, lj_str_newlit(L, "dict"));
if (opt_dict && tvistab(opt_dict)) {
dict = tabV(opt_dict);
lj_serialize_dict_prep(L, dict);
dict_str = tabV(opt_dict);
lj_serialize_dict_prep_str(L, dict_str);
}
opt_mt = lj_tab_getstr(options, lj_str_newlit(L, "metatable"));
if (opt_mt && tvistab(opt_mt)) {
dict_mt = tabV(opt_mt);
lj_serialize_dict_prep_mt(L, dict_mt);
}
}
env = tabref(curr_func(L)->c.env);
@ -312,7 +318,8 @@ LJLIB_CF(buffer_new)
setudataV(L, L->top++, ud);
sbx = (SBufExt *)uddata(ud);
lj_bufx_init(L, sbx);
setgcref(sbx->dict, obj2gco(dict));
setgcref(sbx->dict_str, obj2gco(dict_str));
setgcref(sbx->dict_mt, obj2gco(dict_mt));
if (sz > 0) lj_buf_need2((SBuf *)sbx, sz);
return 1;
}

View File

@ -27,7 +27,8 @@ typedef struct SBufExt {
MRef bsb; /* Borrowed string buffer. */
};
char *r; /* Read pointer. */
GCRef dict; /* Serialization string dictionary table. */
GCRef dict_str; /* Serialization string dictionary table. */
GCRef dict_mt; /* Serialization metatable dictionary table. */
int depth; /* Remaining recursion depth. */
} SBufExt;

View File

@ -69,8 +69,10 @@ static void gc_mark(global_State *g, GCobj *o)
SBufExt *sbx = (SBufExt *)uddata(gco2ud(o));
if (sbufiscow(sbx) && gcref(sbx->cowref))
gc_markobj(g, gcref(sbx->cowref));
if (gcref(sbx->dict))
gc_markobj(g, gcref(sbx->dict));
if (gcref(sbx->dict_str))
gc_markobj(g, gcref(sbx->dict_str));
if (gcref(sbx->dict_mt))
gc_markobj(g, gcref(sbx->dict_mt));
}
} else if (LJ_UNLIKELY(gct == ~LJ_TUPVAL)) {
GCupval *uv = gco2uv(o);

View File

@ -34,8 +34,8 @@ enum {
SER_TAG_INT,
SER_TAG_NUM,
SER_TAG_TAB, /* 0x08 */
SER_TAG_0x0e = SER_TAG_TAB+6,
SER_TAG_DICT,
SER_TAG_DICT_MT = SER_TAG_TAB+6,
SER_TAG_DICT_STR,
SER_TAG_INT64, /* 0x10 */
SER_TAG_UINT64,
SER_TAG_COMPLEX,
@ -124,7 +124,7 @@ static LJ_AINLINE char *serialize_ru124(char *r, char *w, uint32_t *pv)
}
/* Prepare string dictionary for use (once). */
void LJ_FASTCALL lj_serialize_dict_prep(lua_State *L, GCtab *dict)
void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict)
{
if (!dict->hmask) { /* No hash part means not prepared, yet. */
MSize i, len = lj_tab_len(dict);
@ -143,6 +143,26 @@ void LJ_FASTCALL lj_serialize_dict_prep(lua_State *L, GCtab *dict)
}
}
/* Prepare the metatable dictionary for use (once).
**
** For every table found in the array part, a hash entry keyed by that
** table is added whose value holds the 0-based array position. Entries
** that are `false` are skipped; any other non-table entry raises
** LJ_ERR_BUFFER_BADOPT.
*/
void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict)
{
  MSize idx, n;
  if (dict->hmask) return;  /* A hash part means it is already prepared. */
  n = lj_tab_len(dict);
  if (n == 0) return;
  lj_tab_resize(L, dict, dict->asize, hsize2hbits(n));
  for (idx = 1; idx <= n && idx < dict->asize; idx++) {
    cTValue *slot = arrayslot(dict, idx);
    if (!tvistab(slot)) {
      if (!tvisfalse(slot))
        lj_err_caller(L, LJ_ERR_BUFFER_BADOPT);
      continue;
    }
    /* Only the first occurrence of a table gets an entry; dups are ignored. */
    if (!tvisnil(lj_tab_get(L, dict, slot)))
      continue;
    lj_tab_newkey(L, dict, slot)->u64 = (uint64_t)(idx-1);
  }
}
/* -- Internal serializer ------------------------------------------------- */
/* Put serialized object into buffer. */
@ -185,6 +205,22 @@ static char *serialize_put(char *w, SBufExt *sbx, cTValue *o)
for (i = 0; i <= hmask; i++)
nhash += !tvisnil(&node[i].val);
}
/* Write metatable index. */
if (LJ_UNLIKELY(tabref(sbx->dict_mt)) && tabref(t->metatable)) {
TValue mto;
Node *n;
settabV(sbufL(sbx), &mto, tabref(t->metatable));
n = hashgcref(tabref(sbx->dict_mt), mto.gcr);
do {
if (n->key.u64 == mto.u64) {
uint32_t idx = n->val.u32.lo;
w = serialize_more(w, sbx, 1+5);
*w++ = SER_TAG_DICT_MT;
w = serialize_wu124(w, idx);
break;
}
} while ((n = nextnode(n)));
}
/* Write number of array slots and hash slots. */
w = serialize_more(w, sbx, 1+2*5);
*w++ = (char)(SER_TAG_TAB + (nhash ? 1 : 0) + (narray ? one : 0));
@ -197,19 +233,19 @@ static char *serialize_put(char *w, SBufExt *sbx, cTValue *o)
}
if (nhash) { /* Write hash entries. */
const Node *node = noderef(t->node) + t->hmask;
GCtab *dict = tabref(sbx->dict);
if (LJ_UNLIKELY(dict)) {
GCtab *dict_str = tabref(sbx->dict_str);
if (LJ_UNLIKELY(dict_str)) {
for (;; node--)
if (!tvisnil(&node->val)) {
if (LJ_LIKELY(tvisstr(&node->key))) {
/* Inlined lj_tab_getstr is 30% faster. */
const GCstr *str = strV(&node->key);
Node *n = hashstr(dict, str);
Node *n = hashstr(dict_str, str);
do {
if (tvisstr(&n->key) && strV(&n->key) == str) {
uint32_t idx = n->val.u32.lo;
w = serialize_more(w, sbx, 1+5);
*w++ = SER_TAG_DICT;
*w++ = SER_TAG_DICT_STR;
w = serialize_wu124(w, idx);
break;
}
@ -322,19 +358,32 @@ static char *serialize_get(char *r, SBufExt *sbx, TValue *o)
if (!tvisnum(o)) setnanV(o);
} else if (tp <= SER_TAG_TRUE) {
setpriV(o, ~tp);
} else if (tp == SER_TAG_DICT) {
GCtab *dict;
} else if (tp == SER_TAG_DICT_STR) {
GCtab *dict_str;
uint32_t idx;
r = serialize_ru124(r, w, &idx);
idx++;
dict = tabref(sbx->dict);
if (dict && idx < dict->asize && tvisstr(arrayslot(dict, idx)))
copyTV(sbufL(sbx), o, arrayslot(dict, idx));
dict_str = tabref(sbx->dict_str);
if (dict_str && idx < dict_str->asize && tvisstr(arrayslot(dict_str, idx)))
copyTV(sbufL(sbx), o, arrayslot(dict_str, idx));
else
lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx);
} else if (tp >= SER_TAG_TAB && tp < SER_TAG_TAB+6) {
} else if (tp >= SER_TAG_TAB && tp <= SER_TAG_DICT_MT) {
uint32_t narray = 0, nhash = 0;
GCtab *t;
GCtab *t, *mt = NULL;
if (tp == SER_TAG_DICT_MT) {
GCtab *dict_mt;
uint32_t idx;
r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob;
idx++;
dict_mt = tabref(sbx->dict_mt);
if (dict_mt && idx < dict_mt->asize && tvistab(arrayslot(dict_mt, idx)))
mt = tabV(arrayslot(dict_mt, idx));
else
lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx);
r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob;
if (!(tp >= SER_TAG_TAB && tp < SER_TAG_DICT_MT)) goto badtag;
}
if (tp >= SER_TAG_TAB+2) {
r = serialize_ru124(r, w, &narray); if (LJ_UNLIKELY(!r)) goto eob;
}
@ -342,6 +391,8 @@ static char *serialize_get(char *r, SBufExt *sbx, TValue *o)
r = serialize_ru124(r, w, &nhash); if (LJ_UNLIKELY(!r)) goto eob;
}
t = lj_tab_new(sbufL(sbx), narray, hsize2hbits(nhash));
/* NOBARRIER: The table is new (marked white). */
setgcref(t->metatable, obj2gco(mt));
settabV(sbufL(sbx), o, t);
if (narray) {
TValue *oa = tvref(t->array) + (tp >= SER_TAG_TAB+4);
@ -395,6 +446,7 @@ static char *serialize_get(char *r, SBufExt *sbx, TValue *o)
setrawlightudV(o, (void *)ud);
#endif
} else {
badtag:
lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDEC, tp);
}
return r;
@ -460,10 +512,11 @@ LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx)
case SER_TAG_NUM: return IRT_NUM;
case SER_TAG_TAB: case SER_TAG_TAB+1: case SER_TAG_TAB+2:
case SER_TAG_TAB+3: case SER_TAG_TAB+4: case SER_TAG_TAB+5:
case SER_TAG_DICT_MT:
return IRT_TAB;
case SER_TAG_INT64: case SER_TAG_UINT64: case SER_TAG_COMPLEX:
return IRT_CDATA;
case SER_TAG_DICT:
case SER_TAG_DICT_STR:
default:
return IRT_STR;
}

View File

@ -13,7 +13,8 @@
#define LJ_SERIALIZE_DEPTH 100 /* Default depth. */
LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep(lua_State *L, GCtab *dict);
LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict);
LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict);
LJ_FUNC SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o);
LJ_FUNC char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o);
LJ_FUNC GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o);