From 15ed84bd499b3ecdba9f431f2d24696a313227e4 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 12 Aug 2021 21:10:13 +0200 Subject: [PATCH] String buffers, part 4a: Add metatable serialization dictionary. Sponsored by fmad.io. --- doc/ext_buffer.html | 45 ++++++++++++++---------- src/lib_buffer.c | 17 +++++++--- src/lj_buf.h | 3 +- src/lj_gc.c | 6 ++-- src/lj_serialize.c | 83 +++++++++++++++++++++++++++++++++++++-------- src/lj_serialize.h | 3 +- 6 files changed, 116 insertions(+), 41 deletions(-) diff --git a/doc/ext_buffer.html b/doc/ext_buffer.html index 2443fc90..63c2efe3 100644 --- a/doc/ext_buffer.html +++ b/doc/ext_buffer.html @@ -127,7 +127,7 @@ space.

Buffers operate like a FIFO (first-in first-out) data structure. Data can be appended (written) to the end of the buffer and consumed (read) -from the front of the buffer. These operations can be freely mixed. +from the front of the buffer. These operations may be freely mixed.

The buffer space that holds the characters is managed automatically @@ -199,7 +199,7 @@ may be reused.

buf = buf:free()

The buffer space of the buffer object is freed. The object itself -remains intact, empty and it may be reused. +remains intact, empty and may be reused.

Note: you normally don't need to use this method. The garbage collector @@ -404,8 +404,8 @@ speed is mostly constrained by object creation cost.

The serializer handles most Lua types, common FFI number types and -nested structures. Functions, thread objects, other FFI cdata, full -userdata and associated metatables cannot be serialized (yet). +nested structures. Functions, thread objects, other FFI cdata and full +userdata cannot be serialized (yet).

The encoder serializes nested structures as trees. Multiple references @@ -461,21 +461,31 @@ commonly occur as table keys of objects you are serializing. These keys are compactly encoded as indexes during serialization. A well chosen dictionary saves space and improves serialization performance. +

  • +metatable is a Lua table holding a dictionary of metatables +for the table objects you are serializing. +
  • -dict needs to be an array of strings, starting at index 1 and -without holes (no nil inbetween). The table is anchored in the -buffer object and internally modified into a two-way index (don't do -this yourself, just pass a plain array). The table must not be modified -after it has been passed to buffer.new(). +dict needs to be an array of strings and metatable needs +to be an array of tables. Both must start at index 1 and have no holes (no +nil in between). The tables are anchored in the buffer object and +internally modified into a two-way index (don't do this yourself, just pass +a plain array). The tables must not be modified after they have been passed +to buffer.new().

    -The dict tables used by the encoder and decoder must be the -same. Put the most common entries at the front. Extend at the end to -ensure backwards-compatibility — older encodings can then still be -read. You may also set some indexes to false to explicitly drop -backwards-compatibility. Old encodings that use these indexes will throw -an error when decoded. +The dict and metatable tables used by the encoder and +decoder must be the same. Put the most common entries at the front. Extend +at the end to ensure backwards-compatibility — older encodings can +then still be read. You may also set some indexes to false to +explicitly drop backwards-compatibility. Old encodings that use these +indexes will throw an error when decoded. +

    +

    +Metatables that are not found in the metatable dictionary are +ignored when encoding. Decoding returns a table with a nil +metatable.

    Note: parsing and preparation of the options table is somewhat @@ -564,7 +574,7 @@ suffix.

     object    → nil | false | true
               | null | lightud32 | lightud64
    -          | int | num | tab
    +          | int | num | tab | tab_mt
               | int64 | uint64 | complex
               | string
     
    @@ -585,13 +595,14 @@ tab       → 0x08                                   // Empty table
               | 0x0b a.U a*object h.U h*{object object}      // Mixed
               | 0x0c a.U (a-1)*object                // 1-based array
               | 0x0d a.U (a-1)*object h.U h*{object object}  // Mixed
    +tab_mt    → 0x0e (index-1).U tab          // Metatable dict entry
     
     int64     → 0x10 int.L                             // FFI int64_t
     uint64    → 0x11 uint.L                           // FFI uint64_t
     complex   → 0x12 re.L im.L                         // FFI complex
     
     string    → (0x20+len).U len*char.B
    -          | 0x0f (index-1).U                        // Dict entry
    +          | 0x0f (index-1).U                 // String dict entry
     
     .B = 8 bit
     .I = 32 bit little-endian
    diff --git a/src/lib_buffer.c b/src/lib_buffer.c
    index ae065759..2e364861 100644
    --- a/src/lib_buffer.c
    +++ b/src/lib_buffer.c
    @@ -288,7 +288,7 @@ LJLIB_CF(buffer_new)
     {
       MSize sz = 0;
       int targ = 1;
    -  GCtab *env, *dict = NULL;
    +  GCtab *env, *dict_str = NULL, *dict_mt = NULL;
       GCudata *ud;
       SBufExt *sbx;
       if (L->base < L->top && !tvistab(L->base)) {
    @@ -298,10 +298,16 @@ LJLIB_CF(buffer_new)
       }
       if (L->base+targ-1 < L->top) {
         GCtab *options = lj_lib_checktab(L, targ);
    -    cTValue *opt_dict = lj_tab_getstr(options, lj_str_newlit(L, "dict"));
    +    cTValue *opt_dict, *opt_mt;
    +    opt_dict = lj_tab_getstr(options, lj_str_newlit(L, "dict"));
         if (opt_dict && tvistab(opt_dict)) {
    -      dict = tabV(opt_dict);
    -      lj_serialize_dict_prep(L, dict);
    +      dict_str = tabV(opt_dict);
    +      lj_serialize_dict_prep_str(L, dict_str);
    +    }
    +    opt_mt = lj_tab_getstr(options, lj_str_newlit(L, "metatable"));
    +    if (opt_mt && tvistab(opt_mt)) {
    +      dict_mt = tabV(opt_mt);
    +      lj_serialize_dict_prep_mt(L, dict_mt);
         }
       }
       env = tabref(curr_func(L)->c.env);
    @@ -312,7 +318,8 @@ LJLIB_CF(buffer_new)
       setudataV(L, L->top++, ud);
       sbx = (SBufExt *)uddata(ud);
       lj_bufx_init(L, sbx);
    -  setgcref(sbx->dict, obj2gco(dict));
    +  setgcref(sbx->dict_str, obj2gco(dict_str));
    +  setgcref(sbx->dict_mt, obj2gco(dict_mt));
       if (sz > 0) lj_buf_need2((SBuf *)sbx, sz);
       return 1;
     }
    diff --git a/src/lj_buf.h b/src/lj_buf.h
    index 4ace2685..e2ac922e 100644
    --- a/src/lj_buf.h
    +++ b/src/lj_buf.h
    @@ -27,7 +27,8 @@ typedef struct SBufExt {
         MRef bsb;		/* Borrowed string buffer. */
       };
       char *r;		/* Read pointer. */
    -  GCRef dict;		/* Serialization string dictionary table. */
    +  GCRef dict_str;	/* Serialization string dictionary table. */
    +  GCRef dict_mt;	/* Serialization metatable dictionary table. */
       int depth;		/* Remaining recursion depth. */
     } SBufExt;
     
    diff --git a/src/lj_gc.c b/src/lj_gc.c
    index 646a27b2..5a238542 100644
    --- a/src/lj_gc.c
    +++ b/src/lj_gc.c
    @@ -69,8 +69,10 @@ static void gc_mark(global_State *g, GCobj *o)
           SBufExt *sbx = (SBufExt *)uddata(gco2ud(o));
           if (sbufiscow(sbx) && gcref(sbx->cowref))
     	gc_markobj(g, gcref(sbx->cowref));
    -      if (gcref(sbx->dict))
    -	gc_markobj(g, gcref(sbx->dict));
    +      if (gcref(sbx->dict_str))
    +	gc_markobj(g, gcref(sbx->dict_str));
    +      if (gcref(sbx->dict_mt))
    +	gc_markobj(g, gcref(sbx->dict_mt));
         }
       } else if (LJ_UNLIKELY(gct == ~LJ_TUPVAL)) {
         GCupval *uv = gco2uv(o);
    diff --git a/src/lj_serialize.c b/src/lj_serialize.c
    index 70ff4796..e12e3668 100644
    --- a/src/lj_serialize.c
    +++ b/src/lj_serialize.c
    @@ -34,8 +34,8 @@ enum {
       SER_TAG_INT,
       SER_TAG_NUM,
       SER_TAG_TAB,		/* 0x08 */
    -  SER_TAG_0x0e = SER_TAG_TAB+6,
    -  SER_TAG_DICT,
    +  SER_TAG_DICT_MT = SER_TAG_TAB+6,
    +  SER_TAG_DICT_STR,
       SER_TAG_INT64,	/* 0x10 */
       SER_TAG_UINT64,
       SER_TAG_COMPLEX,
    @@ -124,7 +124,7 @@ static LJ_AINLINE char *serialize_ru124(char *r, char *w, uint32_t *pv)
     }
     
     /* Prepare string dictionary for use (once). */
    -void LJ_FASTCALL lj_serialize_dict_prep(lua_State *L, GCtab *dict)
    +void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict)
     {
       if (!dict->hmask) {  /* No hash part means not prepared, yet. */
         MSize i, len = lj_tab_len(dict);
    @@ -143,6 +143,26 @@ void LJ_FASTCALL lj_serialize_dict_prep(lua_State *L, GCtab *dict)
       }
     }
     
    +/* Prepare metatable dictionary for use (once): add a hash part mapping each table in the array part to its 0-based index. */
    +void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict)
    +{
    +  if (!dict->hmask) {  /* No hash part means not prepared, yet. */
    +    MSize i, len = lj_tab_len(dict);
    +    if (!len) return;  /* Empty dictionary: nothing to index. */
    +    lj_tab_resize(L, dict, dict->asize, hsize2hbits(len));  /* Keep array size, add hash part sized from len. */
    +    for (i = 1; i <= len && i < dict->asize; i++) {  /* Walk array slots 1..len. */
    +      cTValue *o = arrayslot(dict, i);
    +      if (tvistab(o)) {
    +	if (tvisnil(lj_tab_get(L, dict, o))) {  /* Ignore dups: the first occurrence keeps its index. */
    +	  lj_tab_newkey(L, dict, o)->u64 = (uint64_t)(i-1);  /* Store 0-based index; the decoder adds 1 again. */
    +	}
    +      } else if (!tvisfalse(o)) {  /* false entries are skipped, anything else is a bad option. */
    +	lj_err_caller(L, LJ_ERR_BUFFER_BADOPT);
    +      }
    +    }
    +  }
    +}
    +
     /* -- Internal serializer ------------------------------------------------- */
     
     /* Put serialized object into buffer. */
    @@ -185,6 +205,22 @@ static char *serialize_put(char *w, SBufExt *sbx, cTValue *o)
           for (i = 0; i <= hmask; i++)
     	nhash += !tvisnil(&node[i].val);
         }
    +    /* Write metatable index. */
    +    if (LJ_UNLIKELY(tabref(sbx->dict_mt)) && tabref(t->metatable)) {
    +      TValue mto;
    +      Node *n;
    +      settabV(sbufL(sbx), &mto, tabref(t->metatable));
    +      n = hashgcref(tabref(sbx->dict_mt), mto.gcr);
    +      do {
    +	if (n->key.u64 == mto.u64) {
    +	  uint32_t idx = n->val.u32.lo;
    +	  w = serialize_more(w, sbx, 1+5);
    +	  *w++ = SER_TAG_DICT_MT;
    +	  w = serialize_wu124(w, idx);
    +	  break;
    +	}
    +      } while ((n = nextnode(n)));
    +    }
         /* Write number of array slots and hash slots. */
         w = serialize_more(w, sbx, 1+2*5);
         *w++ = (char)(SER_TAG_TAB + (nhash ? 1 : 0) + (narray ? one : 0));
    @@ -197,19 +233,19 @@ static char *serialize_put(char *w, SBufExt *sbx, cTValue *o)
         }
         if (nhash) {  /* Write hash entries. */
           const Node *node = noderef(t->node) + t->hmask;
    -      GCtab *dict = tabref(sbx->dict);
    -      if (LJ_UNLIKELY(dict)) {
    +      GCtab *dict_str = tabref(sbx->dict_str);
    +      if (LJ_UNLIKELY(dict_str)) {
     	for (;; node--)
     	  if (!tvisnil(&node->val)) {
     	    if (LJ_LIKELY(tvisstr(&node->key))) {
     	      /* Inlined lj_tab_getstr is 30% faster. */
     	      const GCstr *str = strV(&node->key);
    -	      Node *n = hashstr(dict, str);
    +	      Node *n = hashstr(dict_str, str);
     	      do {
     		if (tvisstr(&n->key) && strV(&n->key) == str) {
     		  uint32_t idx = n->val.u32.lo;
     		  w = serialize_more(w, sbx, 1+5);
    -		  *w++ = SER_TAG_DICT;
    +		  *w++ = SER_TAG_DICT_STR;
     		  w = serialize_wu124(w, idx);
     		  break;
     		}
    @@ -322,19 +358,32 @@ static char *serialize_get(char *r, SBufExt *sbx, TValue *o)
         if (!tvisnum(o)) setnanV(o);
       } else if (tp <= SER_TAG_TRUE) {
         setpriV(o, ~tp);
    -  } else if (tp == SER_TAG_DICT) {
    -    GCtab *dict;
    +  } else if (tp == SER_TAG_DICT_STR) {
    +    GCtab *dict_str;
         uint32_t idx;
         r = serialize_ru124(r, w, &idx);
         idx++;
    -    dict = tabref(sbx->dict);
    -    if (dict && idx < dict->asize && tvisstr(arrayslot(dict, idx)))
    -      copyTV(sbufL(sbx), o, arrayslot(dict, idx));
    +    dict_str = tabref(sbx->dict_str);
    +    if (dict_str && idx < dict_str->asize && tvisstr(arrayslot(dict_str, idx)))
    +      copyTV(sbufL(sbx), o, arrayslot(dict_str, idx));
         else
           lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx);
    -  } else if (tp >= SER_TAG_TAB && tp < SER_TAG_TAB+6) {
    +  } else if (tp >= SER_TAG_TAB && tp <= SER_TAG_DICT_MT) {
         uint32_t narray = 0, nhash = 0;
    -    GCtab *t;
    +    GCtab *t, *mt = NULL;
    +    if (tp == SER_TAG_DICT_MT) {
    +      GCtab *dict_mt;
    +      uint32_t idx;
    +      r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob;
    +      idx++;
    +      dict_mt = tabref(sbx->dict_mt);
    +      if (dict_mt && idx < dict_mt->asize && tvistab(arrayslot(dict_mt, idx)))
    +	mt = tabV(arrayslot(dict_mt, idx));
    +      else
    +	lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx);
    +      r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob;
    +      if (!(tp >= SER_TAG_TAB && tp < SER_TAG_DICT_MT)) goto badtag;
    +    }
         if (tp >= SER_TAG_TAB+2) {
           r = serialize_ru124(r, w, &narray); if (LJ_UNLIKELY(!r)) goto eob;
         }
    @@ -342,6 +391,8 @@ static char *serialize_get(char *r, SBufExt *sbx, TValue *o)
           r = serialize_ru124(r, w, &nhash); if (LJ_UNLIKELY(!r)) goto eob;
         }
         t = lj_tab_new(sbufL(sbx), narray, hsize2hbits(nhash));
    +    /* NOBARRIER: The table is new (marked white). */
    +    setgcref(t->metatable, obj2gco(mt));
         settabV(sbufL(sbx), o, t);
         if (narray) {
           TValue *oa = tvref(t->array) + (tp >= SER_TAG_TAB+4);
    @@ -395,6 +446,7 @@ static char *serialize_get(char *r, SBufExt *sbx, TValue *o)
         setrawlightudV(o, (void *)ud);
     #endif
       } else {
    +badtag:
         lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDEC, tp);
       }
       return r;
    @@ -460,10 +512,11 @@ LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx)
         case SER_TAG_NUM: return IRT_NUM;
         case SER_TAG_TAB: case SER_TAG_TAB+1: case SER_TAG_TAB+2:
         case SER_TAG_TAB+3: case SER_TAG_TAB+4: case SER_TAG_TAB+5:
    +    case SER_TAG_DICT_MT:
           return IRT_TAB;
         case SER_TAG_INT64: case SER_TAG_UINT64: case SER_TAG_COMPLEX:
           return IRT_CDATA;
    -    case SER_TAG_DICT:
    +    case SER_TAG_DICT_STR:
         default:
           return IRT_STR;
         }
    diff --git a/src/lj_serialize.h b/src/lj_serialize.h
    index 9bd780ca..1fda23eb 100644
    --- a/src/lj_serialize.h
    +++ b/src/lj_serialize.h
    @@ -13,7 +13,8 @@
     
     #define LJ_SERIALIZE_DEPTH	100	/* Default depth. */
     
    -LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep(lua_State *L, GCtab *dict);
    +LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict);
    +LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict);
     LJ_FUNC SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o);
     LJ_FUNC char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o);
     LJ_FUNC GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o);