From 4c6b669c419f313306b9e6ee43be4ad5f6d73ec6 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 25 Mar 2021 02:21:31 +0100 Subject: [PATCH] String buffers, part 1: object serialization. Sponsored by fmad.io. --- doc/contact.html | 2 + doc/ext_buffer.html | 275 +++++++++++++++++++++++++++++ doc/ext_c_api.html | 2 + doc/ext_ffi.html | 2 + doc/ext_ffi_api.html | 2 + doc/ext_ffi_semantics.html | 2 + doc/ext_ffi_tutorial.html | 2 + doc/ext_jit.html | 2 + doc/ext_profiler.html | 2 + doc/extensions.html | 2 + doc/faq.html | 2 + doc/install.html | 2 + doc/luajit.html | 2 + doc/running.html | 2 + doc/status.html | 2 + src/Makefile | 6 +- src/Makefile.dep | 36 ++-- src/lib_buffer.c | 66 +++++++ src/lib_string.c | 3 + src/lj_arch.h | 7 + src/lj_buf.h | 9 +- src/lj_errmsg.h | 10 ++ src/lj_serialize.c | 351 +++++++++++++++++++++++++++++++++++++ src/lj_serialize.h | 21 +++ src/ljamalg.c | 2 + src/lualib.h | 1 + 26 files changed, 797 insertions(+), 18 deletions(-) create mode 100644 doc/ext_buffer.html create mode 100644 src/lib_buffer.c create mode 100644 src/lj_serialize.c create mode 100644 src/lj_serialize.h diff --git a/doc/contact.html b/doc/contact.html index b7980091..c253a08b 100644 --- a/doc/contact.html +++ b/doc/contact.html @@ -37,6 +37,8 @@ FFI Semantics
  • +String Buffers +
  • jit.* Library
  • Lua/C API diff --git a/doc/ext_buffer.html b/doc/ext_buffer.html new file mode 100644 index 00000000..455c298d --- /dev/null +++ b/doc/ext_buffer.html @@ -0,0 +1,275 @@ + + + +String Buffers + + + + + + + +
    +Lua +
    + + +
    +

    + +The string buffer library allows high-performance manipulation of +string-like data. + +

    +

    + +Unlike Lua strings, which are constants, string buffers are +mutable sequences of 8-bit (binary-transparent) characters. Data +can be stored, formatted and encoded into a string buffer and later +converted, decoded or extracted. + +

    +

    + +The convenient string buffer API simplifies common string manipulation +tasks that would otherwise require creating many intermediate strings. +String buffers improve performance by eliminating redundant memory +copies, object creation, string interning and garbage collection +overhead. In conjunction with the FFI library, they allow zero-copy +operations. + +

    + +

    Using the String Buffer Library

    +

    +The string buffer library is built into LuaJIT by default, but it's not +loaded by default. Add this to the start of every Lua file that needs +one of its functions: +

    +
    +local buffer = require("string.buffer")
    +
    + +

    Work in Progress

    + +

    + +This library is a work in progress. More +functions will be added soon. + +

    + +

    Serialization of Lua Objects

    +

    + +The following functions and methods allow high-speed serialization +(encoding) of a Lua object into a string and decoding it back to a Lua +object. This allows convenient storage and transport of structured +data. + +

    +

    + +The encoded data is in an internal binary +format. The data can be stored in files, binary-transparent +databases or transmitted to other LuaJIT instances across threads, +processes or networks. + +

    +

    + +Encoding speed can reach up to 1 Gigabyte/second on a modern desktop- or +server-class system, even when serializing many small objects. Decoding +speed is mostly constrained by object creation cost. + +

    +

    + +The serializer handles most Lua types, common FFI number types and +nested structures. Functions, thread objects, other FFI cdata, full +userdata and associated metatables cannot be serialized (yet). + +

    +

    + +The encoder serializes nested structures as trees. Multiple references +to a single object will be stored separately and create distinct objects +after decoding. Circular references cause an error. + + +

    + +

    str = buffer.encode(obj)

    +

    + +Serializes (encodes) the Lua object obj into the string +str. + +

    +

    + +obj can be any of the supported Lua types — it doesn't +need to be a Lua table. + +

    +

    + +This function may throw an error when attempting to serialize +unsupported object types, circular references or deeply nested tables. + +

    + +

    obj = buffer.decode(str)

    +

    + +De-serializes (decodes) the string str into the Lua object +obj. + +

    +

    + +The returned object may be any of the supported Lua types — +even nil. + +

    +

    + +This function may throw an error when fed with malformed or incomplete +encoded data. The standalone function throws when there's left-over data +after decoding a single top-level object. + +

    + +

    Serialization Format Specification

    +

    + +This serialization format is designed for internal use by LuaJIT +applications. Serialized data is upwards-compatible and portable across +all supported LuaJIT platforms. + +

    +

    + +It's an 8-bit binary format and not human-readable. For example, it uses +embedded zeroes and stores embedded Lua string objects unmodified, which +are 8-bit-clean, too. Encoded data can be safely concatenated for +streaming and later decoded one top-level object at a time. + +

    +

    + +The encoding is reasonably compact, but tuned for maximum performance, +not for minimum space usage. It compresses well with any of the common +byte-oriented data compression algorithms. + +

    +

    + +Although documented here for reference, this format is explicitly +not intended to be a 'public standard' for structured data +interchange across computer languages (like JSON or MessagePack). Please +do not use it as such. + +

    +

    + +The specification is given below as a context-free grammar with a +top-level object as the starting point. Alternatives are +separated by the | symbol and * indicates repeats. +Grouping is implicit or indicated by {…}. Terminals are +either plain hex numbers, encoded as bytes, or have a .format +suffix. + +

    +
    +object    → nil | false | true
    +          | null | lightud32 | lightud64
    +          | int | num | tab
    +          | int64 | uint64 | complex
    +          | string
    +
    +nil       → 0x00
    +false     → 0x01
    +true      → 0x02
    +
    +null      → 0x03                            // NULL lightuserdata
    +lightud32 → 0x04 data.I                   // 32 bit lightuserdata
    +lightud64 → 0x05 data.L                   // 64 bit lightuserdata
    +
    +int       → 0x06 int.I                                 // int32_t
    +num       → 0x07 double.L
    +
    +tab       → 0x08                                   // Empty table
    +          | 0x09 h.U h*{object object}          // Key/value hash
    +          | 0x0a a.U a*object                    // 0-based array
    +          | 0x0b a.U a*object h.U h*{object object}      // Mixed
    +          | 0x0c a.U (a-1)*object                // 1-based array
    +          | 0x0d a.U (a-1)*object h.U h*{object object}  // Mixed
    +
    +int64     → 0x10 int.L                             // FFI int64_t
    +uint64    → 0x11 uint.L                           // FFI uint64_t
    +complex   → 0x12 re.L im.L                         // FFI complex
    +
    +string    → (0x20+len).U len*char.B
    +
    +.B = 8 bit
    +.I = 32 bit little-endian
    +.L = 64 bit little-endian
    +.U = prefix-encoded 32 bit unsigned number n:
    +     0x00..0xdf   → n.B
    +     0xe0..0x1fdf → (0xe0|(((n-0xe0)>>8)&0x1f)).B ((n-0xe0)&0xff).B
    +   0x1fe0..       → 0xff n.I
    +
    +
    +
    + + + diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html index 6079e5ac..9f1ad212 100644 --- a/doc/ext_c_api.html +++ b/doc/ext_c_api.html @@ -37,6 +37,8 @@ FFI Semantics
  • +String Buffers +
  • jit.* Library
  • Lua/C API diff --git a/doc/ext_ffi.html b/doc/ext_ffi.html index 13b75bda..b934dc78 100644 --- a/doc/ext_ffi.html +++ b/doc/ext_ffi.html @@ -37,6 +37,8 @@ FFI Semantics
  • +String Buffers +
  • jit.* Library
  • Lua/C API diff --git a/doc/ext_ffi_api.html b/doc/ext_ffi_api.html index b7ace808..061cc42a 100644 --- a/doc/ext_ffi_api.html +++ b/doc/ext_ffi_api.html @@ -42,6 +42,8 @@ td.abiparam { font-weight: bold; width: 6em; } FFI Semantics
  • +String Buffers +
  • jit.* Library
  • Lua/C API diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html index 904ee51d..fef39c32 100644 --- a/doc/ext_ffi_semantics.html +++ b/doc/ext_ffi_semantics.html @@ -42,6 +42,8 @@ td.convop { font-style: italic; width: 40%; } FFI Semantics
  • +String Buffers +
  • jit.* Library
  • Lua/C API diff --git a/doc/ext_ffi_tutorial.html b/doc/ext_ffi_tutorial.html index 8ed61364..ca71be4d 100644 --- a/doc/ext_ffi_tutorial.html +++ b/doc/ext_ffi_tutorial.html @@ -44,6 +44,8 @@ td.idiomlua b { font-weight: normal; color: #2142bf; } FFI Semantics
  • +String Buffers +
  • jit.* Library
  • Lua/C API diff --git a/doc/ext_jit.html b/doc/ext_jit.html index 84302fa0..6dd54c70 100644 --- a/doc/ext_jit.html +++ b/doc/ext_jit.html @@ -37,6 +37,8 @@ FFI Semantics
  • +String Buffers +
  • jit.* Library
  • Lua/C API diff --git a/doc/ext_profiler.html b/doc/ext_profiler.html index 0e8d3691..2783abdb 100644 --- a/doc/ext_profiler.html +++ b/doc/ext_profiler.html @@ -37,6 +37,8 @@ FFI Semantics
  • +String Buffers +
  • jit.* Library
  • Lua/C API diff --git a/doc/extensions.html b/doc/extensions.html index 77cf444c..799679a3 100644 --- a/doc/extensions.html +++ b/doc/extensions.html @@ -54,6 +54,8 @@ td.excinterop { FFI Semantics
  • +String Buffers +
  • jit.* Library
  • Lua/C API diff --git a/doc/faq.html b/doc/faq.html index b71e6e7c..a5d744d2 100644 --- a/doc/faq.html +++ b/doc/faq.html @@ -40,6 +40,8 @@ dd { margin-left: 1.5em; } FFI Semantics
  • +String Buffers +
  • jit.* Library
  • Lua/C API diff --git a/doc/install.html b/doc/install.html index fab0b2ca..e4af9dde 100644 --- a/doc/install.html +++ b/doc/install.html @@ -65,6 +65,8 @@ td.compatno { FFI Semantics
  • +String Buffers +
  • jit.* Library
  • Lua/C API diff --git a/doc/luajit.html b/doc/luajit.html index 42c0ac83..a25267a6 100644 --- a/doc/luajit.html +++ b/doc/luajit.html @@ -122,6 +122,8 @@ table.feature small { FFI Semantics
  • +String Buffers +
  • jit.* Library
  • Lua/C API diff --git a/doc/running.html b/doc/running.html index ae4911d5..b55b8439 100644 --- a/doc/running.html +++ b/doc/running.html @@ -59,6 +59,8 @@ td.param_default { FFI Semantics
  • +String Buffers +
  • jit.* Library
  • Lua/C API diff --git a/doc/status.html b/doc/status.html index e1f024bf..1d3ba984 100644 --- a/doc/status.html +++ b/doc/status.html @@ -40,6 +40,8 @@ ul li { padding-bottom: 0.3em; } FFI Semantics
  • +String Buffers +
  • jit.* Library
  • Lua/C API diff --git a/src/Makefile b/src/Makefile index 6f17bafd..a6e25ba1 100644 --- a/src/Makefile +++ b/src/Makefile @@ -482,13 +482,15 @@ LJVM_BOUT= $(LJVM_S) LJVM_MODE= elfasm LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \ - lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o + lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o \ + lib_buffer.o LJLIB_C= $(LJLIB_O:.o=.c) LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ lj_prng.o lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o \ - lj_strscan.o lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \ + lj_strscan.o lj_strfmt.o lj_strfmt_num.o lj_serialize.o \ + lj_api.o lj_profile.o \ lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ diff --git a/src/Makefile.dep b/src/Makefile.dep index 3f26599e..315bf632 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -10,6 +10,9 @@ lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \ lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \ lj_ffdef.h lj_lib.h lj_libdef.h +lib_buffer.o: lib_buffer.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ + lj_def.h lj_arch.h lj_gc.h lj_buf.h lj_str.h lj_serialize.h lj_lib.h \ + lj_libdef.h lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \ lj_libdef.h @@ -170,15 +173,18 @@ lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \ lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \ lj_vm.h lj_vmevent.h +lj_prng.o: lj_prng.c lj_def.h lua.h luaconf.h lj_arch.h lj_prng.h lj_profile.o: 
lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \ lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h -lj_prng.o: lj_prng.c lj_def.h lua.h luaconf.h lj_arch.h lj_prng.h lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \ lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \ lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h lj_prng.h +lj_serialize.o: lj_serialize.c lj_obj.h lua.h luaconf.h lj_def.h \ + lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \ + lj_udata.h lj_ctype.h lj_cdata.h lj_serialize.h lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \ @@ -189,7 +195,7 @@ lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_prng.h lj_lex.h \ lj_alloc.h luajit.h lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ - lj_err.h lj_errmsg.h lj_str.h lj_char.h + lj_err.h lj_errmsg.h lj_str.h lj_char.h lj_prng.h lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h lj_strfmt_num.o: lj_strfmt_num.c lj_obj.h lua.h luaconf.h lj_def.h \ @@ -204,7 +210,7 @@ lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_dispatch.h lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h \ lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h lj_prng.h lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_udata.h + lj_gc.h lj_err.h lj_errmsg.h lj_udata.h lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h 
lj_jit.h lj_ir.h \ lj_vm.h lj_vmevent.h @@ -216,23 +222,23 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_assert.c lj_obj.h \ lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ lj_traceerr.h lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h \ lj_char.c lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c \ - lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \ - lj_prng.c lj_prng.h lj_state.c lj_lex.h lj_alloc.h luajit.h \ + lj_prng.h lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h \ + lj_debug.c lj_prng.c lj_state.c lj_lex.h lj_alloc.h luajit.h \ lj_dispatch.c lj_ccallback.h lj_profile.h lj_vmevent.c lj_vmevent.h \ - lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c lj_api.c \ - lj_profile.c lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c \ - lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h \ - lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h \ - lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h \ - lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h lj_iropt.h \ - lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \ - lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \ - lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ + lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c lj_serialize.c \ + lj_serialize.h lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \ + lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \ + lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \ + lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \ + lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \ + lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \ + lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \ + lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ lj_crecord.h lj_ffrecord.c 
lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \ lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \ lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \ lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \ - lib_init.c + lib_buffer.c lib_init.c luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ diff --git a/src/lib_buffer.c b/src/lib_buffer.c new file mode 100644 index 00000000..e4555596 --- /dev/null +++ b/src/lib_buffer.c @@ -0,0 +1,66 @@ +/* +** Buffer library. +** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lib_buffer_c +#define LUA_LIB + +#include "lua.h" +#include "lauxlib.h" +#include "lualib.h" + +#include "lj_obj.h" + +#if LJ_HASBUFFER +#include "lj_gc.h" +#include "lj_buf.h" +#include "lj_serialize.h" +#include "lj_lib.h" + +/* ------------------------------------------------------------------------ */ + +#define LJLIB_MODULE_buffer + +/* Note: this uses interim structs until the SBuf reorg. 
*/ + +LJLIB_CF(buffer_encode) +{ + cTValue *o = lj_lib_checkany(L, 1); + StrBuf sbuf; + sbuf.sb = lj_buf_tmp_(L); + lj_serialize_put(&sbuf, o); + setstrV(L, L->top++, lj_buf_str(L, sbuf.sb)); + lj_gc_check(L); + return 1; +} + +LJLIB_CF(buffer_decode) +{ + GCstr *str = lj_lib_checkstr(L, 1); + const char *p = strdata(str); + SBuf sb; + StrBuf sbuf; + setsbufL(&sb, L); + setmref(sb.b, p); + setmref(sb.p, p + str->len); + setmref(sb.e, p + str->len); + sbuf.sb = &sb; + sbuf.r = (char *)p; + setnilV(L->top++); + lj_serialize_get(&sbuf, L->top-1); + lj_gc_check(L); + return 1; +} + +/* ------------------------------------------------------------------------ */ + +#include "lj_libdef.h" + +int luaopen_string_buffer(lua_State *L) +{ + LJ_LIB_REG(L, NULL, buffer); + return 1; +} + +#endif diff --git a/src/lib_string.c b/src/lib_string.c index 51d1c4b0..4a3ff372 100644 --- a/src/lib_string.c +++ b/src/lib_string.c @@ -743,6 +743,9 @@ LUALIB_API int luaopen_string(lua_State *L) setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt)); settabV(L, lj_tab_setstr(L, mt, mmname_str(g, MM_index)), tabV(L->top-1)); mt->nomm = (uint8_t)(~(1u<top-1)); +#endif return 1; } diff --git a/src/lj_arch.h b/src/lj_arch.h index 0a6e1b9f..ae999467 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -549,6 +549,13 @@ #define LJ_HASFFI 1 #endif +/* Disable or enable the string buffer extension. */ +#if defined(LUAJIT_DISABLE_BUFFER) +#define LJ_HASBUFFER 0 +#else +#define LJ_HASBUFFER 1 +#endif + #if defined(LUAJIT_DISABLE_PROFILE) #define LJ_HASPROFILE 0 #elif LJ_TARGET_POSIX diff --git a/src/lj_buf.h b/src/lj_buf.h index ae875298..a720f83b 100644 --- a/src/lj_buf.h +++ b/src/lj_buf.h @@ -10,7 +10,7 @@ #include "lj_gc.h" #include "lj_str.h" -/* Resizable string buffers. Struct definition in lj_obj.h. */ +/* Resizable string buffers. SBuf struct definition in lj_obj.h. 
*/ #define sbufB(sb) (mref((sb)->b, char)) #define sbufP(sb) (mref((sb)->p, char)) #define sbufE(sb) (mref((sb)->e, char)) @@ -100,4 +100,11 @@ static LJ_AINLINE GCstr *lj_buf_str(lua_State *L, SBuf *sb) return lj_str_new(L, sbufB(sb), sbuflen(sb)); } +/* Interim user-accessible string buffer. */ +typedef struct StrBuf { + SBuf *sb; /* Pointer to system buffer. */ + char *r; /* String buffer read pointer. */ + int depth; /* Remaining recursion depth. */ +} StrBuf; + #endif diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h index 9ff4553d..a6f638ce 100644 --- a/src/lj_errmsg.h +++ b/src/lj_errmsg.h @@ -179,6 +179,16 @@ ERRDEF(FFI_NYIPACKBIT, "NYI: packed bit fields") ERRDEF(FFI_NYICALL, "NYI: cannot call this C function (yet)") #endif +#if LJ_HASBUFFER +/* String buffer errors. */ +ERRDEF(BUFFER_BADENC, "cannot serialize " LUA_QS) +ERRDEF(BUFFER_BADDEC, "cannot deserialize tag 0x%02x") +ERRDEF(BUFFER_DEPTH, "too deep to serialize") +ERRDEF(BUFFER_DUPKEY, "duplicate table key") +ERRDEF(BUFFER_EOB, "unexpected end of buffer") +ERRDEF(BUFFER_LEFTOV, "left-over data in buffer") +#endif + #undef ERRDEF /* Detecting unused error messages: diff --git a/src/lj_serialize.c b/src/lj_serialize.c new file mode 100644 index 00000000..5d7b7721 --- /dev/null +++ b/src/lj_serialize.c @@ -0,0 +1,351 @@ +/* +** Object de/serialization. +** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_serialize_c +#define LUA_CORE + +#include "lj_obj.h" +#include "lj_err.h" +#include "lj_buf.h" +#include "lj_str.h" +#include "lj_tab.h" +#include "lj_udata.h" +#if LJ_HASFFI +#include "lj_ctype.h" +#include "lj_cdata.h" +#endif +#include "lj_serialize.h" + +/* Tags for internal serialization format. 
*/ +enum { + SER_TAG_NIL, /* 0x00 */ + SER_TAG_FALSE, + SER_TAG_TRUE, + SER_TAG_NULL, + SER_TAG_LIGHTUD32, + SER_TAG_LIGHTUD64, + SER_TAG_INT, + SER_TAG_NUM, + SER_TAG_TAB, /* 0x08 */ + SER_TAG_0x0e = SER_TAG_TAB+6, + SER_TAG_0x0f, + SER_TAG_INT64, /* 0x10 */ + SER_TAG_UINT64, + SER_TAG_COMPLEX, + SER_TAG_0x13, + SER_TAG_0x14, + SER_TAG_0x15, + SER_TAG_0x16, + SER_TAG_0x17, + SER_TAG_0x18, /* 0x18 */ + SER_TAG_0x19, + SER_TAG_0x1a, + SER_TAG_0x1b, + SER_TAG_0x1c, + SER_TAG_0x1d, + SER_TAG_0x1e, + SER_TAG_0x1f, + SER_TAG_STR, /* 0x20 + str->len */ +}; +LJ_STATIC_ASSERT((SER_TAG_TAB & 7) == 0); + +/* -- Helper functions ---------------------------------------------------- */ + +static LJ_AINLINE char *serialize_more(char *w, StrBuf *sbuf, MSize sz) +{ + if (LJ_UNLIKELY(sz > (MSize)(sbufE(sbuf->sb) - w))) { + setsbufP(sbuf->sb, w); + w = lj_buf_more2(sbuf->sb, sz); + } + return w; +} + +/* Write U124 to buffer. */ +static LJ_NOINLINE char *serialize_wu124_(char *w, uint32_t v) +{ + if (v < 0x1fe0) { + v -= 0xe0; + *w++ = (char)(0xe0 | (v >> 8)); *w++ = (char)v; + } else { + *w++ = (char)0xff; +#if LJ_BE + v = lj_bswap(v); +#endif + memcpy(w, &v, 4); w += 4; + } + return w; +} + +static LJ_AINLINE char *serialize_wu124(char *w, uint32_t v) +{ + if (LJ_LIKELY(v < 0xe0)) { + *w++ = (char)v; + return w; + } else { + return serialize_wu124_(w, v); + } +} + +static LJ_NOINLINE char *serialize_ru124_(char *r, char *e, uint32_t *pv) +{ + uint32_t v = *pv; + if (v != 0xff) { + if (r >= e) return NULL; + v = ((v & 0x1f) << 8) + *(uint8_t *)r + 0xe0; r++; + } else { + if (r + 4 > e) return NULL; + v = lj_getu32(r); r += 4; +#if LJ_BE + v = lj_bswap(v); +#endif + } + *pv = v; + return r; +} + +static LJ_AINLINE char *serialize_ru124(char *r, char *e, uint32_t *pv) +{ + if (LJ_LIKELY(r < e)) { + uint32_t v = *(uint8_t *)r; r++; + *pv = v; + if (LJ_UNLIKELY(v >= 0xe0)) { + r = serialize_ru124_(r, e, pv); + } + return r; + } + return NULL; +} + +/* -- Internal serializer 
------------------------------------------------- */ + +/* Put serialized object into buffer. */ +static char *serialize_put(char *w, StrBuf *sbuf, cTValue *o) +{ + if (LJ_LIKELY(tvisstr(o))) { + const GCstr *str = strV(o); + MSize len = str->len; + w = serialize_more(w, sbuf, 5+len); + w = serialize_wu124(w, SER_TAG_STR + len); + w = lj_buf_wmem(w, strdata(str), len); + } else if (tvisint(o)) { + uint32_t x = LJ_BE ? lj_bswap((uint32_t)intV(o)) : (uint32_t)intV(o); + w = serialize_more(w, sbuf, 1+4); + *w++ = SER_TAG_INT; memcpy(w, &x, 4); w += 4; + } else if (tvisnum(o)) { + uint64_t x = LJ_BE ? lj_bswap64(o->u64) : o->u64; + w = serialize_more(w, sbuf, 1+sizeof(lua_Number)); + *w++ = SER_TAG_NUM; memcpy(w, &x, 8); w += 8; + } else if (tvispri(o)) { + w = serialize_more(w, sbuf, 1); + *w++ = (char)(SER_TAG_NIL + ~itype(o)); + } else if (tvistab(o)) { + const GCtab *t = tabV(o); + uint32_t narray = 0, nhash = 0, one = 2; + if (sbuf->depth <= 0) lj_err_caller(sbufL(sbuf->sb), LJ_ERR_BUFFER_DEPTH); + sbuf->depth--; + if (t->asize > 0) { /* Determine max. length of array part. */ + ptrdiff_t i; + TValue *array = tvref(t->array); + for (i = (ptrdiff_t)t->asize-1; i >= 0; i--) + if (!tvisnil(&array[i])) + break; + narray = (uint32_t)(i+1); + if (narray && tvisnil(&array[0])) one = 4; + } + if (t->hmask > 0) { /* Count number of used hash slots. */ + uint32_t i, hmask = t->hmask; + Node *node = noderef(t->node); + for (i = 0; i <= hmask; i++) + nhash += !tvisnil(&node[i].val); + } + /* Write number of array slots and hash slots. */ + w = serialize_more(w, sbuf, 1+2*5); + *w++ = (char)(SER_TAG_TAB + (nhash ? 1 : 0) + (narray ? one : 0)); + if (narray) w = serialize_wu124(w, narray); + if (nhash) w = serialize_wu124(w, nhash); + if (narray) { /* Write array entries. */ + cTValue *oa = tvref(t->array) + (one >> 2); + cTValue *oe = tvref(t->array) + narray; + while (oa < oe) w = serialize_put(w, sbuf, oa++); + } + if (nhash) { /* Write hash entries. 
*/ + const Node *node = noderef(t->node) + t->hmask; + for (;; node--) + if (!tvisnil(&node->val)) { + w = serialize_put(w, sbuf, &node->key); + w = serialize_put(w, sbuf, &node->val); + if (--nhash == 0) break; + } + } + sbuf->depth++; +#if LJ_HASFFI + } else if (tviscdata(o)) { + CTState *cts = ctype_cts(sbufL(sbuf->sb)); + CType *s = ctype_raw(cts, cdataV(o)->ctypeid); + uint8_t *sp = cdataptr(cdataV(o)); + if (ctype_isinteger(s->info) && s->size == 8) { + w = serialize_more(w, sbuf, 1+8); + *w++ = (s->info & CTF_UNSIGNED) ? SER_TAG_UINT64 : SER_TAG_INT64; +#if LJ_BE + { uint64_t u = lj_bswap64(*(uint64_t *)sp); memcpy(w, &u, 8); } +#else + memcpy(w, sp, 8); +#endif + w += 8; + } else if (ctype_iscomplex(s->info) && s->size == 16) { + w = serialize_more(w, sbuf, 1+16); + *w++ = SER_TAG_COMPLEX; +#if LJ_BE + { /* Only swap the doubles. The re/im order stays the same. */ + uint64_t u = lj_bswap64(((uint64_t *)sp)[0]); memcpy(w, &u, 8); + u = lj_bswap64(((uint64_t *)sp)[1]); memcpy(w+8, &u, 8); + } +#else + memcpy(w, sp, 16); +#endif + w += 16; + } else { + goto badenc; /* NYI other cdata */ + } +#endif + } else if (tvislightud(o)) { + uintptr_t ud = (uintptr_t)lightudV(G(sbufL(sbuf->sb)), o); + w = serialize_more(w, sbuf, 1+sizeof(ud)); + if (ud == 0) { + *w++ = SER_TAG_NULL; + } else if (LJ_32 || checku32(ud)) { +#if LJ_BE && LJ_64 + ud = lj_bswap64(ud); +#elif LJ_BE + ud = lj_bswap(ud); +#endif + *w++ = SER_TAG_LIGHTUD32; memcpy(w, &ud, 4); w += 4; + } else { +#if LJ_BE + ud = lj_bswap64(ud); +#endif + *w++ = SER_TAG_LIGHTUD64; memcpy(w, &ud, 8); w += 8; + } + } else { + /* NYI userdata */ +#if LJ_HASFFI + badenc: +#endif + lj_err_callerv(sbufL(sbuf->sb), LJ_ERR_BUFFER_BADENC, lj_typename(o)); + } + return w; +} + +/* Get serialized object from buffer. 
*/ +static char *serialize_get(char *r, StrBuf *sbuf, TValue *o) +{ + char *e = sbufE(sbuf->sb); + uint32_t tp; + r = serialize_ru124(r, e, &tp); if (LJ_UNLIKELY(!r)) goto eob; + if (LJ_LIKELY(tp >= SER_TAG_STR)) { + uint32_t len = tp - SER_TAG_STR; + if (LJ_UNLIKELY(len > (uint32_t)(e - r))) goto eob; + setstrV(sbufL(sbuf->sb), o, lj_str_new(sbufL(sbuf->sb), r, len)); + r += len; + } else if (tp == SER_TAG_INT) { + if (LJ_UNLIKELY(r + 4 > e)) goto eob; + setintV(o, (int32_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r))); + r += 4; + } else if (tp == SER_TAG_NUM) { + if (LJ_UNLIKELY(r + 8 > e)) goto eob; + memcpy(o, r, 8); r += 8; +#if LJ_BE + o->u64 = lj_bswap64(o->u64); +#endif + if (!tvisnum(o)) setnanV(o); + } else if (tp <= SER_TAG_TRUE) { + setpriV(o, ~tp); + } else if (tp >= SER_TAG_TAB && tp < SER_TAG_TAB+6) { + uint32_t narray = 0, nhash = 0; + GCtab *t; + if (tp >= SER_TAG_TAB+2) { + r = serialize_ru124(r, e, &narray); if (LJ_UNLIKELY(!r)) goto eob; + } + if ((tp & 1)) { + r = serialize_ru124(r, e, &nhash); if (LJ_UNLIKELY(!r)) goto eob; + } + t = lj_tab_new(sbufL(sbuf->sb), narray, hsize2hbits(nhash)); + settabV(sbufL(sbuf->sb), o, t); + if (narray) { + TValue *oa = tvref(t->array) + (tp >= SER_TAG_TAB+4); + TValue *oe = tvref(t->array) + narray; + while (oa < oe) r = serialize_get(r, sbuf, oa++); + } + if (nhash) { + do { + TValue k, *v; + r = serialize_get(r, sbuf, &k); + v = lj_tab_set(sbufL(sbuf->sb), t, &k); + if (LJ_UNLIKELY(!tvisnil(v))) + lj_err_caller(sbufL(sbuf->sb), LJ_ERR_BUFFER_DUPKEY); + r = serialize_get(r, sbuf, v); + } while (--nhash); + } +#if LJ_HASFFI + } else if (tp >= SER_TAG_INT64 && tp <= SER_TAG_COMPLEX) { + uint32_t sz = tp == SER_TAG_COMPLEX ? 16 : 8; + GCcdata *cd; + if (LJ_UNLIKELY(r + sz > e)) goto eob; + cd = lj_cdata_new_(sbufL(sbuf->sb), + tp == SER_TAG_INT64 ? CTID_INT64 : + tp == SER_TAG_UINT64 ? 
CTID_UINT64 : CTID_COMPLEX_DOUBLE, + sz); + memcpy(cdataptr(cd), r, sz); r += sz; +#if LJ_BE + *(uint64_t *)cdataptr(cd) = lj_bswap64(*(uint64_t *)cdataptr(cd)); + if (sz == 16) + ((uint64_t *)cdataptr(cd))[1] = lj_bswap64(((uint64_t *)cdataptr(cd))[1]); +#endif + setcdataV(sbufL(sbuf->sb), o, cd); +#endif + } else if (tp <= (LJ_64 ? SER_TAG_LIGHTUD64 : SER_TAG_LIGHTUD32)) { + uintptr_t ud = 0; + if (tp == SER_TAG_LIGHTUD32) { + if (LJ_UNLIKELY(r + 4 > e)) goto eob; + ud = (uintptr_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r)); + r += 4; + } +#if LJ_64 + else if (tp == SER_TAG_LIGHTUD64) { + if (LJ_UNLIKELY(r + 8 > e)) goto eob; + memcpy(&ud, r, 8); r += 8; +#if LJ_BE + ud = lj_bswap64(ud); +#endif + } + setrawlightudV(o, lj_lightud_intern(sbufL(sbuf->sb), (void *)ud)); +#else + setrawlightudV(o, (void *)ud); +#endif + } else { + lj_err_callerv(sbufL(sbuf->sb), LJ_ERR_BUFFER_BADDEC, tp); + } + return r; +eob: + lj_err_caller(sbufL(sbuf->sb), LJ_ERR_BUFFER_EOB); + return NULL; +} + +StrBuf * LJ_FASTCALL lj_serialize_put(StrBuf *sbuf, cTValue *o) +{ + sbuf->depth = LJ_SERIALIZE_DEPTH; + setsbufP(sbuf->sb, serialize_put(sbufP(sbuf->sb), sbuf, o)); + return sbuf; +} + +StrBuf * LJ_FASTCALL lj_serialize_get(StrBuf *sbuf, TValue *o) +{ + char *r = serialize_get(sbuf->r, sbuf, o); + if (r != sbufP(sbuf->sb)) + lj_err_caller(sbufL(sbuf->sb), LJ_ERR_BUFFER_LEFTOV); + sbuf->r = r; + return sbuf; +} + diff --git a/src/lj_serialize.h b/src/lj_serialize.h new file mode 100644 index 00000000..95d62f4e --- /dev/null +++ b/src/lj_serialize.h @@ -0,0 +1,21 @@ +/* +** Object de/serialization. +** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_SERIALIZE_H +#define _LJ_SERIALIZE_H + +#include "lj_obj.h" +#include "lj_buf.h" + +#if LJ_HASBUFFER + +#define LJ_SERIALIZE_DEPTH 100 /* Default depth. 
*/ + +LJ_FUNC StrBuf * LJ_FASTCALL lj_serialize_put(StrBuf *sb, cTValue *o); +LJ_FUNC StrBuf * LJ_FASTCALL lj_serialize_get(StrBuf *sb, TValue *o); + +#endif + +#endif diff --git a/src/ljamalg.c b/src/ljamalg.c index 8e2d4937..384b3cc1 100644 --- a/src/ljamalg.c +++ b/src/ljamalg.c @@ -39,6 +39,7 @@ #include "lj_strscan.c" #include "lj_strfmt.c" #include "lj_strfmt_num.c" +#include "lj_serialize.c" #include "lj_api.c" #include "lj_profile.c" #include "lj_lex.c" @@ -85,5 +86,6 @@ #include "lib_bit.c" #include "lib_jit.c" #include "lib_ffi.c" +#include "lib_buffer.c" #include "lib_init.c" diff --git a/src/lualib.h b/src/lualib.h index 9cd39880..5c18e9ec 100644 --- a/src/lualib.h +++ b/src/lualib.h @@ -33,6 +33,7 @@ LUALIB_API int luaopen_debug(lua_State *L); LUALIB_API int luaopen_bit(lua_State *L); LUALIB_API int luaopen_jit(lua_State *L); LUALIB_API int luaopen_ffi(lua_State *L); +LUALIB_API int luaopen_string_buffer(lua_State *L); LUALIB_API void luaL_openlibs(lua_State *L);