Add compressed bytecode support – only LZF so far.

This commit is contained in:
Demetrios Obenour 2015-08-23 19:33:31 -04:00
parent ca7888944e
commit 393613ea6e
9 changed files with 216 additions and 13 deletions

View File

@ -108,6 +108,10 @@ XCFLAGS=
#XCFLAGS+= -DLUAJIT_NUMMODE=1
#XCFLAGS+= -DLUAJIT_NUMMODE=2
#
# Disable LZF-compressed bytecode. LZF is a very small library and
# saves more space than it uses even with small files, so it is
# enabled by default.
#XCFLAGS+= -DLUAJIT_DISABLE_COMPRESS
##############################################################################
##############################################################################
@ -165,6 +169,9 @@ XCFLAGS=
ASOPTIONS= $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS)
CCOPTIONS= $(CCDEBUG) $(ASOPTIONS)
LDOPTIONS= $(CCDEBUG) $(LDFLAGS)
# Link against liblzf unless compressed bytecode support has been disabled.
# $(filter) matches the flag as a whole word, so it is also detected when it
# is the last entry in XCFLAGS (no trailing space required).
ifeq (,$(filter -DLUAJIT_DISABLE_COMPRESS,$(XCFLAGS)))
LDOPTIONS+= -llzf
endif
HOST_CC= $(CC)
HOST_RM= rm -f

View File

@ -124,9 +124,10 @@ LJLIB_CF(string_dump)
{
GCfunc *fn = lj_lib_checkfunc(L, 1);
int strip = L->base+1 < L->top && tvistruecond(L->base+1);
int compress = L->base+2 < L->top && tvistruecond(L->base+2);
SBuf *sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */
L->top = L->base+1;
if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip))
if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip, compress))
lj_err_caller(L, LJ_ERR_STRDUMP);
setstrV(L, L->top-1, lj_buf_str(L, sb));
lj_gc_check(L);

View File

@ -9,6 +9,21 @@
Memcheck:Addr1
fun:lj_str_cmp
}
{
Optimized string compare
Memcheck:Addr4
fun:str_fastcmp
}
{
Optimized string compare
Memcheck:Addr1
fun:str_fastcmp
}
{
Optimized string compare
Memcheck:Cond
fun:str_fastcmp
}
{
Optimized string compare
Memcheck:Addr4

View File

@ -43,8 +43,9 @@
#define BCDUMP_F_STRIP 0x02
#define BCDUMP_F_FFI 0x04
#define BCDUMP_F_FR2 0x08
#define BCDUMP_F_COMPRESS 0x10
#define BCDUMP_F_KNOWN (BCDUMP_F_FR2*2-1)
#define BCDUMP_F_KNOWN (BCDUMP_F_COMPRESS*2-1)
/* Type codes for the GC constants of a prototype. Plus length for strings. */
enum {
@ -52,6 +53,12 @@ enum {
BCDUMP_KGC_COMPLEX, BCDUMP_KGC_STR
};
/*
** Compression algorithm identifiers. LZF is currently the only real
** compressor; the MEMCPY entry marks data that is stored uncompressed.
*/
typedef enum {
  BCDUMP_COMPRESS_MEMCPY = 0,  /* Not really compression. */
  BCDUMP_COMPRESS_LZF = 1  /* LZF compression. */
} compression_algorithms;
/* Type codes for the keys/values of a constant table. */
enum {
BCDUMP_KTAB_NIL, BCDUMP_KTAB_FALSE, BCDUMP_KTAB_TRUE,
@ -61,7 +68,7 @@ enum {
/* -- Bytecode reader/writer ---------------------------------------------- */
LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer,
void *data, int strip);
void *data, int strip, compression_algorithms compress);
LJ_FUNC GCproto *lj_bcread_proto(LexState *ls);
LJ_FUNC GCproto *lj_bcread(LexState *ls);

View File

@ -6,6 +6,10 @@
#define lj_bcread_c
#define LUA_CORE
#ifndef LUAJIT_DISABLE_COMPRESS
#include <lzf.h>
#endif
#include "lauxlib.h"
#include "lj_obj.h"
#include "lj_gc.h"
#include "lj_err.h"
@ -47,7 +51,7 @@ static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em)
/* Refill buffer. */
static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
{
lua_assert(len != 0);
lua_assert(len > 0);
if (len > LJ_MAX_BUF || ls->c < 0)
bcread_error(ls, LJ_ERR_BCBAD);
do {
@ -384,14 +388,36 @@ GCproto *lj_bcread_proto(LexState *ls)
return pt;
}
/* Read and check header of bytecode dump. */
#ifndef LUAJIT_DISABLE_COMPRESS
/*
** Signature of a decompression routine: reads srclen bytes from src and
** writes into the dstlen-byte buffer dst. The caller in this file treats a
** return value equal to the expected uncompressed size as success.
*/
typedef MSize (*lj_decompressor)(const char *src, MSize srclen,
                                 char *dst, MSize dstlen);
#if 0
/*
** Reader callback that reports no data: stores 0 in *size and returns NULL.
** The first two arguments are ignored.
*/
static char const *null_reader(lua_State *_ignored1, void *_ignored2, size_t *size)
{
  (void)_ignored1;
  (void)_ignored2;
  /* Explicit store + NULL instead of casting an integer expression to a pointer. */
  *size = 0;
  return NULL;
}
#endif
static LJ_AINLINE unsigned int lj_lzf_decompress(char const *src, MSize srclen, char *dst, MSize dstlen)
{
lua_assert(srclen <= UINT_MAX);
lua_assert(dstlen <= UINT_MAX);
unsigned int val = lzf_decompress(src, srclen, dst, dstlen);
return val;
}
#endif /* !defined LUAJIT_DISABLE_COMPRESS */
/*
** Read and check header of bytecode dump.
** Decompress if necessary.
*/
static int bcread_header(LexState *ls)
{
uint32_t flags;
bcread_want(ls, 3+5+5);
if (bcread_byte(ls) != BCDUMP_HEAD2 ||
bcread_byte(ls) != BCDUMP_HEAD3 ||
bcread_byte(ls) != BCDUMP_VERSION) return 0;
bcread_byte(ls) != BCDUMP_VERSION) return 0; /* Wrong magic number */
bcread_flags(ls) = flags = bcread_uleb128(ls);
if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0;
@ -407,6 +433,7 @@ static int bcread_header(LexState *ls)
return 0;
#endif
}
if ((flags & BCDUMP_F_STRIP)) {
ls->chunkname = lj_str_newz(ls->L, ls->chunkarg);
} else {
@ -414,6 +441,66 @@ static int bcread_header(LexState *ls)
bcread_need(ls, len);
ls->chunkname = lj_str_new(ls->L, (const char *)bcread_mem(ls, len), len);
}
if (flags & BCDUMP_F_COMPRESS) {
#ifndef LUAJIT_DISABLE_COMPRESS
/*
** The bytecode is compressed. We must decompress the file
** before doing anything.
**
** This is tricky. We need to do this transparently to C code using the
** Lua/C API. The solution is to interpose a decompression function
** between the C callback and the bytecode reader.
*/
lj_decompressor decompressor;
MSize uncompressed_size, compressed_size;
uint8_t byte;
bcread_want(ls, 1+5+5);
byte = bcread_byte(ls);
switch (byte) {
case BCDUMP_COMPRESS_MEMCPY: return 1; /* No compression */
case BCDUMP_COMPRESS_LZF: decompressor = lj_lzf_decompress; break;
default: /* Bad compression algorithm */ bcread_error(ls, LJ_ERR_BCBAD);
}
/* Read to end of chunk */
while (-1 != ls->c) { bcread_want(ls, 1ULL<<16); }
uncompressed_size = bcread_uleb128(ls);
compressed_size = ls->pe - ls->p;
/*
** We never compress data if the compressed data is longer than
** the uncompressed data, so a file with too little uncompressed
** data is invalid.
*/
if (compressed_size > uncompressed_size) return 0;
/*
** Store the compressed data in a temporary buffer in the lexer.
** This ensures that the decompressor does not need to decompress
** in-place and the temporary buffer can be freed even in the event
** of errors.
*/
ls->tempbuf = lj_mem_new(ls->L, compressed_size);
memcpy(ls->tempbuf, ls->p, compressed_size);
ls->sizetempbuf = compressed_size;
/*
** Reallocate the memory buffer used by the reader
** and do the actual decompression.
*/
ls->p = lj_buf_need(&ls->sb, uncompressed_size);;
ls->pe = ls->p + uncompressed_size;
if (decompressor(ls->tempbuf, compressed_size, sbufB(&ls->sb),
uncompressed_size) != uncompressed_size) {
/* Either the compressed data is invalid or its size is wrong */
bcread_error(ls, LJ_ERR_BCBAD);
}
#else /* defined LUAJIT_DISABLE_COMPRESS */
return 0;
#endif /* !defined LUAJIT_DISABLE_COMPRESS */
}
return 1; /* Ok. */
}
@ -427,6 +514,7 @@ GCproto *lj_bcread(LexState *ls)
/* Check for a valid bytecode dump header. */
if (!bcread_header(ls))
bcread_error(ls, LJ_ERR_BCFMT);
for (;;) { /* Process all prototypes in the bytecode dump. */
GCproto *pt;
MSize len;

View File

@ -6,6 +6,10 @@
#define lj_bcwrite_c
#define LUA_CORE
/* LZF is only needed when compressed bytecode support is compiled in. */
#ifndef LUAJIT_DISABLE_COMPRESS
#include <lzf.h>
#endif
#include "lj_obj.h"
#include "lj_gc.h"
#include "lj_buf.h"
@ -29,6 +33,9 @@ typedef struct BCWriteCtx {
void *wdata; /* Writer callback data. */
int strip; /* Strip debug info. */
int status; /* Status from writer callback. */
#ifndef LUAJIT_DISABLE_COMPRESS
compression_algorithms should_compress; /* Should I compress? */
#endif
} BCWriteCtx;
/* -- Bytecode writer ----------------------------------------------------- */
@ -311,7 +318,11 @@ static void bcwrite_header(BCWriteCtx *ctx)
*p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) +
LJ_BE*BCDUMP_F_BE +
((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0) +
LJ_FR2*BCDUMP_F_FR2;
LJ_FR2*BCDUMP_F_FR2
#ifndef LUAJIT_DISABLE_COMPRESS
+ (ctx->should_compress ? BCDUMP_F_COMPRESS : 0)
#endif
;
if (!ctx->strip) {
p = lj_strfmt_wuleb128(p, len);
p = lj_buf_wmem(p, name, len);
@ -329,6 +340,25 @@ static void bcwrite_footer(BCWriteCtx *ctx)
}
}
#ifndef LUAJIT_DISABLE_COMPRESS
/*
** State used while bytecode output is captured for compression: the
** caller's writer and its data are kept here while lj_push_data collects
** the output in a buffer.
*/
typedef struct extraWriterData {
  BCWriteCtx *context;  /* Write context whose writer points at this struct. */
  SBuf buffer;  /* Accumulates the data handed to lj_push_data. */
  lua_Writer writer;  /* Saved original writer callback. */
  void *ud;  /* Saved original writer callback data. */
} extraWriterData;
/*
** Writer callback that appends each chunk to the buffer of the
** extraWriterData passed as ud instead of emitting it.
** Always returns 0.
*/
static int lj_push_data(lua_State *L, const void *p, size_t sz, void *ud)
{
  extraWriterData *capture = (extraWriterData *)ud;
  /* Sanity checks: the write context must currently be redirected to us. */
  lua_assert(capture->context->wdata == capture);
  lua_assert(capture->context->wfunc == lj_push_data);
  lj_buf_putmem(&capture->buffer, p, sz);
  (void)L;  /* Unused. */
  return 0;
}
#endif /* !defined LUAJIT_DISABLE_COMPRESS */
/* Protected callback for bytecode writer. */
static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud)
{
@ -336,14 +366,63 @@ static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud)
UNUSED(L); UNUSED(dummy);
lj_buf_need(&ctx->sb, 1024); /* Avoids resize for most prototypes. */
bcwrite_header(ctx);
#ifndef LUAJIT_DISABLE_COMPRESS
if (ctx->should_compress) {
extraWriterData data;
lj_buf_init(L, &data.buffer);
data.context = ctx;
data.ud = ctx->wdata;
data.writer = ctx->wfunc;
ctx->wdata = &data;
ctx->wfunc = lj_push_data;
bcwrite_proto(ctx, ctx->pt);
bcwrite_footer(ctx);
{
const MSize buflen = sbuflen(&data.buffer);
char *compressed_buf = lj_mem_new(L, buflen);
size_t compressed_size = 0;
switch (ctx->should_compress) {
case BCDUMP_COMPRESS_MEMCPY: lua_assert(0 &&
"Tried to use null compression algorithm"); abort();
case BCDUMP_COMPRESS_LZF:
compressed_size = lzf_compress(sbufB(&data.buffer), buflen,
compressed_buf, buflen);
break;
default:
/* Invalid compression algorithm */
ctx->status = 1;
return NULL;
}
if (compressed_size) {
char sizes[6];
char *ptr = lj_strfmt_wuleb128(sizes + 1, buflen);
sizes[0] = ctx->should_compress;
ctx->status = data.writer(L, sizes, ptr - sizes, data.ud);
if (!ctx->status) {
ctx->status = data.writer(L, compressed_buf, compressed_size, data.ud);
}
} else {
int zero = BCDUMP_COMPRESS_MEMCPY;
ctx->status = data.writer(L, &zero, 1, data.ud);
if (!ctx->status) data.writer(L, sbufB(&data.buffer), buflen, data.ud);
}
lj_buf_free(G(L), &data.buffer);
lj_mem_free(G(L), compressed_buf, buflen);
ctx->wfunc = data.writer;
ctx->wdata = data.ud;
}
} else
#endif
{
bcwrite_proto(ctx, ctx->pt);
bcwrite_footer(ctx);
}
return NULL;
}
/* Write bytecode for a prototype. */
int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
int strip)
int strip, compression_algorithms compress)
{
BCWriteCtx ctx;
int status;
@ -352,6 +431,9 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
ctx.wdata = data;
ctx.strip = strip;
ctx.status = 0;
#ifndef LUAJIT_DISABLE_COMPRESS
ctx.should_compress = compress;
#endif
lj_buf_init(L, &ctx.sb);
status = lj_vm_cpcall(L, NULL, &ctx, cpwriter);
if (status == 0) status = ctx.status;

View File

@ -376,6 +376,8 @@ int lj_lex_setup(lua_State *L, LexState *ls)
ls->bcstack = NULL;
ls->sizebcstack = 0;
ls->tok = 0;
ls->tempbuf = NULL;
ls->sizetempbuf = 0;
ls->lookahead = TK_eof; /* No look-ahead token. */
ls->linenumber = 1;
ls->lastline = 1;
@ -416,6 +418,7 @@ void lj_lex_cleanup(lua_State *L, LexState *ls)
global_State *g = G(L);
lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine);
lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo);
if (ls->tempbuf) lj_mem_free(g, ls->tempbuf, ls->sizetempbuf);
lj_buf_free(g, &ls->sb);
}
@ -479,4 +482,3 @@ void lj_lex_init(lua_State *L)
s->reserved = (uint8_t)(i+1);
}
}

View File

@ -72,6 +72,8 @@ typedef struct LexState {
MSize vtop; /* Top of variable stack. */
BCInsLine *bcstack; /* Stack for bytecode instructions/line numbers. */
MSize sizebcstack; /* Size of bytecode stack. */
char *tempbuf; /* Temporary buffer for decompression */
MSize sizetempbuf; /* Size of temporary buffer */
uint32_t level; /* Syntactical nesting level. */
} LexState;
@ -82,5 +84,4 @@ LJ_FUNC LexToken lj_lex_lookahead(LexState *ls);
LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken tok);
LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...);
LJ_FUNC void lj_lex_init(lua_State *L);
#endif

View File

@ -161,7 +161,7 @@ LUA_API int lua_dump(lua_State *L, lua_Writer writer, void *data)
cTValue *o = L->top-1;
api_check(L, L->top > L->base);
if (tvisfunc(o) && isluafunc(funcV(o)))
return lj_bcwrite(L, funcproto(funcV(o)), writer, data, 0);
return lj_bcwrite(L, funcproto(funcV(o)), writer, data, 0, BCDUMP_COMPRESS_LZF);
else
return 1;
}