From 60b5af44222fff87a7184365a7f49654032d4cbf Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 8 Feb 2010 05:26:52 +0100 Subject: [PATCH] Redesign of prototype generation, part 1: varinfo and uvname. Use a growable, per-chunk variable stack. Collect varinfo/uvname for prototype at the end. --- src/lj_api.c | 6 +- src/lj_errmsg.h | 1 + src/lj_gc.c | 6 +- src/lj_lex.c | 14 +++- src/lj_lex.h | 6 +- src/lj_parse.c | 172 ++++++++++++++++++++++++------------------------ 6 files changed, 109 insertions(+), 96 deletions(-) diff --git a/src/lj_api.c b/src/lj_api.c index 8216c395..48bd605d 100644 --- a/src/lj_api.c +++ b/src/lj_api.c @@ -1102,7 +1102,7 @@ static TValue *cpparser(lua_State *L, lua_CFunction dummy, void *ud) GCfunc *fn; UNUSED(dummy); cframe_errfunc(L->cframe) = -1; /* Inherit error function. */ - lj_lex_start(L, ls); + lj_lex_setup(L, ls); fn = lj_func_newL(L, lj_parse(ls), tabref(L->env)); /* Parser may realloc stack. Don't combine above/below into one statement. */ setfuncV(L, L->top++, fn); @@ -1114,14 +1114,12 @@ LUA_API int lua_load(lua_State *L, lua_Reader reader, void *data, { LexState ls; int status; - global_State *g; ls.rfunc = reader; ls.rdata = data; ls.chunkarg = chunkname ? chunkname : "?"; lj_str_initbuf(L, &ls.sb); status = lj_vm_cpcall(L, NULL, &ls, cpparser); - g = G(L); - lj_str_freebuf(g, &ls.sb); + lj_lex_cleanup(L, &ls); lj_gc_check(L); return status; } diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h index 826366a6..0a2d9dd7 100644 --- a/src/lj_errmsg.h +++ b/src/lj_errmsg.h @@ -118,6 +118,7 @@ ERRDEF(XBCLOAD, "cannot load Lua bytecode") ERRDEF(XTOKEN, LUA_QS " expected") ERRDEF(XJUMP, "control structure too long") ERRDEF(XSLOTS, "function or expression too complex") +ERRDEF(XLIMC, "chunk has more than %d local variables") ERRDEF(XLIMM, "main function has more than %d %s") ERRDEF(XLIMF, "function at line %d has more than %d %s") ERRDEF(XMATCH, LUA_QS " expected (to close " LUA_QS " at line %d)") diff --git a/src/lj_gc.c b/src/lj_gc.c index c1ade95a..5750d0ef 100644 --- a/src/lj_gc.c +++ b/src/lj_gc.c @@ -256,11 +256,9 @@ static void gc_traverse_proto(global_State *g, GCproto *pt) for (i = -(ptrdiff_t)pt->sizekgc; i < 0; i++) /* Mark collectable consts. */ gc_markobj(g, proto_kgc(pt, i)); for (i = 0; i < (ptrdiff_t)pt->sizeuvname; i++) /* Mark upvalue names. */ - if (proto_uvname(pt, i)) - gc_mark_str(gco2str(proto_uvname(pt, i))); + gc_mark_str(gco2str(proto_uvname(pt, i))); for (i = 0; i < (ptrdiff_t)pt->sizevarinfo; i++) /* Mark names of locals. */ - if (gcref(proto_varinfo(pt)[i].name)) - gc_mark_str(gco2str(gcref(proto_varinfo(pt)[i].name))); + gc_mark_str(gco2str(gcref(proto_varinfo(pt)[i].name))); } /* Traverse the frame structure of a stack. */ diff --git a/src/lj_lex.c b/src/lj_lex.c index d1df138e..28aad901 100644 --- a/src/lj_lex.c +++ b/src/lj_lex.c @@ -301,12 +301,16 @@ static int llex(LexState *ls, TValue *tv) /* -- Lexer API ----------------------------------------------------------- */ -void lj_lex_start(lua_State *L, LexState *ls) +/* Setup lexer state. */ +void lj_lex_setup(lua_State *L, LexState *ls) { ls->L = L; ls->fs = NULL; ls->n = 0; ls->p = NULL; + ls->vstack = NULL; + ls->sizevstack = 0; + ls->vtop = 0; ls->lookahead = TK_eof; /* No look-ahead token. */ ls->linenumber = 1; ls->lastline = 1; @@ -335,6 +339,14 @@ void lj_lex_start(lua_State *L, LexState *ls) ls->chunkname = lj_str_newz(L, ls->chunkarg); } +/* Cleanup lexer state. */ +void lj_lex_cleanup(lua_State *L, LexState *ls) +{ + global_State *g = G(L); + lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo); + lj_str_freebuf(g, &ls->sb); +} + void lj_lex_next(LexState *ls) { ls->lastline = ls->linenumber; diff --git a/src/lj_lex.h b/src/lj_lex.h index cc5d5a9f..ee183b40 100644 --- a/src/lj_lex.h +++ b/src/lj_lex.h @@ -50,10 +50,14 @@ typedef struct LexState { BCLine lastline; /* Line of last token. */ GCstr *chunkname; /* Current chunk name (interned string). */ const char *chunkarg; /* Chunk name argument. */ + VarInfo *vstack; /* Stack for names and extents of local variables. */ + MSize sizevstack; /* Size of variable stack. */ + MSize vtop; /* Top of variable stack. */ uint32_t level; /* Syntactical nesting level. */ } LexState; -LJ_FUNC void lj_lex_start(lua_State *L, LexState *ls); +LJ_FUNC void lj_lex_setup(lua_State *L, LexState *ls); +LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls); LJ_FUNC void lj_lex_next(LexState *ls); LJ_FUNC LexToken lj_lex_lookahead(LexState *ls); LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken token); diff --git a/src/lj_parse.c b/src/lj_parse.c index 96136280..d949698f 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -88,11 +88,15 @@ typedef struct FuncScope { uint8_t isbreakable; /* Scope is a loop and allows a break. */ } FuncScope; -/* Upvalue type and location. */ -typedef struct UVLoc { - uint8_t k; /* Upvalue type (ExpKind). */ - uint8_t info; /* Upvalue location (BCReg or uvidx). */ -} UVLoc; +/* Index into variable stack. */ +typedef uint16_t VarIndex; +#define LJ_MAX_VSTACK 65536 + +/* Upvalue map. */ +typedef struct UVMap { + VarIndex vidx; /* Varinfo index. */ + uint16_t slot; /* Slot or parent upvalue index. */ +} UVMap; /* Per-function state. */ typedef struct FuncState { @@ -108,13 +112,13 @@ typedef struct FuncState { BCReg freereg; /* First free register. */ BCReg nkn, nkgc; /* Number of lua_Number/GCobj constants */ BCLine linedefined; /* First line of the function definition. */ - MSize nvarinfo; /* Number of entries in varinfo. */ + MSize vbase; /* Base of variable stack for this function. */ uint8_t nactvar; /* Number of active local variables. */ uint8_t flags; /* Prototype flags. */ uint8_t framesize; /* Fixed frame size. */ uint8_t nuv; /* Number of upvalues */ - uint16_t varmap[LJ_MAX_LOCVAR]; /* Map from register to varinfo index. */ - UVLoc uvloc[LJ_MAX_UPVAL]; /* Upvalue type and location. */ + VarIndex varmap[LJ_MAX_LOCVAR]; /* Map from register to variable idx. */ + UVMap uvloc[LJ_MAX_UPVAL]; /* Map from upvalue to variable idx and slot. */ } FuncState; /* Binary and unary operators. ORDER OPR */ @@ -924,28 +928,27 @@ static GCstr *lex_str(LexState *ls) /* -- Variable handling --------------------------------------------------- */ -#define var_get(fs, i) (proto_varinfo((fs)->pt)[(fs)->varmap[(i)]]) +#define var_get(ls, fs, i) ((ls)->vstack[(fs)->varmap[(i)]]) /* Define a new local variable. */ static void var_new(LexState *ls, BCReg n, GCstr *name) { FuncState *fs = ls->fs; - GCproto *pt = fs->pt; - VarInfo *varinfo = proto_varinfo(pt); + MSize vtop = ls->vtop; checklimit(fs, fs->nactvar+n, LJ_MAX_LOCVAR, "local variables"); - if (LJ_UNLIKELY(fs->nvarinfo >= pt->sizevarinfo)) { - MSize oldsize = pt->sizevarinfo; - checklimit(fs, fs->nvarinfo, 32767, "local variables"); - lj_mem_growvec(fs->L, varinfo, pt->sizevarinfo, 32767, VarInfo); - setmref(pt->varinfo, varinfo); - while (oldsize < pt->sizevarinfo) setgcrefnull(varinfo[oldsize++].name); + if (LJ_UNLIKELY(vtop >= ls->sizevstack)) { + if (ls->sizevstack >= LJ_MAX_VSTACK) + lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK); + lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo); } - setgcref(varinfo[fs->nvarinfo].name, obj2gco(name)); - lj_gc_objbarrier(ls->L, pt, name); - fs->varmap[fs->nactvar+n] = (uint16_t)(fs->nvarinfo++); + lua_assert(lj_tab_getstr(fs->kt, name) != NULL); + /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */ + setgcref(ls->vstack[vtop].name, obj2gco(name)); + fs->varmap[fs->nactvar+n] = (uint16_t)vtop; + ls->vtop = vtop+1; } -#define var_new_lit(ls, v, n) \ +#define var_new_lit(ls, n, v) \ var_new(ls, (n), lj_parse_keepstr(ls, "" v, sizeof(v)-1)) /* Add local variables. */ @@ -954,7 +957,7 @@ static void var_add(LexState *ls, BCReg nvars) FuncState *fs = ls->fs; fs->nactvar = cast_byte(fs->nactvar + nvars); for (; nvars; nvars--) - var_get(fs, fs->nactvar - nvars).startpc = fs->pc; + var_get(ls, fs, fs->nactvar - nvars).startpc = fs->pc; } /* Remove local variables. */ @@ -962,7 +965,7 @@ static void var_remove(LexState *ls, BCReg tolevel) { FuncState *fs = ls->fs; while (fs->nactvar > tolevel) - var_get(fs, --fs->nactvar).endpc = fs->pc; + var_get(ls, fs, --fs->nactvar).endpc = fs->pc; } /* Lookup local variable name. */ @@ -970,46 +973,33 @@ static BCReg var_lookup_local(FuncState *fs, GCstr *n) { int i; for (i = fs->nactvar-1; i >= 0; i--) { - if (n == gco2str(gcref(var_get(fs, i).name))) + if (n == gco2str(gcref(var_get(fs->ls, fs, i).name))) return (BCReg)i; } return (BCReg)-1; /* Not found. */ } -/* Lookup upvalue name. */ -static uint32_t var_lookup_uv(FuncState *fs, GCstr *name, ExpDesc *v) +/* Lookup or add upvalue index. */ +static MSize var_lookup_uv(FuncState *fs, MSize vidx, ExpDesc *e) { - uint32_t i; - GCproto *pt = fs->pt; - GCRef *uvname; - for (i = 0; i < fs->nuv; i++) { - if (fs->uvloc[i].info == v->u.s.info && fs->uvloc[i].k == v->k) { - lua_assert(gco2str(proto_uvname(pt, i)) == name); - return i; - } - } - /* Not found, create a new upvalue for this name. */ - uvname = mref(pt->uvname, GCRef); - if (LJ_UNLIKELY(fs->nuv >= pt->sizeuvname)) { - MSize oldsize = pt->sizeuvname; - checklimit(fs, fs->nuv, LJ_MAX_UPVAL, "upvalues"); - lj_mem_growvec(fs->L, uvname, pt->sizeuvname, LJ_MAX_UPVAL, GCRef); - setmref(pt->uvname, uvname); - while (oldsize < pt->sizeuvname) setgcrefnull(uvname[oldsize++]); - } - setgcref(uvname[fs->nuv], obj2gco(name)); - lj_gc_objbarrier(fs->L, pt, name); - lua_assert(v->k == VLOCAL || v->k == VUPVAL); - fs->uvloc[fs->nuv].k = cast_byte(v->k); - fs->uvloc[fs->nuv].info = cast_byte(v->u.s.info); - return fs->nuv++; + MSize i, n = fs->nuv; + for (i = 0; i < n; i++) + if (fs->uvloc[i].vidx == vidx) + return i; /* Already exists. */ + /* Otherwise create a new one. */ + checklimit(fs, fs->nuv, LJ_MAX_UPVAL, "upvalues"); + lua_assert(e->k == VLOCAL || e->k == VUPVAL); + fs->uvloc[n].vidx = (uint16_t)vidx; + fs->uvloc[n].slot = (uint16_t)(e->u.s.info | (e->k == VLOCAL ? 0x8000 : 0)); + fs->nuv = n+1; + return n; } /* Forward declaration. */ static void scope_uvmark(FuncState *fs, BCReg level); /* Recursively lookup variables in enclosing functions. */ -static int var_lookup_(FuncState *fs, GCstr *name, ExpDesc *e, int first) +static MSize var_lookup_(FuncState *fs, GCstr *name, ExpDesc *e, int first) { if (fs) { BCReg reg = var_lookup_local(fs, name); @@ -1017,17 +1007,20 @@ static int var_lookup_(FuncState *fs, GCstr *name, ExpDesc *e, int first) expr_init(e, VLOCAL, reg); if (!first) scope_uvmark(fs, reg); /* Scope now has an upvalue. */ - return 1; - } else if (var_lookup_(fs->prev, name, e, 0)) { /* In outer function? */ - e->u.s.info = var_lookup_uv(fs, name, e); /* Make it an upvalue here. */ - e->k = VUPVAL; - return 1; + return (MSize)fs->varmap[reg]; + } else { + MSize vidx = var_lookup_(fs->prev, name, e, 0); /* Var in outer func? */ + if ((int32_t)vidx >= 0) { /* Yes, make it an upvalue here. */ + e->u.s.info = (uint8_t)var_lookup_uv(fs, vidx, e); + e->k = VUPVAL; + return vidx; + } } } else { /* Not found in any function, must be a global. */ expr_init(e, VGLOBAL, 0); e->u.sval = name; } - return 0; /* Global. */ + return (MSize)-1; /* Global. */ } /* Lookup variable name. */ @@ -1080,15 +1073,12 @@ static void fs_fixup_k(FuncState *fs, GCproto *pt) /* Fixup upvalues for prototype. */ static void fs_fixup_uv(FuncState *fs, GCproto *pt) { - uint32_t i; - uint16_t *uv = lj_mem_newvec(fs->L, fs->nuv, uint16_t); + MSize i, n = fs->nuv; + uint16_t *uv = lj_mem_newvec(fs->L, n, uint16_t); setmref(pt->uv, uv); - pt->sizeuv = fs->nuv; - for (i = 0; i < pt->sizeuv; i++) { - uint32_t v = fs->uvloc[i].info; - if (fs->uvloc[i].k == VLOCAL) v |= 0x8000; - uv[i] = (uint16_t)v; - } + pt->sizeuv = n; + for (i = 0; i < n; i++) + uv[i] = fs->uvloc[i].slot; } /* Check if bytecode op returns. */ @@ -1143,9 +1133,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line) FuncState *fs = ls->fs; GCproto *pt = fs->pt; BCIns *bc; - GCRef *uvname; BCLine *lineinfo; - VarInfo *varinfo; /* Apply final fixups. */ var_remove(ls, 0); @@ -1156,21 +1144,31 @@ static GCproto *fs_finish(LexState *ls, BCLine line) lj_mem_reallocvec(L, bc, pt->sizebc, fs->pc, BCIns); setmref(pt->bc, bc); pt->sizebc = fs->pc; + fs_fixup_k(fs, pt); fs_fixup_uv(fs, pt); + lineinfo = proto_lineinfo(pt); lj_mem_reallocvec(L, lineinfo, pt->sizelineinfo, fs->pc, BCLine); setmref(pt->lineinfo, lineinfo); pt->sizelineinfo = fs->pc; - varinfo = proto_varinfo(pt); - lj_mem_reallocvec(L, varinfo, pt->sizevarinfo, fs->nvarinfo, VarInfo); - setmref(pt->varinfo, varinfo); - pt->sizevarinfo = fs->nvarinfo; - uvname = mref(pt->uvname, GCRef); - lj_mem_reallocvec(L, uvname, pt->sizeuvname, fs->nuv, GCRef); - setmref(pt->uvname, uvname); - pt->sizeuvname = fs->nuv; - lua_assert(fs->bl == NULL); + + { + MSize n = ls->vtop - fs->vbase; + VarInfo *vi = lj_mem_newvec(L, n, VarInfo); + memcpy(vi, &ls->vstack[fs->vbase], n*sizeof(VarInfo)); + setmref(pt->varinfo, vi); + pt->sizevarinfo = n; + } + + { + MSize i, n = fs->nuv; + GCRef *uvname = lj_mem_newvec(L, n, GCRef); + for (i = 0; i < n; i++) + setgcref(uvname[i], gcref(ls->vstack[fs->uvloc[i].vidx].name)); + setmref(pt->uvname, uvname); + pt->sizeuvname = n; + } /* Initialize prototype fields. */ pt->flags = fs->flags; @@ -1182,7 +1180,9 @@ static GCproto *fs_finish(LexState *ls, BCLine line) setprotoV(L, L->top++, pt); ); - /* Remove FuncState from list. Pop const table and prototype. */ + /* Remove VarInfo and FuncState. Pop const table and prototype. */ + lua_assert(fs->bl == NULL); + ls->vtop = fs->vbase; ls->fs = fs->prev; L->top -= 2; lua_assert(ls->fs != NULL || ls->token == TK_eof); @@ -1202,6 +1202,7 @@ static void fs_init(LexState *ls, FuncState *fs, BCLine line) fs->pt = pt; fs->prev = ls->fs; ls->fs = fs; /* Append to list. */ fs->ls = ls; + fs->vbase = ls->vtop; fs->L = L; fs->pc = 0; fs->lasttarget = 0; @@ -1209,7 +1210,6 @@ static void fs_init(LexState *ls, FuncState *fs, BCLine line) fs->freereg = 0; fs->nkgc = 0; fs->nkn = 0; - fs->nvarinfo = 0; fs->nactvar = 0; fs->nuv = 0; fs->bl = NULL; @@ -1382,7 +1382,7 @@ static void parse_params(LexState *ls, int needself) BCReg nparams = 0; lex_check(ls, '('); if (needself) { - var_new_lit(ls, "self", 0); + var_new_lit(ls, 0, "self"); var_add(ls, 1); } if (ls->token != ')') { @@ -1921,7 +1921,7 @@ static void parse_local(LexState *ls) parse_body(ls, &b, 0, ls->linenumber); bcemit_store(fs, &v, &b); /* The upvalue is in scope, but the local is only valid after the store. */ - var_get(fs, fs->nactvar - 1).startpc = fs->pc; + var_get(ls, fs, fs->nactvar - 1).startpc = fs->pc; } else { /* Local variable declaration. */ ExpDesc e; BCReg nexps, nvars = 0; @@ -2045,9 +2045,9 @@ static void parse_for_num(LexState *ls, GCstr *varname, BCLine line) FuncState *fs = ls->fs; BCReg base = fs->freereg; /* Hidden control variables. */ - var_new_lit(ls, "(for index)", FORL_IDX); - var_new_lit(ls, "(for limit)", FORL_STOP); - var_new_lit(ls, "(for step)", FORL_STEP); + var_new_lit(ls, FORL_IDX, "(for index)"); + var_new_lit(ls, FORL_STOP, "(for limit)"); + var_new_lit(ls, FORL_STEP, "(for step)"); /* Visible copy of index variable. */ var_new(ls, FORL_EXT, varname); lex_check(ls, '='); @@ -2072,9 +2072,9 @@ static void parse_for_iter(LexState *ls, GCstr *indexname) BCLine line; BCReg base = fs->freereg; /* Hidden control variables. */ - var_new_lit(ls, "(for generator)", nvars++); - var_new_lit(ls, "(for state)", nvars++); - var_new_lit(ls, "(for control)", nvars++); + var_new_lit(ls, nvars++, "(for generator)"); + var_new_lit(ls, nvars++, "(for state)"); + var_new_lit(ls, nvars++, "(for control)"); /* Visible variables returned from iterator. */ var_new(ls, nvars++, indexname); while (lex_opt(ls, ','))