Clean up lexer source code.

This commit is contained in:
Mike Pall 2013-02-28 01:11:49 +01:00
parent 116cdd7e9a
commit 87c51e7f57
5 changed files with 218 additions and 204 deletions

View File

@ -47,7 +47,7 @@ static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em)
static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
{ {
lua_assert(len != 0); lua_assert(len != 0);
if (len > LJ_MAX_MEM || ls->current < 0) if (len > LJ_MAX_MEM || ls->c < 0)
bcread_error(ls, LJ_ERR_BCBAD); bcread_error(ls, LJ_ERR_BCBAD);
do { do {
const char *buf; const char *buf;
@ -66,7 +66,7 @@ static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
buf = ls->rfunc(ls->L, ls->rdata, &size); /* Get more data from reader. */ buf = ls->rfunc(ls->L, ls->rdata, &size); /* Get more data from reader. */
if (buf == NULL || size == 0) { /* EOF? */ if (buf == NULL || size == 0) { /* EOF? */
if (need) bcread_error(ls, LJ_ERR_BCBAD); if (need) bcread_error(ls, LJ_ERR_BCBAD);
ls->current = -1; /* Only bad if we get called again. */ ls->c = -1; /* Only bad if we get called again. */
break; break;
} }
if (sbuflen(&ls->sb)) { /* Append to buffer. */ if (sbuflen(&ls->sb)) { /* Append to buffer. */
@ -430,7 +430,7 @@ static int bcread_header(LexState *ls)
GCproto *lj_bcread(LexState *ls) GCproto *lj_bcread(LexState *ls)
{ {
lua_State *L = ls->L; lua_State *L = ls->L;
lua_assert(ls->current == BCDUMP_HEAD1); lua_assert(ls->c == BCDUMP_HEAD1);
bcread_savetop(L, ls, L->top); bcread_savetop(L, ls, L->top);
lj_buf_reset(&ls->sb); lj_buf_reset(&ls->sb);
/* Check for a valid bytecode dump header. */ /* Check for a valid bytecode dump header. */

View File

@ -38,37 +38,48 @@ TKDEF(TKSTR1, TKSTR2)
/* -- Buffer handling ----------------------------------------------------- */ /* -- Buffer handling ----------------------------------------------------- */
#define char2int(c) ((int)(uint8_t)(c)) #define LEX_EOF (-1)
#define next(ls) \ #define lex_iseol(ls) (ls->c == '\n' || ls->c == '\r')
(ls->current = (ls->n--) > 0 ? char2int(*ls->p++) : fillbuf(ls))
#define save_and_next(ls) (save(ls, ls->current), next(ls))
#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r')
#define END_OF_STREAM (-1)
static int fillbuf(LexState *ls) /* Get more input from reader. */
static LJ_NOINLINE LexChar lex_more(LexState *ls)
{ {
size_t sz; size_t sz;
const char *buf = ls->rfunc(ls->L, ls->rdata, &sz); const char *buf = ls->rfunc(ls->L, ls->rdata, &sz);
if (buf == NULL || sz == 0) return END_OF_STREAM; if (buf == NULL || sz == 0) return LEX_EOF;
ls->n = (MSize)sz - 1; ls->n = (MSize)sz - 1;
ls->p = buf; ls->p = buf;
return char2int(*(ls->p++)); return (LexChar)(uint8_t)*ls->p++;
} }
static LJ_AINLINE void save(LexState *ls, int c) /* Get next character. */
static LJ_AINLINE LexChar lex_next(LexState *ls)
{
return (ls->c = ls->n ? (ls->n--,(LexChar)(uint8_t)*ls->p++) : lex_more(ls));
}
/* Save character. */
static LJ_AINLINE void lex_save(LexState *ls, LexChar c)
{ {
lj_buf_putb(ls->L, &ls->sb, c); lj_buf_putb(ls->L, &ls->sb, c);
} }
static void inclinenumber(LexState *ls) /* Save previous character and get next character. */
static LJ_AINLINE LexChar lex_savenext(LexState *ls)
{ {
int old = ls->current; lex_save(ls, ls->c);
lua_assert(currIsNewline(ls)); return lex_next(ls);
next(ls); /* skip `\n' or `\r' */ }
if (currIsNewline(ls) && ls->current != old)
next(ls); /* skip `\n\r' or `\r\n' */ /* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */
static void lex_newline(LexState *ls)
{
LexChar old = ls->c;
lua_assert(lex_iseol(ls));
lex_next(ls); /* Skip "\n" or "\r". */
if (lex_iseol(ls) && ls->c != old) lex_next(ls); /* Skip "\n\r" or "\r\n". */
if (++ls->linenumber >= LJ_MAX_LINE) if (++ls->linenumber >= LJ_MAX_LINE)
lj_lex_error(ls, ls->token, LJ_ERR_XLINES); lj_lex_error(ls, ls->tok, LJ_ERR_XLINES);
} }
/* -- Scanner for terminals ----------------------------------------------- */ /* -- Scanner for terminals ----------------------------------------------- */
@ -77,18 +88,16 @@ static void inclinenumber(LexState *ls)
static void lex_number(LexState *ls, TValue *tv) static void lex_number(LexState *ls, TValue *tv)
{ {
StrScanFmt fmt; StrScanFmt fmt;
int c, xp = 'e'; LexChar c, xp = 'e';
lua_assert(lj_char_isdigit(ls->current)); lua_assert(lj_char_isdigit(ls->c));
if ((c = ls->current) == '0') { if ((c = ls->c) == '0' && (lex_savenext(ls) | 0x20) == 'x')
save_and_next(ls); xp = 'p';
if ((ls->current | 0x20) == 'x') xp = 'p'; while (lj_char_isident(ls->c) || ls->c == '.' ||
((ls->c == '-' || ls->c == '+') && (c | 0x20) == xp)) {
c = ls->c;
lex_savenext(ls);
} }
while (lj_char_isident(ls->current) || ls->current == '.' || lex_save(ls, '\0');
((ls->current == '-' || ls->current == '+') && (c | 0x20) == xp)) {
c = ls->current;
save_and_next(ls);
}
save(ls, '\0');
fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), tv, fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), tv,
(LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) | (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
(LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0)); (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
@ -122,44 +131,42 @@ static void lex_number(LexState *ls, TValue *tv)
} }
} }
static int skip_sep(LexState *ls) /* Skip equal signs for "[=...=[" and "]=...=]" and return their count. */
static int lex_skipeq(LexState *ls)
{ {
int count = 0; int count = 0;
int s = ls->current; LexChar s = ls->c;
lua_assert(s == '[' || s == ']'); lua_assert(s == '[' || s == ']');
save_and_next(ls); while (lex_savenext(ls) == '=')
while (ls->current == '=') {
save_and_next(ls);
count++; count++;
} return (ls->c == s) ? count : (-count) - 1;
return (ls->current == s) ? count : (-count) - 1;
} }
static void read_long_string(LexState *ls, TValue *tv, int sep) /* Parse a long string or long comment (tv set to NULL). */
static void lex_longstring(LexState *ls, TValue *tv, int sep)
{ {
save_and_next(ls); /* skip 2nd `[' */ lex_savenext(ls); /* Skip second '['. */
if (currIsNewline(ls)) /* string starts with a newline? */ if (lex_iseol(ls)) /* Skip initial newline. */
inclinenumber(ls); /* skip it */ lex_newline(ls);
for (;;) { for (;;) {
switch (ls->current) { switch (ls->c) {
case END_OF_STREAM: case LEX_EOF:
lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM); lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM);
break; break;
case ']': case ']':
if (skip_sep(ls) == sep) { if (lex_skipeq(ls) == sep) {
save_and_next(ls); /* skip 2nd `]' */ lex_savenext(ls); /* Skip second ']'. */
goto endloop; goto endloop;
} }
break; break;
case '\n': case '\n':
case '\r': case '\r':
save(ls, '\n'); lex_save(ls, '\n');
inclinenumber(ls); lex_newline(ls);
if (!tv) lj_buf_reset(&ls->sb); /* Don't waste space for comments. */ if (!tv) lj_buf_reset(&ls->sb); /* Don't waste space for comments. */
break; break;
default: default:
if (tv) save_and_next(ls); lex_savenext(ls);
else next(ls);
break; break;
} }
} endloop: } endloop:
@ -170,12 +177,14 @@ static void read_long_string(LexState *ls, TValue *tv, int sep)
} }
} }
static void read_string(LexState *ls, int delim, TValue *tv) /* Parse a string. */
static void lex_string(LexState *ls, TValue *tv)
{ {
save_and_next(ls); LexChar delim = ls->c; /* Delimiter is '\'' or '"'. */
while (ls->current != delim) { lex_savenext(ls);
switch (ls->current) { while (ls->c != delim) {
case END_OF_STREAM: switch (ls->c) {
case LEX_EOF:
lj_lex_error(ls, TK_eof, LJ_ERR_XSTR); lj_lex_error(ls, TK_eof, LJ_ERR_XSTR);
continue; continue;
case '\n': case '\n':
@ -183,7 +192,7 @@ static void read_string(LexState *ls, int delim, TValue *tv)
lj_lex_error(ls, TK_string, LJ_ERR_XSTR); lj_lex_error(ls, TK_string, LJ_ERR_XSTR);
continue; continue;
case '\\': { case '\\': {
int c = next(ls); /* Skip the '\\'. */ LexChar c = lex_next(ls); /* Skip the '\\'. */
switch (c) { switch (c) {
case 'a': c = '\a'; break; case 'a': c = '\a'; break;
case 'b': c = '\b'; break; case 'b': c = '\b'; break;
@ -193,112 +202,112 @@ static void read_string(LexState *ls, int delim, TValue *tv)
case 't': c = '\t'; break; case 't': c = '\t'; break;
case 'v': c = '\v'; break; case 'v': c = '\v'; break;
case 'x': /* Hexadecimal escape '\xXX'. */ case 'x': /* Hexadecimal escape '\xXX'. */
c = (next(ls) & 15u) << 4; c = (lex_next(ls) & 15u) << 4;
if (!lj_char_isdigit(ls->current)) { if (!lj_char_isdigit(ls->c)) {
if (!lj_char_isxdigit(ls->current)) goto err_xesc; if (!lj_char_isxdigit(ls->c)) goto err_xesc;
c += 9 << 4; c += 9 << 4;
} }
c += (next(ls) & 15u); c += (lex_next(ls) & 15u);
if (!lj_char_isdigit(ls->current)) { if (!lj_char_isdigit(ls->c)) {
if (!lj_char_isxdigit(ls->current)) goto err_xesc; if (!lj_char_isxdigit(ls->c)) goto err_xesc;
c += 9; c += 9;
} }
break; break;
case 'z': /* Skip whitespace. */ case 'z': /* Skip whitespace. */
next(ls); lex_next(ls);
while (lj_char_isspace(ls->current)) while (lj_char_isspace(ls->c))
if (currIsNewline(ls)) inclinenumber(ls); else next(ls); if (lex_iseol(ls)) lex_newline(ls); else lex_next(ls);
continue; continue;
case '\n': case '\r': save(ls, '\n'); inclinenumber(ls); continue; case '\n': case '\r': lex_save(ls, '\n'); lex_newline(ls); continue;
case '\\': case '\"': case '\'': break; case '\\': case '\"': case '\'': break;
case END_OF_STREAM: continue; case LEX_EOF: continue;
default: default:
if (!lj_char_isdigit(c)) if (!lj_char_isdigit(c))
goto err_xesc; goto err_xesc;
c -= '0'; /* Decimal escape '\ddd'. */ c -= '0'; /* Decimal escape '\ddd'. */
if (lj_char_isdigit(next(ls))) { if (lj_char_isdigit(lex_next(ls))) {
c = c*10 + (ls->current - '0'); c = c*10 + (ls->c - '0');
if (lj_char_isdigit(next(ls))) { if (lj_char_isdigit(lex_next(ls))) {
c = c*10 + (ls->current - '0'); c = c*10 + (ls->c - '0');
if (c > 255) { if (c > 255) {
err_xesc: err_xesc:
lj_lex_error(ls, TK_string, LJ_ERR_XESC); lj_lex_error(ls, TK_string, LJ_ERR_XESC);
} }
next(ls); lex_next(ls);
} }
} }
save(ls, c); lex_save(ls, c);
continue; continue;
} }
save(ls, c); lex_save(ls, c);
next(ls); lex_next(ls);
continue; continue;
} }
default: default:
save_and_next(ls); lex_savenext(ls);
break; break;
} }
} }
save_and_next(ls); /* skip delimiter */ lex_savenext(ls); /* Skip trailing delimiter. */
setstrV(ls->L, tv, setstrV(ls->L, tv,
lj_parse_keepstr(ls, sbufB(&ls->sb)+1, sbuflen(&ls->sb)-2)); lj_parse_keepstr(ls, sbufB(&ls->sb)+1, sbuflen(&ls->sb)-2));
} }
/* -- Main lexical scanner ------------------------------------------------ */ /* -- Main lexical scanner ------------------------------------------------ */
static int llex(LexState *ls, TValue *tv) /* Get next lexical token. */
static LexToken lex_scan(LexState *ls, TValue *tv)
{ {
lj_buf_reset(&ls->sb); lj_buf_reset(&ls->sb);
for (;;) { for (;;) {
if (lj_char_isident(ls->current)) { if (lj_char_isident(ls->c)) {
GCstr *s; GCstr *s;
if (lj_char_isdigit(ls->current)) { /* Numeric literal. */ if (lj_char_isdigit(ls->c)) { /* Numeric literal. */
lex_number(ls, tv); lex_number(ls, tv);
return TK_number; return TK_number;
} }
/* Identifier or reserved word. */ /* Identifier or reserved word. */
do { do {
save_and_next(ls); lex_savenext(ls);
} while (lj_char_isident(ls->current)); } while (lj_char_isident(ls->c));
s = lj_parse_keepstr(ls, sbufB(&ls->sb), sbuflen(&ls->sb)); s = lj_parse_keepstr(ls, sbufB(&ls->sb), sbuflen(&ls->sb));
setstrV(ls->L, tv, s); setstrV(ls->L, tv, s);
if (s->reserved > 0) /* Reserved word? */ if (s->reserved > 0) /* Reserved word? */
return TK_OFS + s->reserved; return TK_OFS + s->reserved;
return TK_name; return TK_name;
} }
switch (ls->current) { switch (ls->c) {
case '\n': case '\n':
case '\r': case '\r':
inclinenumber(ls); lex_newline(ls);
continue; continue;
case ' ': case ' ':
case '\t': case '\t':
case '\v': case '\v':
case '\f': case '\f':
next(ls); lex_next(ls);
continue; continue;
case '-': case '-':
next(ls); lex_next(ls);
if (ls->current != '-') return '-'; if (ls->c != '-') return '-';
/* else is a comment */ lex_next(ls);
next(ls); if (ls->c == '[') { /* Long comment "--[=*[...]=*]". */
if (ls->current == '[') { int sep = lex_skipeq(ls);
int sep = skip_sep(ls); lj_buf_reset(&ls->sb); /* `lex_skipeq' may dirty the buffer */
lj_buf_reset(&ls->sb); /* `skip_sep' may dirty the buffer */
if (sep >= 0) { if (sep >= 0) {
read_long_string(ls, NULL, sep); /* long comment */ lex_longstring(ls, NULL, sep);
lj_buf_reset(&ls->sb); lj_buf_reset(&ls->sb);
continue; continue;
} }
} }
/* else short comment */ /* Short comment "--.*\n". */
while (!currIsNewline(ls) && ls->current != END_OF_STREAM) while (!lex_iseol(ls) && ls->c != LEX_EOF)
next(ls); lex_next(ls);
continue; continue;
case '[': { case '[': {
int sep = skip_sep(ls); int sep = lex_skipeq(ls);
if (sep >= 0) { if (sep >= 0) {
read_long_string(ls, tv, sep); lex_longstring(ls, tv, sep);
return TK_string; return TK_string;
} else if (sep == -1) { } else if (sep == -1) {
return '['; return '[';
@ -308,44 +317,43 @@ static int llex(LexState *ls, TValue *tv)
} }
} }
case '=': case '=':
next(ls); lex_next(ls);
if (ls->current != '=') return '='; else { next(ls); return TK_eq; } if (ls->c != '=') return '='; else { lex_next(ls); return TK_eq; }
case '<': case '<':
next(ls); lex_next(ls);
if (ls->current != '=') return '<'; else { next(ls); return TK_le; } if (ls->c != '=') return '<'; else { lex_next(ls); return TK_le; }
case '>': case '>':
next(ls); lex_next(ls);
if (ls->current != '=') return '>'; else { next(ls); return TK_ge; } if (ls->c != '=') return '>'; else { lex_next(ls); return TK_ge; }
case '~': case '~':
next(ls); lex_next(ls);
if (ls->current != '=') return '~'; else { next(ls); return TK_ne; } if (ls->c != '=') return '~'; else { lex_next(ls); return TK_ne; }
case ':': case ':':
next(ls); lex_next(ls);
if (ls->current != ':') return ':'; else { next(ls); return TK_label; } if (ls->c != ':') return ':'; else { lex_next(ls); return TK_label; }
case '"': case '"':
case '\'': case '\'':
read_string(ls, ls->current, tv); lex_string(ls, tv);
return TK_string; return TK_string;
case '.': case '.':
save_and_next(ls); if (lex_savenext(ls) == '.') {
if (ls->current == '.') { lex_next(ls);
next(ls); if (ls->c == '.') {
if (ls->current == '.') { lex_next(ls);
next(ls);
return TK_dots; /* ... */ return TK_dots; /* ... */
} }
return TK_concat; /* .. */ return TK_concat; /* .. */
} else if (!lj_char_isdigit(ls->current)) { } else if (!lj_char_isdigit(ls->c)) {
return '.'; return '.';
} else { } else {
lex_number(ls, tv); lex_number(ls, tv);
return TK_number; return TK_number;
} }
case END_OF_STREAM: case LEX_EOF:
return TK_eof; return TK_eof;
default: { default: {
int c = ls->current; LexChar c = ls->c;
next(ls); lex_next(ls);
return c; /* Single-char tokens (+ - / ...). */ return c; /* Single-char tokens (+ - / ...). */
} }
} }
@ -370,23 +378,23 @@ int lj_lex_setup(lua_State *L, LexState *ls)
ls->lookahead = TK_eof; /* No look-ahead token. */ ls->lookahead = TK_eof; /* No look-ahead token. */
ls->linenumber = 1; ls->linenumber = 1;
ls->lastline = 1; ls->lastline = 1;
next(ls); /* Read-ahead first char. */ lex_next(ls); /* Read-ahead first char. */
if (ls->current == 0xef && ls->n >= 2 && char2int(ls->p[0]) == 0xbb && if (ls->c == 0xef && ls->n >= 2 && (uint8_t)ls->p[0] == 0xbb &&
char2int(ls->p[1]) == 0xbf) { /* Skip UTF-8 BOM (if buffered). */ (uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
ls->n -= 2; ls->n -= 2;
ls->p += 2; ls->p += 2;
next(ls); lex_next(ls);
header = 1; header = 1;
} }
if (ls->current == '#') { /* Skip POSIX #! header line. */ if (ls->c == '#') { /* Skip POSIX #! header line. */
do { do {
next(ls); lex_next(ls);
if (ls->current == END_OF_STREAM) return 0; if (ls->c == LEX_EOF) return 0;
} while (!currIsNewline(ls)); } while (!lex_iseol(ls));
inclinenumber(ls); lex_newline(ls);
header = 1; header = 1;
} }
if (ls->current == LUA_SIGNATURE[0]) { /* Bytecode dump. */ if (ls->c == LUA_SIGNATURE[0]) { /* Bytecode dump. */
if (header) { if (header) {
/* /*
** Loading bytecode with an extra header is disabled for security ** Loading bytecode with an extra header is disabled for security
@ -411,52 +419,57 @@ void lj_lex_cleanup(lua_State *L, LexState *ls)
lj_buf_free(g, &ls->sb); lj_buf_free(g, &ls->sb);
} }
/* Return next lexical token. */
void lj_lex_next(LexState *ls) void lj_lex_next(LexState *ls)
{ {
ls->lastline = ls->linenumber; ls->lastline = ls->linenumber;
if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */ if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */
ls->token = llex(ls, &ls->tokenval); /* Get next token. */ ls->tok = lex_scan(ls, &ls->tokval); /* Get next token. */
} else { /* Otherwise return lookahead token. */ } else { /* Otherwise return lookahead token. */
ls->token = ls->lookahead; ls->tok = ls->lookahead;
ls->lookahead = TK_eof; ls->lookahead = TK_eof;
ls->tokenval = ls->lookaheadval; ls->tokval = ls->lookaheadval;
} }
} }
/* Look ahead for the next token. */
LexToken lj_lex_lookahead(LexState *ls) LexToken lj_lex_lookahead(LexState *ls)
{ {
lua_assert(ls->lookahead == TK_eof); lua_assert(ls->lookahead == TK_eof);
ls->lookahead = llex(ls, &ls->lookaheadval); ls->lookahead = lex_scan(ls, &ls->lookaheadval);
return ls->lookahead; return ls->lookahead;
} }
const char *lj_lex_token2str(LexState *ls, LexToken token) /* Convert token to string. */
const char *lj_lex_token2str(LexState *ls, LexToken tok)
{ {
if (token > TK_OFS) if (tok > TK_OFS)
return tokennames[token-TK_OFS-1]; return tokennames[tok-TK_OFS-1];
else if (!lj_char_iscntrl(token)) else if (!lj_char_iscntrl(tok))
return lj_str_pushf(ls->L, "%c", token); return lj_str_pushf(ls->L, "%c", tok);
else else
return lj_str_pushf(ls->L, "char(%d)", token); return lj_str_pushf(ls->L, "char(%d)", tok);
} }
void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...) /* Lexer error. */
void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...)
{ {
const char *tok; const char *tokstr;
va_list argp; va_list argp;
if (token == 0) { if (tok == 0) {
tok = NULL; tokstr = NULL;
} else if (token == TK_name || token == TK_string || token == TK_number) { } else if (tok == TK_name || tok == TK_string || tok == TK_number) {
save(ls, '\0'); lex_save(ls, '\0');
tok = sbufB(&ls->sb); tokstr = sbufB(&ls->sb);
} else { } else {
tok = lj_lex_token2str(ls, token); tokstr = lj_lex_token2str(ls, tok);
} }
va_start(argp, em); va_start(argp, em);
lj_err_lex(ls->L, ls->chunkname, tok, ls->linenumber, em, argp); lj_err_lex(ls->L, ls->chunkname, tokstr, ls->linenumber, em, argp);
va_end(argp); va_end(argp);
} }
/* Initialize strings for reserved words. */
void lj_lex_init(lua_State *L) void lj_lex_init(lua_State *L)
{ {
uint32_t i; uint32_t i;

View File

@ -30,7 +30,8 @@ TKDEF(TKENUM1, TKENUM2)
TK_RESERVED = TK_while - TK_OFS TK_RESERVED = TK_while - TK_OFS
}; };
typedef int LexToken; typedef int LexChar; /* Lexical character. Unsigned ext. from char. */
typedef int LexToken; /* Lexical token. */
/* Combined bytecode ins/line. Only used during bytecode generation. */ /* Combined bytecode ins/line. Only used during bytecode generation. */
typedef struct BCInsLine { typedef struct BCInsLine {
@ -51,10 +52,10 @@ typedef struct VarInfo {
typedef struct LexState { typedef struct LexState {
struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */ struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */
struct lua_State *L; /* Lua state. */ struct lua_State *L; /* Lua state. */
TValue tokenval; /* Current token value. */ TValue tokval; /* Current token value. */
TValue lookaheadval; /* Lookahead token value. */ TValue lookaheadval; /* Lookahead token value. */
int current; /* Current character (charint). */ LexChar c; /* Current character. */
LexToken token; /* Current token. */ LexToken tok; /* Current token. */
LexToken lookahead; /* Lookahead token. */ LexToken lookahead; /* Lookahead token. */
MSize n; /* Bytes left in input buffer. */ MSize n; /* Bytes left in input buffer. */
const char *p; /* Current position in input buffer. */ const char *p; /* Current position in input buffer. */
@ -78,8 +79,8 @@ LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls);
LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls); LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls);
LJ_FUNC void lj_lex_next(LexState *ls); LJ_FUNC void lj_lex_next(LexState *ls);
LJ_FUNC LexToken lj_lex_lookahead(LexState *ls); LJ_FUNC LexToken lj_lex_lookahead(LexState *ls);
LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken token); LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken tok);
LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...); LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...);
LJ_FUNC void lj_lex_init(lua_State *L); LJ_FUNC void lj_lex_init(lua_State *L);
#endif #endif

View File

@ -56,7 +56,7 @@ static const uint8_t *lib_read_lfunc(lua_State *L, const uint8_t *p, GCtab *tab)
ls.L = L; ls.L = L;
ls.p = (const char *)(p+len); ls.p = (const char *)(p+len);
ls.n = ~(MSize)0; ls.n = ~(MSize)0;
ls.current = -1; ls.c = -1;
ls.level = (BCDUMP_F_STRIP|(LJ_BE*BCDUMP_F_BE)); ls.level = (BCDUMP_F_STRIP|(LJ_BE*BCDUMP_F_BE));
ls.chunkname = name; ls.chunkname = name;
pt = lj_bcread_proto(&ls); pt = lj_bcread_proto(&ls);

View File

@ -166,12 +166,12 @@ LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD);
LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em) LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em)
{ {
lj_lex_error(ls, ls->token, em); lj_lex_error(ls, ls->tok, em);
} }
LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken token) LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken tok)
{ {
lj_lex_error(ls, ls->token, LJ_ERR_XTOKEN, lj_lex_token2str(ls, token)); lj_lex_error(ls, ls->tok, LJ_ERR_XTOKEN, lj_lex_token2str(ls, tok));
} }
LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what) LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what)
@ -982,7 +982,7 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e)
/* Check and consume optional token. */ /* Check and consume optional token. */
static int lex_opt(LexState *ls, LexToken tok) static int lex_opt(LexState *ls, LexToken tok)
{ {
if (ls->token == tok) { if (ls->tok == tok) {
lj_lex_next(ls); lj_lex_next(ls);
return 1; return 1;
} }
@ -992,7 +992,7 @@ static int lex_opt(LexState *ls, LexToken tok)
/* Check and consume token. */ /* Check and consume token. */
static void lex_check(LexState *ls, LexToken tok) static void lex_check(LexState *ls, LexToken tok)
{ {
if (ls->token != tok) if (ls->tok != tok)
err_token(ls, tok); err_token(ls, tok);
lj_lex_next(ls); lj_lex_next(ls);
} }
@ -1006,7 +1006,7 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line)
} else { } else {
const char *swhat = lj_lex_token2str(ls, what); const char *swhat = lj_lex_token2str(ls, what);
const char *swho = lj_lex_token2str(ls, who); const char *swho = lj_lex_token2str(ls, who);
lj_lex_error(ls, ls->token, LJ_ERR_XMATCH, swhat, swho, line); lj_lex_error(ls, ls->tok, LJ_ERR_XMATCH, swhat, swho, line);
} }
} }
} }
@ -1015,9 +1015,9 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line)
static GCstr *lex_str(LexState *ls) static GCstr *lex_str(LexState *ls)
{ {
GCstr *s; GCstr *s;
if (ls->token != TK_name && (LJ_52 || ls->token != TK_goto)) if (ls->tok != TK_name && (LJ_52 || ls->tok != TK_goto))
err_token(ls, TK_name); err_token(ls, TK_name);
s = strV(&ls->tokenval); s = strV(&ls->tokval);
lj_lex_next(ls); lj_lex_next(ls);
return s; return s;
} }
@ -1584,7 +1584,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line)
L->top--; /* Pop table of constants. */ L->top--; /* Pop table of constants. */
ls->vtop = fs->vbase; /* Reset variable stack. */ ls->vtop = fs->vbase; /* Reset variable stack. */
ls->fs = fs->prev; ls->fs = fs->prev;
lua_assert(ls->fs != NULL || ls->token == TK_eof); lua_assert(ls->fs != NULL || ls->tok == TK_eof);
return pt; return pt;
} }
@ -1706,15 +1706,15 @@ static void expr_table(LexState *ls, ExpDesc *e)
bcreg_reserve(fs, 1); bcreg_reserve(fs, 1);
freg++; freg++;
lex_check(ls, '{'); lex_check(ls, '{');
while (ls->token != '}') { while (ls->tok != '}') {
ExpDesc key, val; ExpDesc key, val;
vcall = 0; vcall = 0;
if (ls->token == '[') { if (ls->tok == '[') {
expr_bracket(ls, &key); /* Already calls expr_toval. */ expr_bracket(ls, &key); /* Already calls expr_toval. */
if (!expr_isk(&key)) expr_index(fs, e, &key); if (!expr_isk(&key)) expr_index(fs, e, &key);
if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++; if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++;
lex_check(ls, '='); lex_check(ls, '=');
} else if ((ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) && } else if ((ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) &&
lj_lex_lookahead(ls) == '=') { lj_lex_lookahead(ls) == '=') {
expr_str(ls, &key); expr_str(ls, &key);
lex_check(ls, '='); lex_check(ls, '=');
@ -1807,11 +1807,11 @@ static BCReg parse_params(LexState *ls, int needself)
lex_check(ls, '('); lex_check(ls, '(');
if (needself) if (needself)
var_new_lit(ls, nparams++, "self"); var_new_lit(ls, nparams++, "self");
if (ls->token != ')') { if (ls->tok != ')') {
do { do {
if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) { if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) {
var_new(ls, nparams++, lex_str(ls)); var_new(ls, nparams++, lex_str(ls));
} else if (ls->token == TK_dots) { } else if (ls->tok == TK_dots) {
lj_lex_next(ls); lj_lex_next(ls);
fs->flags |= PROTO_VARARG; fs->flags |= PROTO_VARARG;
break; break;
@ -1845,7 +1845,7 @@ static void parse_body(LexState *ls, ExpDesc *e, int needself, BCLine line)
fs.bclim = pfs->bclim - pfs->pc; fs.bclim = pfs->bclim - pfs->pc;
bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */ bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */
parse_chunk(ls); parse_chunk(ls);
if (ls->token != TK_end) lex_match(ls, TK_end, TK_function, line); if (ls->tok != TK_end) lex_match(ls, TK_end, TK_function, line);
pt = fs_finish(ls, (ls->lastline = ls->linenumber)); pt = fs_finish(ls, (ls->lastline = ls->linenumber));
pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */ pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */
pfs->bclim = (BCPos)(ls->sizebcstack - oldbase); pfs->bclim = (BCPos)(ls->sizebcstack - oldbase);
@ -1884,13 +1884,13 @@ static void parse_args(LexState *ls, ExpDesc *e)
BCIns ins; BCIns ins;
BCReg base; BCReg base;
BCLine line = ls->linenumber; BCLine line = ls->linenumber;
if (ls->token == '(') { if (ls->tok == '(') {
#if !LJ_52 #if !LJ_52
if (line != ls->lastline) if (line != ls->lastline)
err_syntax(ls, LJ_ERR_XAMBIG); err_syntax(ls, LJ_ERR_XAMBIG);
#endif #endif
lj_lex_next(ls); lj_lex_next(ls);
if (ls->token == ')') { /* f(). */ if (ls->tok == ')') { /* f(). */
args.k = VVOID; args.k = VVOID;
} else { } else {
expr_list(ls, &args); expr_list(ls, &args);
@ -1898,11 +1898,11 @@ static void parse_args(LexState *ls, ExpDesc *e)
setbc_b(bcptr(fs, &args), 0); /* Pass on multiple results. */ setbc_b(bcptr(fs, &args), 0); /* Pass on multiple results. */
} }
lex_match(ls, ')', '(', line); lex_match(ls, ')', '(', line);
} else if (ls->token == '{') { } else if (ls->tok == '{') {
expr_table(ls, &args); expr_table(ls, &args);
} else if (ls->token == TK_string) { } else if (ls->tok == TK_string) {
expr_init(&args, VKSTR, 0); expr_init(&args, VKSTR, 0);
args.u.sval = strV(&ls->tokenval); args.u.sval = strV(&ls->tokval);
lj_lex_next(ls); lj_lex_next(ls);
} else { } else {
err_syntax(ls, LJ_ERR_XFUNARG); err_syntax(ls, LJ_ERR_XFUNARG);
@ -1928,32 +1928,32 @@ static void expr_primary(LexState *ls, ExpDesc *v)
{ {
FuncState *fs = ls->fs; FuncState *fs = ls->fs;
/* Parse prefix expression. */ /* Parse prefix expression. */
if (ls->token == '(') { if (ls->tok == '(') {
BCLine line = ls->linenumber; BCLine line = ls->linenumber;
lj_lex_next(ls); lj_lex_next(ls);
expr(ls, v); expr(ls, v);
lex_match(ls, ')', '(', line); lex_match(ls, ')', '(', line);
expr_discharge(ls->fs, v); expr_discharge(ls->fs, v);
} else if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) { } else if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) {
var_lookup(ls, v); var_lookup(ls, v);
} else { } else {
err_syntax(ls, LJ_ERR_XSYMBOL); err_syntax(ls, LJ_ERR_XSYMBOL);
} }
for (;;) { /* Parse multiple expression suffixes. */ for (;;) { /* Parse multiple expression suffixes. */
if (ls->token == '.') { if (ls->tok == '.') {
expr_field(ls, v); expr_field(ls, v);
} else if (ls->token == '[') { } else if (ls->tok == '[') {
ExpDesc key; ExpDesc key;
expr_toanyreg(fs, v); expr_toanyreg(fs, v);
expr_bracket(ls, &key); expr_bracket(ls, &key);
expr_index(fs, v, &key); expr_index(fs, v, &key);
} else if (ls->token == ':') { } else if (ls->tok == ':') {
ExpDesc key; ExpDesc key;
lj_lex_next(ls); lj_lex_next(ls);
expr_str(ls, &key); expr_str(ls, &key);
bcemit_method(fs, v, &key); bcemit_method(fs, v, &key);
parse_args(ls, v); parse_args(ls, v);
} else if (ls->token == '(' || ls->token == TK_string || ls->token == '{') { } else if (ls->tok == '(' || ls->tok == TK_string || ls->tok == '{') {
expr_tonextreg(fs, v); expr_tonextreg(fs, v);
parse_args(ls, v); parse_args(ls, v);
} else { } else {
@ -1965,14 +1965,14 @@ static void expr_primary(LexState *ls, ExpDesc *v)
/* Parse simple expression. */ /* Parse simple expression. */
static void expr_simple(LexState *ls, ExpDesc *v) static void expr_simple(LexState *ls, ExpDesc *v)
{ {
switch (ls->token) { switch (ls->tok) {
case TK_number: case TK_number:
expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokenval)) ? VKCDATA : VKNUM, 0); expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokval)) ? VKCDATA : VKNUM, 0);
copyTV(ls->L, &v->u.nval, &ls->tokenval); copyTV(ls->L, &v->u.nval, &ls->tokval);
break; break;
case TK_string: case TK_string:
expr_init(v, VKSTR, 0); expr_init(v, VKSTR, 0);
v->u.sval = strV(&ls->tokenval); v->u.sval = strV(&ls->tokval);
break; break;
case TK_nil: case TK_nil:
expr_init(v, VKNIL, 0); expr_init(v, VKNIL, 0);
@ -2060,11 +2060,11 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit);
static void expr_unop(LexState *ls, ExpDesc *v) static void expr_unop(LexState *ls, ExpDesc *v)
{ {
BCOp op; BCOp op;
if (ls->token == TK_not) { if (ls->tok == TK_not) {
op = BC_NOT; op = BC_NOT;
} else if (ls->token == '-') { } else if (ls->tok == '-') {
op = BC_UNM; op = BC_UNM;
} else if (ls->token == '#') { } else if (ls->tok == '#') {
op = BC_LEN; op = BC_LEN;
} else { } else {
expr_simple(ls, v); expr_simple(ls, v);
@ -2081,7 +2081,7 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit)
BinOpr op; BinOpr op;
synlevel_begin(ls); synlevel_begin(ls);
expr_unop(ls, v); expr_unop(ls, v);
op = token2binop(ls->token); op = token2binop(ls->tok);
while (op != OPR_NOBINOPR && priority[op].left > limit) { while (op != OPR_NOBINOPR && priority[op].left > limit) {
ExpDesc v2; ExpDesc v2;
BinOpr nextop; BinOpr nextop;
@ -2270,9 +2270,9 @@ static void parse_func(LexState *ls, BCLine line)
lj_lex_next(ls); /* Skip 'function'. */ lj_lex_next(ls); /* Skip 'function'. */
/* Parse function name. */ /* Parse function name. */
var_lookup(ls, &v); var_lookup(ls, &v);
while (ls->token == '.') /* Multiple dot-separated fields. */ while (ls->tok == '.') /* Multiple dot-separated fields. */
expr_field(ls, &v); expr_field(ls, &v);
if (ls->token == ':') { /* Optional colon to signify method call. */ if (ls->tok == ':') { /* Optional colon to signify method call. */
needself = 1; needself = 1;
expr_field(ls, &v); expr_field(ls, &v);
} }
@ -2285,9 +2285,9 @@ static void parse_func(LexState *ls, BCLine line)
/* -- Control transfer statements ----------------------------------------- */ /* -- Control transfer statements ----------------------------------------- */
/* Check for end of block. */ /* Check for end of block. */
static int endofblock(LexToken token) static int parse_isend(LexToken tok)
{ {
switch (token) { switch (tok) {
case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof: case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof:
return 1; return 1;
default: default:
@ -2302,7 +2302,7 @@ static void parse_return(LexState *ls)
FuncState *fs = ls->fs; FuncState *fs = ls->fs;
lj_lex_next(ls); /* Skip 'return'. */ lj_lex_next(ls); /* Skip 'return'. */
fs->flags |= PROTO_HAS_RETURN; fs->flags |= PROTO_HAS_RETURN;
if (endofblock(ls->token) || ls->token == ';') { /* Bare return. */ if (parse_isend(ls->tok) || ls->tok == ';') { /* Bare return. */
ins = BCINS_AD(BC_RET0, 0, 1); ins = BCINS_AD(BC_RET0, 0, 1);
} else { /* Return with one or more values. */ } else { /* Return with one or more values. */
ExpDesc e; /* Receives the _last_ expression in the list. */ ExpDesc e; /* Receives the _last_ expression in the list. */
@ -2368,18 +2368,18 @@ static void parse_label(LexState *ls)
lex_check(ls, TK_label); lex_check(ls, TK_label);
/* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */ /* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */
for (;;) { for (;;) {
if (ls->token == TK_label) { if (ls->tok == TK_label) {
synlevel_begin(ls); synlevel_begin(ls);
parse_label(ls); parse_label(ls);
synlevel_end(ls); synlevel_end(ls);
} else if (LJ_52 && ls->token == ';') { } else if (LJ_52 && ls->tok == ';') {
lj_lex_next(ls); lj_lex_next(ls);
} else { } else {
break; break;
} }
} }
/* Trailing label is considered to be outside of scope. */ /* Trailing label is considered to be outside of scope. */
if (endofblock(ls->token) && ls->token != TK_until) if (parse_isend(ls->tok) && ls->tok != TK_until)
ls->vstack[idx].slot = fs->bl->nactvar; ls->vstack[idx].slot = fs->bl->nactvar;
gola_resolve(ls, fs->bl, idx); gola_resolve(ls, fs->bl, idx);
} }
@ -2563,9 +2563,9 @@ static void parse_for(LexState *ls, BCLine line)
fscope_begin(fs, &bl, FSCOPE_LOOP); fscope_begin(fs, &bl, FSCOPE_LOOP);
lj_lex_next(ls); /* Skip 'for'. */ lj_lex_next(ls); /* Skip 'for'. */
varname = lex_str(ls); /* Get first variable name. */ varname = lex_str(ls); /* Get first variable name. */
if (ls->token == '=') if (ls->tok == '=')
parse_for_num(ls, varname, line); parse_for_num(ls, varname, line);
else if (ls->token == ',' || ls->token == TK_in) else if (ls->tok == ',' || ls->tok == TK_in)
parse_for_iter(ls, varname); parse_for_iter(ls, varname);
else else
err_syntax(ls, LJ_ERR_XFOR); err_syntax(ls, LJ_ERR_XFOR);
@ -2591,12 +2591,12 @@ static void parse_if(LexState *ls, BCLine line)
BCPos flist; BCPos flist;
BCPos escapelist = NO_JMP; BCPos escapelist = NO_JMP;
flist = parse_then(ls); flist = parse_then(ls);
while (ls->token == TK_elseif) { /* Parse multiple 'elseif' blocks. */ while (ls->tok == TK_elseif) { /* Parse multiple 'elseif' blocks. */
jmp_append(fs, &escapelist, bcemit_jmp(fs)); jmp_append(fs, &escapelist, bcemit_jmp(fs));
jmp_tohere(fs, flist); jmp_tohere(fs, flist);
flist = parse_then(ls); flist = parse_then(ls);
} }
if (ls->token == TK_else) { /* Parse optional 'else' block. */ if (ls->tok == TK_else) { /* Parse optional 'else' block. */
jmp_append(fs, &escapelist, bcemit_jmp(fs)); jmp_append(fs, &escapelist, bcemit_jmp(fs));
jmp_tohere(fs, flist); jmp_tohere(fs, flist);
lj_lex_next(ls); /* Skip 'else'. */ lj_lex_next(ls); /* Skip 'else'. */
@ -2614,7 +2614,7 @@ static void parse_if(LexState *ls, BCLine line)
static int parse_stmt(LexState *ls) static int parse_stmt(LexState *ls)
{ {
BCLine line = ls->linenumber; BCLine line = ls->linenumber;
switch (ls->token) { switch (ls->tok) {
case TK_if: case TK_if:
parse_if(ls, line); parse_if(ls, line);
break; break;
@ -2672,7 +2672,7 @@ static void parse_chunk(LexState *ls)
{ {
int islast = 0; int islast = 0;
synlevel_begin(ls); synlevel_begin(ls);
while (!islast && !endofblock(ls->token)) { while (!islast && !parse_isend(ls->tok)) {
islast = parse_stmt(ls); islast = parse_stmt(ls);
lex_opt(ls, ';'); lex_opt(ls, ';');
lua_assert(ls->fs->framesize >= ls->fs->freereg && lua_assert(ls->fs->framesize >= ls->fs->freereg &&
@ -2707,7 +2707,7 @@ GCproto *lj_parse(LexState *ls)
bcemit_AD(&fs, BC_FUNCV, 0, 0); /* Placeholder. */ bcemit_AD(&fs, BC_FUNCV, 0, 0); /* Placeholder. */
lj_lex_next(ls); /* Read-ahead first token. */ lj_lex_next(ls); /* Read-ahead first token. */
parse_chunk(ls); parse_chunk(ls);
if (ls->token != TK_eof) if (ls->tok != TK_eof)
err_token(ls, TK_eof); err_token(ls, TK_eof);
pt = fs_finish(ls, ls->linenumber); pt = fs_finish(ls, ls->linenumber);
L->top--; /* Drop chunkname. */ L->top--; /* Drop chunkname. */