Refactor string.format().

This commit is contained in:
Mike Pall 2013-05-12 22:37:02 +02:00
parent bb2cc1dcaf
commit 5bb1f0edac
8 changed files with 465 additions and 195 deletions

View File

@ -439,7 +439,8 @@ LJLIB_C= $(LJLIB_O:.o=.c)
LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
lj_api.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ lj_strfmt.o lj_api.o \
lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \

View File

@ -35,7 +35,7 @@ lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
lj_tab.h lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h \ lj_tab.h lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h \
lj_char.h lj_lib.h lj_libdef.h lj_char.h lj_strfmt.h lj_lib.h lj_libdef.h
lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
lj_tab.h lj_lib.h lj_libdef.h lj_tab.h lj_lib.h lj_libdef.h
@ -179,6 +179,8 @@ lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h
lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h lj_char.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h lj_char.h
lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_buf.h lj_gc.h lj_str.h lj_char.h lj_strfmt.h
lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_char.h lj_strscan.h lj_char.h lj_strscan.h
lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
@ -203,19 +205,19 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c lj_tab.c lj_func.c \ lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c lj_tab.c lj_func.c \
lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c lj_state.c \ lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c lj_state.c \
lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h luajit.h lj_vmevent.c \ lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h luajit.h lj_vmevent.c \
lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c lj_lex.c lualib.h \ lj_vmevent.h lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt.h lj_api.c \
lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c \ lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \
lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h \ lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \
lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \ lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \
lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \ lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \
lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \ lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h lj_iropt.h lj_opt_mem.c \
lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \ lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c \
lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \ lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c \
lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \ lj_record.h lj_ffrecord.h lj_crecord.c lj_crecord.h lj_ffrecord.c \
lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \ lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h lj_asm_*.h lj_trace.c \
lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \ lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c lj_libdef.h \
lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \ lib_math.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c \
lib_ffi.c lib_init.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c lib_init.c
luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \

View File

@ -6,8 +6,6 @@
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
*/ */
#include <stdio.h>
#define lib_string_c #define lib_string_c
#define LUA_LIB #define LUA_LIB
@ -26,6 +24,7 @@
#include "lj_ff.h" #include "lj_ff.h"
#include "lj_bcdump.h" #include "lj_bcdump.h"
#include "lj_char.h" #include "lj_char.h"
#include "lj_strfmt.h"
#include "lj_lib.h" #include "lj_lib.h"
/* ------------------------------------------------------------------------ */ /* ------------------------------------------------------------------------ */
@ -641,130 +640,20 @@ LJLIB_CF(string_gsub)
/* ------------------------------------------------------------------------ */ /* ------------------------------------------------------------------------ */
/* Max. buffer size needed (at least #string.format("%99.99f", -1e308)). */
#define STRING_FMT_MAXBUF 512
/* Valid format specifier flags. */
#define STRING_FMT_FLAGS "-+ #0"
/* Max. format specifier size. */
#define STRING_FMT_MAXSPEC \
(sizeof(STRING_FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
/* Add quoted string to buffer.
**
** Writes str to sb enclosed in double quotes. A double quote, a backslash
** and a newline are each written as a backslash followed by the character
** itself. Control characters are written as a backslash followed by their
** decimal code. All other bytes are copied unchanged.
*/
static void string_fmt_quoted(SBuf *sb, GCstr *str)
{
const char *s = strdata(str);
MSize len = str->len;
lj_buf_putb(sb, '"');
while (len--) {
uint32_t c = (uint32_t)(uint8_t)*s++;
/* At most 4 bytes are written per input byte: '\\' plus three digits. */
char *p = lj_buf_more(sb, 4);
if (c == '"' || c == '\\' || c == '\n') {
*p++ = '\\';
} else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
uint32_t d;
*p++ = '\\';
/* Use the full three-digit form if the code needs it or if the next byte
** is a digit, so the following digit cannot be read as part of the escape.
** NOTE(review): for the last byte this peeks at s[len]; presumably the
** string data is NUL-terminated -- confirm.
*/
if (c >= 100 || lj_char_isdigit((uint8_t)*s)) {
/* Hundreds digit: '1' for codes >= 100, otherwise a padding '0'. */
*p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
goto tens;
} else if (c >= 10) {
tens:
/* d = c / 10 via multiply-shift (c is below 100 here); c keeps c % 10. */
d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d);
}
/* Turn the remaining ones value into its ASCII digit. */
c += '0';
}
*p++ = (char)c;
setsbufP(sb, p);
}
lj_buf_putb(sb, '"');
}
/* Scan format and generate format specifier.
**
** fmt points just past a '%'. Skips the flags, up to two width digits and an
** optional '.' with up to two precision digits. Raises LJ_ERR_STRFMTR if the
** run of flag characters is too long and LJ_ERR_STRFMTW if another digit
** follows the width/precision. Writes '%' plus everything up to and
** including the conversion character into spec (NUL-terminated) and returns
** a pointer to the conversion character inside fmt.
*/
static const char *string_fmt_scan(lua_State *L, char *spec, const char *fmt)
{
  const char *conv = fmt;
  size_t ncopy;
  /* Skip flags. */
  for (; *conv != '\0'; conv++) {
    if (strchr(STRING_FMT_FLAGS, *conv) == NULL)
      break;
  }
  if ((size_t)(conv - fmt) >= sizeof(STRING_FMT_FLAGS))
    lj_err_caller(L, LJ_ERR_STRFMTR);
  /* Skip max. 2 digits for width. */
  if (lj_char_isdigit((uint8_t)*conv)) {
    conv++;
    if (lj_char_isdigit((uint8_t)*conv))
      conv++;
  }
  if (*conv == '.') {
    conv++;
    /* Skip max. 2 digits for prec. */
    if (lj_char_isdigit((uint8_t)*conv)) {
      conv++;
      if (lj_char_isdigit((uint8_t)*conv))
        conv++;
    }
  }
  if (lj_char_isdigit((uint8_t)*conv))
    lj_err_caller(L, LJ_ERR_STRFMTW);
  /* Copy flags, width, precision and the conversion character itself. */
  ncopy = (size_t)(conv - fmt + 1);
  spec[0] = '%';
  strncpy(spec + 1, fmt, ncopy);
  spec[1 + ncopy] = '\0';
  return conv;
}
/* Patch LUA_INTFRMLEN into integer format specifier.
**
** Inserts the one or two characters of LUA_INTFRMLEN directly in front of
** the last character of spec (the conversion character) and re-terminates
** the string. spec must have room for the added characters.
*/
static void string_fmt_intfmt(char *spec)
{
  char *tail = spec;
  char conv;
  /* Advance to the last character before the terminating NUL. */
  while (tail[1] != '\0')
    tail++;
  conv = *tail;
  *tail = (LUA_INTFRMLEN)[0];
  if ((LUA_INTFRMLEN)[1])
    *++tail = (LUA_INTFRMLEN)[1];
  *++tail = conv;
  *++tail = '\0';
}
/* Derive sprintf argument for integer format.
**
** With a 4 byte LUA_INTFRM_T the value comes from lj_lib_checkbit().
** Otherwise the argument is checked with lj_lib_checknumber() and its stack
** slot is converted from either its integer or its number representation.
*/
static LUA_INTFRM_T string_fmt_intarg(lua_State *L, int arg)
{
  cTValue *slot;
  if (sizeof(LUA_INTFRM_T) == 4)
    return (LUA_INTFRM_T)lj_lib_checkbit(L, arg);
  lj_lib_checknumber(L, arg);
  slot = L->base+arg-1;  /* Read the slot only after the check above. */
  return tvisint(slot) ? (LUA_INTFRM_T)intV(slot) : (LUA_INTFRM_T)numV(slot);
}
/* Derive sprintf argument for unsigned integer format.
**
** With a 4 byte LUA_INTFRM_T the value comes from lj_lib_checkbit().
** Otherwise the argument is checked with lj_lib_checknumber(). A number with
** the sign bit set in its high word is converted via the signed type first;
** other numbers are converted to the unsigned type directly.
*/
static unsigned LUA_INTFRM_T string_fmt_uintarg(lua_State *L, int arg)
{
  cTValue *slot;
  if (sizeof(LUA_INTFRM_T) == 4)
    return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg);
  lj_lib_checknumber(L, arg);
  slot = L->base+arg-1;  /* Read the slot only after the check above. */
  if (tvisint(slot))
    return (unsigned LUA_INTFRM_T)intV(slot);
  if ((int32_t)slot->u32.hi < 0)  /* Sign bit set: go through the signed type. */
    return (unsigned LUA_INTFRM_T)(LUA_INTFRM_T)numV(slot);
  return (unsigned LUA_INTFRM_T)numV(slot);
}
/* Emulate tostring() inline. */ /* Emulate tostring() inline. */
static GCstr *string_fmt_tostring(lua_State *L, int arg) static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry)
{ {
TValue *o = L->base+arg-1; TValue *o = L->base+arg-1;
cTValue *mo; cTValue *mo;
lua_assert(o < L->top); /* Caller already checks for existence. */ lua_assert(o < L->top); /* Caller already checks for existence. */
if (LJ_LIKELY(tvisstr(o))) if (LJ_LIKELY(tvisstr(o)))
return strV(o); return strV(o);
if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
copyTV(L, L->top++, mo); copyTV(L, L->top++, mo);
copyTV(L, L->top++, o); copyTV(L, L->top++, o);
lua_call(L, 1, 1); lua_call(L, 1, 1);
L->top--; copyTV(L, L->base+arg-1, --L->top);
if (tvisstr(L->top)) return NULL; /* Buffer may be overwritten, retry. */
return strV(L->top);
o = L->base+arg-1;
copyTV(L, o, L->top);
} }
if (tvisnumber(o)) { if (tvisnumber(o)) {
return lj_str_fromnumber(L, o); return lj_str_fromnumber(L, o);
@ -775,84 +664,85 @@ static GCstr *string_fmt_tostring(lua_State *L, int arg)
} else if (tvistrue(o)) { } else if (tvistrue(o)) {
return lj_str_newlit(L, "true"); return lj_str_newlit(L, "true");
} else { } else {
if (tvisfunc(o) && isffunc(funcV(o))) char buf[8+2+2+16], *p = buf;
lj_str_pushf(L, "function: builtin#%d", funcV(o)->c.ffid); if (tvisfunc(o) && isffunc(funcV(o))) {
else p = lj_buf_wmem(p, "function: builtin#", 18);
lj_str_pushf(L, "%s: %p", lj_typename(o), lua_topointer(L, arg)); p = lj_str_bufint(p, funcV(o)->c.ffid);
L->top--; } else {
return strV(L->top); p = lj_buf_wmem(p, lj_typename(o), strlen(lj_typename(o)));
*p++ = ':'; *p++ = ' ';
p = lj_str_bufptr(p, lua_topointer(L, arg));
}
return lj_str_new(L, buf, (size_t)(p - buf));
} }
} }
LJLIB_CF(string_format) LJLIB_CF(string_format)
{ {
int arg = 1, top = (int)(L->top - L->base); int arg, top = (int)(L->top - L->base);
GCstr *sfmt = lj_lib_checkstr(L, arg); GCstr *sfmt;
const char *fmt = strdata(sfmt); SBuf *sb;
const char *efmt = fmt + sfmt->len; FormatState fs;
SBuf *sb = lj_buf_tmp_(L); SFormat sf;
while (fmt < efmt) { int retry = 0;
if (*fmt != L_ESC || *++fmt == L_ESC) { again:
lj_buf_putb(sb, *fmt++); arg = 1;
sb = lj_buf_tmp_(L);
sfmt = lj_lib_checkstr(L, arg);
lj_strfmt_init(&fs, strdata(sfmt), sfmt->len);
while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
if (sf == STRFMT_LIT) {
lj_buf_putmem(sb, fs.str, fs.len);
} else if (sf == STRFMT_ERR) {
lj_err_callerv(L, LJ_ERR_STRFMT, strdata(lj_str_new(L, fs.str, fs.len)));
} else { } else {
char buf[STRING_FMT_MAXBUF];
char spec[STRING_FMT_MAXSPEC];
MSize len = 0;
if (++arg > top) if (++arg > top)
luaL_argerror(L, arg, lj_obj_typename[0]); luaL_argerror(L, arg, lj_obj_typename[0]);
fmt = string_fmt_scan(L, spec, fmt); switch (STRFMT_TYPE(sf)) {
switch (*fmt++) { case STRFMT_INT:
case 'c': if (tvisint(L->base+arg-1)) {
len = (MSize)sprintf(buf, spec, lj_lib_checkint(L, arg)); int32_t k = intV(L->base+arg-1);
if (sf == STRFMT_INT)
lj_buf_putint(sb, k); /* Shortcut for plain %d. */
else
lj_strfmt_putxint(sb, sf, k);
} else {
lj_strfmt_putnum_int(sb, sf, lj_lib_checknum(L, arg));
}
break; break;
case 'd': case 'i': case STRFMT_UINT:
string_fmt_intfmt(spec); if (tvisint(L->base+arg-1))
len = (MSize)sprintf(buf, spec, string_fmt_intarg(L, arg)); lj_strfmt_putxint(sb, sf, intV(L->base+arg-1));
else
lj_strfmt_putnum_uint(sb, sf, lj_lib_checknum(L, arg));
break; break;
case 'o': case 'u': case 'x': case 'X': case STRFMT_NUM:
string_fmt_intfmt(spec); lj_strfmt_putnum(sb, sf, lj_lib_checknum(L, arg));
len = (MSize)sprintf(buf, spec, string_fmt_uintarg(L, arg));
break; break;
case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': { case STRFMT_STR: {
TValue tv; GCstr *str = string_fmt_tostring(L, arg, retry);
tv.n = lj_lib_checknum(L, arg); if (str == NULL)
if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) { retry = 1;
/* Canonicalize output of non-finite values. */ else if ((sf & STRFMT_T_QUOTED))
char nbuf[LJ_STR_NUMBUF]; lj_strfmt_putquoted(sb, str);
char *p = lj_str_bufnum(nbuf, &tv); else
if (fmt[-1] < 'a') { *(p-3) -= 0x20; *(p-2) -= 0x20; *(p-1) -= 0x20; } lj_strfmt_putstr(sb, sf, str);
*p = '\0';
for (p = spec; *p < 'A' && *p != '.'; p++) ;
*p++ = 's'; *p = '\0';
len = (MSize)sprintf(buf, spec, nbuf);
break; break;
} }
len = (MSize)sprintf(buf, spec, (double)tv.n); case STRFMT_CHAR:
lj_strfmt_putchar(sb, sf, lj_lib_checkint(L, arg));
break; break;
} case STRFMT_PTR: /* No formatting. */
case 'q':
string_fmt_quoted(sb, lj_lib_checkstr(L, arg));
continue;
case 'p':
setsbufP(sb, lj_str_bufptr(lj_buf_more(sb, LJ_STR_PTRBUF), setsbufP(sb, lj_str_bufptr(lj_buf_more(sb, LJ_STR_PTRBUF),
lua_topointer(L, arg))); lua_topointer(L, arg)));
continue;
case 's': {
GCstr *str = string_fmt_tostring(L, arg);
if (!strchr(spec, '.') && str->len >= 100) { /* Format overflow? */
lj_buf_putmem(sb, strdata(str), str->len); /* Use orig string. */
continue;
}
len = (MSize)sprintf(buf, spec, strdata(str));
break; break;
}
default: default:
lj_err_callerv(L, LJ_ERR_STRFMTO, fmt[-1] ? fmt[-1] : ' '); lua_assert(0);
break; break;
} }
lj_buf_putmem(sb, buf, len);
} }
} }
if (retry++ == 1) goto again;
setstrV(L, L->top-1, lj_buf_str(L, sb)); setstrV(L, L->top-1, lj_buf_str(L, sb));
lj_gc_check(L); lj_gc_check(L);
return 1; return 1;

View File

@ -96,9 +96,7 @@ ERRDEF(STRPATX, "pattern too complex")
ERRDEF(STRCAPI, "invalid capture index") ERRDEF(STRCAPI, "invalid capture index")
ERRDEF(STRCAPN, "too many captures") ERRDEF(STRCAPN, "too many captures")
ERRDEF(STRCAPU, "unfinished capture") ERRDEF(STRCAPU, "unfinished capture")
ERRDEF(STRFMTO, "invalid option " LUA_QL("%%%c") " to " LUA_QL("format")) ERRDEF(STRFMT, "invalid option " LUA_QS " to " LUA_QL("format"))
ERRDEF(STRFMTR, "invalid format (repeated flags)")
ERRDEF(STRFMTW, "invalid format (width or precision too long)")
ERRDEF(STRGSRV, "invalid replacement value (a %s)") ERRDEF(STRGSRV, "invalid replacement value (a %s)")
ERRDEF(BADMODN, "name conflict for module " LUA_QS) ERRDEF(BADMODN, "name conflict for module " LUA_QS)
#if LJ_HASJIT #if LJ_HASJIT
@ -117,7 +115,6 @@ ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS)
/* Lexer/parser errors. */ /* Lexer/parser errors. */
ERRDEF(XMODE, "attempt to load chunk with wrong mode") ERRDEF(XMODE, "attempt to load chunk with wrong mode")
ERRDEF(XNEAR, "%s near " LUA_QS) ERRDEF(XNEAR, "%s near " LUA_QS)
ERRDEF(XELEM, "lexical element too long")
ERRDEF(XLINES, "chunk has too many lines") ERRDEF(XLINES, "chunk has too many lines")
ERRDEF(XLEVELS, "chunk has too many syntax levels") ERRDEF(XLEVELS, "chunk has too many syntax levels")
ERRDEF(XNUMBER, "malformed number") ERRDEF(XNUMBER, "malformed number")

View File

@ -25,7 +25,7 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
/* Type conversions. */ /* Type conversions. */
LJ_FUNC char * LJ_FASTCALL lj_str_bufint(char *buf, int32_t k); LJ_FUNC char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k);
LJ_FUNC char * LJ_FASTCALL lj_str_bufnum(char *p, cTValue *o); LJ_FUNC char * LJ_FASTCALL lj_str_bufnum(char *p, cTValue *o);
LJ_FUNC char * LJ_FASTCALL lj_str_bufptr(char *p, const void *v); LJ_FUNC char * LJ_FASTCALL lj_str_bufptr(char *p, const void *v);
LJ_FUNC const char *lj_str_buftv(char *buf, cTValue *o, MSize *lenp); LJ_FUNC const char *lj_str_buftv(char *buf, cTValue *o, MSize *lenp);

295
src/lj_strfmt.c Normal file
View File

@ -0,0 +1,295 @@
/*
** String formatting.
** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
*/
#include <stdio.h>
#define lj_strfmt_c
#define LUA_CORE
#include "lj_obj.h"
#include "lj_buf.h"
#include "lj_char.h"
#include "lj_strfmt.h"
/* -- Format parser ------------------------------------------------------- */
/* Conversion character lookup table, indexed by (character - 'A').
** Covers 'A' through 'x'. A zero entry marks an unsupported conversion.
** The entries are the STRFMT_<letter> type/subtype constants; letter case
** is not stored here (the parser derives it from the character itself).
*/
static const uint8_t strfmt_map[('x'-'A')+1] = {
/* 'A'..'M' */
STRFMT_A,0,0,0,STRFMT_E,0,STRFMT_G,0,0,0,0,0,0,
/* 'N'..'Z' */
0,0,0,0,0,0,0,0,0,0,STRFMT_X,0,0,
/* The six characters between 'Z' and 'a'. */
0,0,0,0,0,0,
/* 'a'..'m' */
STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0,
/* 'n'..'x' */
0,STRFMT_O,STRFMT_P,STRFMT_Q,0,STRFMT_S,0,STRFMT_U,0,0,STRFMT_X
};
/* Parse the next element of a format string.
**
** Returns one of:
** - STRFMT_LIT: fs->str/fs->len describe a run of literal text.
** - STRFMT_EOF: nothing is left (the literal run is empty).
** - STRFMT_ERR: invalid conversion; fs->str/fs->len cover the offending
**   specifier and fs->p is moved to the end of the format string.
** - Otherwise a format indicator: type/subtype from strfmt_map combined with
**   the flag bits, the width and (precision+1) in their bit fields.
** fs->p is advanced past whatever was consumed.
*/
SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs)
{
const uint8_t *p = fs->p, *e = fs->e;
fs->str = (const char *)p;
for (; p < e; p++) {
if (*p == '%') { /* Escape char? */
/* p[1] may be the byte at e; lj_strfmt_init() asserts that it is a NUL. */
if (p[1] == '%') { /* '%%'? */
/* The literal run ends with the first '%'; the second one is skipped. */
fs->p = ++p+1;
goto retlit;
} else {
SFormat sf = 0;
uint32_t c;
/* Literal text precedes this '%': return that text first and parse the
** specifier on the next call.
*/
if (p != (const uint8_t *)fs->str)
break;
/* The unsigned range check admits only characters from ' ' to '0'. */
for (p++; (uint32_t)*p - ' ' <= (uint32_t)('0' - ' '); p++) {
/* Parse flags. */
if (*p == '-') sf |= STRFMT_F_LEFT;
else if (*p == '+') sf |= STRFMT_F_PLUS;
else if (*p == '0') sf |= STRFMT_F_ZERO;
else if (*p == ' ') sf |= STRFMT_F_SPACE;
else if (*p == '#') sf |= STRFMT_F_ALT;
else break;
}
/* At most two digits are consumed for the width. */
if ((uint32_t)*p - '0' < 10) { /* Parse width. */
uint32_t width = (uint32_t)*p++ - '0';
if ((uint32_t)*p - '0' < 10)
width = (uint32_t)*p++ - '0' + width*10;
sf |= (width << STRFMT_SH_WIDTH);
}
/* At most two digits are consumed for the precision; a bare '.' means 0. */
if (*p == '.') { /* Parse precision. */
uint32_t prec = 0;
p++;
if ((uint32_t)*p - '0' < 10) {
prec = (uint32_t)*p++ - '0';
if ((uint32_t)*p - '0' < 10)
prec = (uint32_t)*p++ - '0' + prec*10;
}
/* Stored with a bias of 1, so a zero field means "no precision given". */
sf |= ((prec+1) << STRFMT_SH_PREC);
}
/* Parse conversion. */
c = (uint32_t)*p - 'A';
if (LJ_LIKELY(c <= (uint32_t)('x' - 'A'))) {
uint32_t sx = strfmt_map[c];
if (sx) {
fs->p = p+1;
/* Bit 0x20 of (character - 'A') is clear for 'A'..'Z', set for 'a'..'x'. */
return (sf | sx | ((c & 0x20) ? 0 : STRFMT_F_UPPER));
}
}
/* Return error location. */
/* Include the offending character unless it is a control char or NUL. */
if (*p >= 32) p++;
fs->len = (MSize)(p - (const uint8_t *)fs->str);
fs->p = fs->e;
return STRFMT_ERR;
}
}
}
fs->p = p;
retlit:
fs->len = (MSize)(p - (const uint8_t *)fs->str);
return fs->len ? STRFMT_LIT : STRFMT_EOF;
}
/* -- Format conversions -------------------------------------------------- */
/* Add formatted char to buffer.
**
** Writes c as a single byte, padded with spaces to the field width taken
** from sf. The byte comes first if STRFMT_F_LEFT is set, otherwise last.
** Returns sb.
*/
SBuf *lj_strfmt_putchar(SBuf *sb, SFormat sf, int32_t c)
{
  const MSize width = STRFMT_WIDTH(sf);
  const MSize npad = width > 1 ? width - 1 : 0;
  const int left = (sf & STRFMT_F_LEFT) != 0;
  char *out = lj_buf_more(sb, npad + 1);
  MSize i;
  if (left)
    *out++ = (char)c;
  for (i = 0; i < npad; i++)
    *out++ = ' ';
  if (!left)
    *out++ = (char)c;
  setsbufP(sb, out);
  return sb;
}
/* Add formatted string to buffer.
**
** Writes at most STRFMT_PREC(sf) bytes of str, padded with spaces to the
** field width taken from sf. The text comes first if STRFMT_F_LEFT is set,
** otherwise last. Returns sb.
*/
SBuf *lj_strfmt_putstr(SBuf *sb, SFormat sf, GCstr *str)
{
  const MSize prec = STRFMT_PREC(sf);
  const MSize width = STRFMT_WIDTH(sf);
  const int left = (sf & STRFMT_F_LEFT) != 0;
  MSize len = str->len;
  MSize npad, i;
  char *out;
  if (len > prec)  /* Truncate to the precision. */
    len = prec;
  npad = width > len ? width - len : 0;
  out = lj_buf_more(sb, len + npad);
  if (left)
    out = lj_buf_wmem(out, strdata(str), len);
  for (i = 0; i < npad; i++)
    *out++ = ' ';
  if (!left)
    out = lj_buf_wmem(out, strdata(str), len);
  setsbufP(sb, out);
  return sb;
}
/* Add quoted string to buffer (no formatting).
**
** Writes str to sb enclosed in double quotes. A double quote, a backslash
** and a newline are each written as a backslash followed by the character
** itself. Control characters are written as a backslash followed by their
** decimal code. All other bytes are copied unchanged. Returns sb.
*/
SBuf *lj_strfmt_putquoted(SBuf *sb, GCstr *str)
{
const char *s = strdata(str);
MSize len = str->len;
lj_buf_putb(sb, '"');
while (len--) {
uint32_t c = (uint32_t)(uint8_t)*s++;
/* At most 4 bytes are written per input byte: '\\' plus three digits. */
char *p = lj_buf_more(sb, 4);
if (c == '"' || c == '\\' || c == '\n') {
*p++ = '\\';
} else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
uint32_t d;
*p++ = '\\';
/* Use the full three-digit form if the code needs it or if the next byte
** is a digit, so the following digit cannot be read as part of the escape.
** NOTE(review): for the last byte this peeks at s[len]; presumably the
** string data is NUL-terminated -- confirm.
*/
if (c >= 100 || lj_char_isdigit((uint8_t)*s)) {
/* Hundreds digit: '1' for codes >= 100, otherwise a padding '0'. */
*p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
goto tens;
} else if (c >= 10) {
tens:
/* d = c / 10 via multiply-shift (c is below 100 here); c keeps c % 10. */
d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d);
}
/* Turn the remaining ones value into its ASCII digit. */
c += '0';
}
*p++ = (char)c;
setsbufP(sb, p);
}
lj_buf_putb(sb, '"');
return sb;
}
/* Add formatted signed/unsigned integer to buffer.
**
** sb: destination buffer.
** sf: format indicator. STRFMT_TYPE(sf) == STRFMT_INT selects signed
**     treatment of k; STRFMT_T_HEX/STRFMT_T_OCT select the base; the flag
**     bits, width and precision fields control padding and prefixes.
** k:  value to format. For STRFMT_INT it is reinterpreted as int64_t.
** Returns sb.
**
** The prefix variable packs the last prefix character in its low 8 bits and
** the number of prefix characters (1 for a sign, 2 for "0x"/"0X") in the
** bits above.
*/
SBuf *lj_strfmt_putxint(SBuf *sb, SFormat sf, uint64_t k)
{
  /* 22 octal digits for 64 bits plus one leading '0' for the '#' flag. */
  char buf[1+22], *q = buf + sizeof(buf), *p;
#ifdef LUA_USE_ASSERT
  char *ps;
#endif
  MSize prefix = 0, len, prec, pprec, width, need;

  /* Figure out signed prefixes. */
  if (STRFMT_TYPE(sf) == STRFMT_INT) {
    if ((int64_t)k < 0) {
      /* Negate in unsigned arithmetic: well-defined even for INT64_MIN. */
      k = ~k+1u;
      prefix = 256 + '-';
    } else if ((sf & STRFMT_F_PLUS)) {
      prefix = 256 + '+';
    } else if ((sf & STRFMT_F_SPACE)) {
      prefix = 256 + ' ';
    }
  }

  /* Convert number and store to fixed-size buffer in reverse order. */
  prec = STRFMT_PREC(sf);
  /* An explicit precision disables zero padding to the field width. */
  if ((int32_t)prec >= 0) sf &= ~STRFMT_F_ZERO;
  if (k == 0) {  /* Special-case zero argument. */
    /* No digit at all for an explicit zero precision, except for "%#o". */
    if (prec != 0 ||
        (sf & (STRFMT_T_OCT|STRFMT_F_ALT)) == (STRFMT_T_OCT|STRFMT_F_ALT))
      *--q = '0';
  } else if (!(sf & (STRFMT_T_HEX|STRFMT_T_OCT))) {  /* Decimal. */
    uint32_t k2;
    /* 64 bit division only while the value does not fit into 32 bits. */
    while ((k >> 32)) { *--q = (char)('0' + k % 10); k /= 10; }
    k2 = (uint32_t)k;
    do { *--q = (char)('0' + k2 % 10); k2 /= 10; } while (k2);
  } else if ((sf & STRFMT_T_HEX)) {  /* Hex. */
    const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEF" :
                                                 "0123456789abcdef";
    do { *--q = hexdig[(k & 15)]; k >>= 4; } while (k);
    /* The prefix follows the case of the conversion: "0x" or "0X". */
    if ((sf & STRFMT_F_ALT))
      prefix = 512 + ((sf & STRFMT_F_UPPER) ? 'X' : 'x');
  } else {  /* Octal. */
    do { *--q = (char)('0' + (uint32_t)(k & 7)); k >>= 3; } while (k);
    if ((sf & STRFMT_F_ALT)) *--q = '0';
  }

  /* Calculate sizes. */
  len = (MSize)(buf + sizeof(buf) - q);
  /* Also maps "no precision" (all bits set, i.e. -1) to the digit count. */
  if ((int32_t)len >= (int32_t)prec) prec = len;
  width = STRFMT_WIDTH(sf);
  pprec = prec + (prefix >> 8);
  need = width > pprec ? width : pprec;
  p = lj_buf_more(sb, need);
#ifdef LUA_USE_ASSERT
  ps = p;
#endif

  /* Format number with leading/trailing whitespace and zeros. */
  if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0)
    while (width-- > pprec) *p++ = ' ';
  if (prefix) {
    if ((prefix >> 8) == 2) *p++ = '0';  /* Two-character hex prefix. */
    *p++ = (char)prefix;
  }
  if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO)
    while (width-- > pprec) *p++ = '0';
  while (prec-- > len) *p++ = '0';
  while (q < buf + sizeof(buf)) *p++ = *q++;  /* Add number itself. */
  if ((sf & STRFMT_F_LEFT))
    while (width-- > pprec) *p++ = ' ';
  lua_assert(need == (MSize)(p - ps));
  setsbufP(sb, p);
  return sb;
}
/* Add number formatted as signed integer to buffer.
**
** Converts n to a 64 bit signed integer. A value that fits into 32 bits and
** is formatted with a plain STRFMT_INT indicator goes through
** lj_buf_putint(); everything else goes through lj_strfmt_putxint().
*/
SBuf *lj_strfmt_putnum_int(SBuf *sb, SFormat sf, lua_Number n)
{
  const int64_t ival = (int64_t)n;
  if (sf != STRFMT_INT || !checki32(ival))
    return lj_strfmt_putxint(sb, sf, (uint64_t)ival);
  return lj_buf_putint(sb, ival);  /* Shortcut for plain %d. */
}
/* Add number formatted as unsigned integer to buffer.
**
** Numbers of at least 2^63 have 2^64 subtracted before the conversion to
** int64_t; the result is then reinterpreted as uint64_t and formatted with
** lj_strfmt_putxint().
*/
SBuf *lj_strfmt_putnum_uint(SBuf *sb, SFormat sf, lua_Number n)
{
  const int64_t bits = (n >= 9223372036854775808.0) ?
                       (int64_t)(n - 18446744073709551616.0) :
                       (int64_t)n;
  return lj_strfmt_putxint(sb, sf, (uint64_t)bits);
}
/* Max. sprintf buffer size needed. At least #string.format("%.99f", -1e308). */
#define STRFMT_FMTNUMBUF 512
/* Add formatted floating-point number to buffer.
**
** Non-finite values are written directly as "nan"/"inf" (upper case if
** STRFMT_F_UPPER is set), optionally prefixed and padded with spaces to the
** field width. Finite values are formatted by sprintf() with a format
** string rebuilt from sf. Returns sb.
*/
SBuf *lj_strfmt_putnum(SBuf *sb, SFormat sf, lua_Number n)
{
TValue tv;
tv.n = n;
/* Shifting out the sign bit leaves exponent+mantissa; an all-ones exponent
** means infinity or NaN.
*/
if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) {
/* Canonicalize output of non-finite values. */
MSize width = STRFMT_WIDTH(sf), len = 3;
/* ch holds three packed characters; XOR with 0x20 per byte flips the case. */
int prefix = 0, ch = (sf & STRFMT_F_UPPER) ? 0x202020 : 0;
char *p;
/* A non-zero mantissa means NaN, a zero mantissa means infinity. */
if (((tv.u32.hi & 0x000fffff) | tv.u32.lo) != 0) {
ch ^= ('n' << 16) | ('a' << 8) | 'n';
/* Only the space flag yields a prefix for NaN. */
if ((sf & STRFMT_F_SPACE)) prefix = ' ';
} else {
ch ^= ('i' << 16) | ('n' << 8) | 'f';
if ((tv.u32.hi & 0x80000000)) prefix = '-';
else if ((sf & STRFMT_F_PLUS)) prefix = '+';
else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
}
if (prefix) len = 4;
p = lj_buf_more(sb, width > len ? width : len);
/* Padding is always done with spaces here, before or after the text. */
if (!(sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
if (prefix) *p++ = prefix;
*p++ = (char)(ch >> 16); *p++ = (char)(ch >> 8); *p++ = (char)ch;
if ((sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
setsbufP(sb, p);
} else { /* Delegate to sprintf() for now. */
/* Truncated to 8 bits: a precision of 255 means "no precision given". */
uint8_t width = (uint8_t)STRFMT_WIDTH(sf), prec = (uint8_t)STRFMT_PREC(sf);
/* '%' + 5 flags + 2 width digits + '.' and 2 precision digits +
** conversion character + NUL.
*/
char fmt[1+5+2+3+1+1], *p = fmt;
*p++ = '%';
if ((sf & STRFMT_F_LEFT)) *p++ = '-';
if ((sf & STRFMT_F_PLUS)) *p++ = '+';
if ((sf & STRFMT_F_ZERO)) *p++ = '0';
if ((sf & STRFMT_F_SPACE)) *p++ = ' ';
if ((sf & STRFMT_F_ALT)) *p++ = '#';
if (width) {
uint8_t x = width / 10, y = width % 10;
if (x) *p++ = '0' + x;
*p++ = '0' + y;
}
if (prec != 255) {
uint8_t x = prec / 10, y = prec % 10;
*p++ = '.';
if (x) *p++ = '0' + x;
*p++ = '0' + y;
}
/* 0x67666561 packs "aefg"; STRFMT_FP(sf) selects the byte, and XOR with
** 0x20 turns it into the upper-case conversion character.
*/
*p++ = (0x67666561 >> (STRFMT_FP(sf)<<3)) ^ ((sf & STRFMT_F_UPPER)?0x20:0);
*p = '\0';
p = lj_buf_more(sb, STRFMT_FMTNUMBUF);
/* sprintf() returns the number of characters written. */
setsbufP(sb, p + sprintf(p, fmt, n));
}
return sb;
}

84
src/lj_strfmt.h Normal file
View File

@ -0,0 +1,84 @@
/*
** String formatting.
** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_STRFMT_H
#define _LJ_STRFMT_H
#include "lj_obj.h"
/* Format indicator: type, subtype, flags, width and precision packed into
** one 32 bit value (see the STRFMT_* masks, shifts and accessor macros).
*/
typedef uint32_t SFormat; /* Format indicator. */
/* Format parser state.
** Set up by lj_strfmt_init() and advanced by lj_strfmt_parse(), which also
** fills in str/len when it returns a literal or an error location.
*/
typedef struct FormatState {
const uint8_t *p; /* Current format string pointer. */
const uint8_t *e; /* End of format string. */
const char *str; /* Returned literal string. */
MSize len; /* Size of literal string. */
} FormatState;
/* Format types (max. 16).
** The limit follows from STRFMT_TYPE(), which extracts the type with a
** 4 bit mask. The first row holds parser results that are not conversions,
** the second row holds the actual conversion types.
*/
typedef enum FormatType {
STRFMT_EOF, STRFMT_ERR, STRFMT_LIT,
STRFMT_INT, STRFMT_UINT, STRFMT_NUM, STRFMT_STR, STRFMT_CHAR, STRFMT_PTR
} FormatType;
/* Format subtypes (bits are reused).
** The same two bits (0x0010, 0x0020) mean different things depending on the
** format type named in the trailing comment of each line.
*/
#define STRFMT_T_HEX 0x0010 /* STRFMT_UINT */
#define STRFMT_T_OCT 0x0020 /* STRFMT_UINT */
#define STRFMT_T_FP_A 0x0000 /* STRFMT_NUM */
#define STRFMT_T_FP_E 0x0010 /* STRFMT_NUM */
#define STRFMT_T_FP_F 0x0020 /* STRFMT_NUM */
#define STRFMT_T_FP_G 0x0030 /* STRFMT_NUM */
#define STRFMT_T_QUOTED 0x0010 /* STRFMT_STR */
/* Format flags. */
#define STRFMT_F_LEFT 0x0100
#define STRFMT_F_PLUS 0x0200
#define STRFMT_F_ZERO 0x0400
#define STRFMT_F_SPACE 0x0800
#define STRFMT_F_ALT 0x1000
#define STRFMT_F_UPPER 0x2000
/* Format indicator fields.
** Width occupies bits 16-23 and precision bits 24-31. STRFMT_PREC()
** subtracts 1 from the stored field in unsigned arithmetic, so a zero field
** yields an all-ones value.
*/
#define STRFMT_SH_WIDTH 16
#define STRFMT_SH_PREC 24
#define STRFMT_TYPE(sf) ((FormatType)((sf) & 15))
#define STRFMT_WIDTH(sf) (((sf) >> STRFMT_SH_WIDTH) & 255u)
#define STRFMT_PREC(sf) ((((sf) >> STRFMT_SH_PREC) & 255u) - 1u)
#define STRFMT_FP(sf) (((sf) >> 4) & 3)
/* Formats for conversion characters.
** STRFMT_<letter> is the type plus subtype for the conversion character of
** that name.
*/
#define STRFMT_A (STRFMT_NUM|STRFMT_T_FP_A)
#define STRFMT_C (STRFMT_CHAR)
#define STRFMT_D (STRFMT_INT)
#define STRFMT_E (STRFMT_NUM|STRFMT_T_FP_E)
#define STRFMT_F (STRFMT_NUM|STRFMT_T_FP_F)
#define STRFMT_G (STRFMT_NUM|STRFMT_T_FP_G)
#define STRFMT_I STRFMT_D
#define STRFMT_O (STRFMT_UINT|STRFMT_T_OCT)
#define STRFMT_P (STRFMT_PTR)
#define STRFMT_Q (STRFMT_STR|STRFMT_T_QUOTED)
#define STRFMT_S (STRFMT_STR)
#define STRFMT_U (STRFMT_UINT)
#define STRFMT_X (STRFMT_UINT|STRFMT_T_HEX)
/* Initialize the parser state for the len bytes of format string at p. */
static LJ_AINLINE void lj_strfmt_init(FormatState *fs, const char *p, MSize len)
{
  const uint8_t *start = (const uint8_t *)p;
  fs->p = start;
  fs->e = start + len;
  /* Must be NUL-terminated (may have NULs inside). */
  lua_assert(start[len] == 0);
}
/* Format parser: returns the next element of the format string in fs. */
LJ_FUNC SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs);
/* Format conversions: each appends its formatted argument to the buffer sb
** according to the format indicator sf and returns an SBuf pointer.
*/
LJ_FUNC SBuf *lj_strfmt_putchar(SBuf *sb, SFormat sf, int32_t c);
LJ_FUNC SBuf *lj_strfmt_putstr(SBuf *sb, SFormat sf, GCstr *str);
LJ_FUNC SBuf *lj_strfmt_putquoted(SBuf *sb, GCstr *str);
LJ_FUNC SBuf *lj_strfmt_putxint(SBuf *sb, SFormat sf, uint64_t k);
LJ_FUNC SBuf *lj_strfmt_putnum_int(SBuf *sb, SFormat sf, lua_Number n);
LJ_FUNC SBuf *lj_strfmt_putnum_uint(SBuf *sb, SFormat sf, lua_Number n);
LJ_FUNC SBuf *lj_strfmt_putnum(SBuf *sb, SFormat sf, lua_Number n);
#endif

View File

@ -45,6 +45,7 @@
#include "lj_vmevent.c" #include "lj_vmevent.c"
#include "lj_vmmath.c" #include "lj_vmmath.c"
#include "lj_strscan.c" #include "lj_strscan.c"
#include "lj_strfmt.c"
#include "lj_api.c" #include "lj_api.c"
#include "lj_lex.c" #include "lj_lex.c"
#include "lj_parse.c" #include "lj_parse.c"