Refactor string.format().

This commit is contained in:
Mike Pall 2013-05-12 22:37:02 +02:00
parent bb2cc1dcaf
commit 5bb1f0edac
8 changed files with 465 additions and 195 deletions

View File

@ -439,7 +439,8 @@ LJLIB_C= $(LJLIB_O:.o=.c)
LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
lj_api.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
lj_strfmt.o lj_api.o \
lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \

View File

@ -35,7 +35,7 @@ lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
lj_tab.h lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h \
lj_char.h lj_lib.h lj_libdef.h
lj_char.h lj_strfmt.h lj_lib.h lj_libdef.h
lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
lj_tab.h lj_lib.h lj_libdef.h
@ -179,6 +179,8 @@ lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h
lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h lj_char.h
lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_buf.h lj_gc.h lj_str.h lj_char.h lj_strfmt.h
lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_char.h lj_strscan.h
lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
@ -203,19 +205,19 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c lj_tab.c lj_func.c \
lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c lj_state.c \
lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h luajit.h lj_vmevent.c \
lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c lj_lex.c lualib.h \
lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c \
lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h \
lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \
lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \
lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \
lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \
lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \
lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \
lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \
lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \
lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \
lib_ffi.c lib_init.c
lj_vmevent.h lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt.h lj_api.c \
lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \
lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \
lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \
lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \
lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h lj_iropt.h lj_opt_mem.c \
lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c \
lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c \
lj_record.h lj_ffrecord.h lj_crecord.c lj_crecord.h lj_ffrecord.c \
lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h lj_asm_*.h lj_trace.c \
lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c lj_libdef.h \
lib_math.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c \
lib_debug.c lib_bit.c lib_jit.c lib_ffi.c lib_init.c
luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \

View File

@ -6,8 +6,6 @@
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
*/
#include <stdio.h>
#define lib_string_c
#define LUA_LIB
@ -26,6 +24,7 @@
#include "lj_ff.h"
#include "lj_bcdump.h"
#include "lj_char.h"
#include "lj_strfmt.h"
#include "lj_lib.h"
/* ------------------------------------------------------------------------ */
@ -641,130 +640,20 @@ LJLIB_CF(string_gsub)
/* ------------------------------------------------------------------------ */
/* Max. buffer size needed (at least #string.format("%99.99f", -1e308)). */
#define STRING_FMT_MAXBUF 512
/* Valid format specifier flags. */
#define STRING_FMT_FLAGS "-+ #0"
/* Max. format specifier size. */
#define STRING_FMT_MAXSPEC \
(sizeof(STRING_FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
/* Add quoted string to buffer. */
static void string_fmt_quoted(SBuf *sb, GCstr *str)
{
const char *s = strdata(str);
MSize len = str->len;
lj_buf_putb(sb, '"');
while (len--) {
uint32_t c = (uint32_t)(uint8_t)*s++;
char *p = lj_buf_more(sb, 4);
if (c == '"' || c == '\\' || c == '\n') {
*p++ = '\\';
} else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
uint32_t d;
*p++ = '\\';
if (c >= 100 || lj_char_isdigit((uint8_t)*s)) {
*p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
goto tens;
} else if (c >= 10) {
tens:
d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d);
}
c += '0';
}
*p++ = (char)c;
setsbufP(sb, p);
}
lj_buf_putb(sb, '"');
}
/* Scan format and generate format specifier. */
static const char *string_fmt_scan(lua_State *L, char *spec, const char *fmt)
{
const char *p = fmt;
while (*p && strchr(STRING_FMT_FLAGS, *p) != NULL) p++; /* Skip flags. */
if ((size_t)(p - fmt) >= sizeof(STRING_FMT_FLAGS))
lj_err_caller(L, LJ_ERR_STRFMTR);
if (lj_char_isdigit((uint8_t)*p)) p++; /* Skip max. 2 digits for width. */
if (lj_char_isdigit((uint8_t)*p)) p++;
if (*p == '.') {
p++;
if (lj_char_isdigit((uint8_t)*p)) p++; /* Skip max. 2 digits for prec. */
if (lj_char_isdigit((uint8_t)*p)) p++;
}
if (lj_char_isdigit((uint8_t)*p))
lj_err_caller(L, LJ_ERR_STRFMTW);
*spec++ = '%';
strncpy(spec, fmt, (size_t)(p - fmt + 1));
spec += p - fmt + 1;
*spec = '\0';
return p;
}
/* Patch LUA_INTRFRMLEN into integer format specifier. */
static void string_fmt_intfmt(char *spec)
{
char c;
do {
c = *spec++;
} while (*spec);
*--spec = (LUA_INTFRMLEN)[0];
if ((LUA_INTFRMLEN)[1]) *++spec = (LUA_INTFRMLEN)[1];
*++spec = c;
*++spec = '\0';
}
/* Derive sprintf argument for integer format. Ugly. */
static LUA_INTFRM_T string_fmt_intarg(lua_State *L, int arg)
{
if (sizeof(LUA_INTFRM_T) == 4) {
return (LUA_INTFRM_T)lj_lib_checkbit(L, arg);
} else {
cTValue *o;
lj_lib_checknumber(L, arg);
o = L->base+arg-1;
if (tvisint(o))
return (LUA_INTFRM_T)intV(o);
else
return (LUA_INTFRM_T)numV(o);
}
}
/* Derive sprintf argument for unsigned integer format. Ugly. */
static unsigned LUA_INTFRM_T string_fmt_uintarg(lua_State *L, int arg)
{
if (sizeof(LUA_INTFRM_T) == 4) {
return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg);
} else {
cTValue *o;
lj_lib_checknumber(L, arg);
o = L->base+arg-1;
if (tvisint(o))
return (unsigned LUA_INTFRM_T)intV(o);
else if ((int32_t)o->u32.hi < 0)
return (unsigned LUA_INTFRM_T)(LUA_INTFRM_T)numV(o);
else
return (unsigned LUA_INTFRM_T)numV(o);
}
}
/* Emulate tostring() inline. */
static GCstr *string_fmt_tostring(lua_State *L, int arg)
static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry)
{
TValue *o = L->base+arg-1;
cTValue *mo;
lua_assert(o < L->top); /* Caller already checks for existence. */
if (LJ_LIKELY(tvisstr(o)))
return strV(o);
if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
copyTV(L, L->top++, mo);
copyTV(L, L->top++, o);
lua_call(L, 1, 1);
L->top--;
if (tvisstr(L->top))
return strV(L->top);
o = L->base+arg-1;
copyTV(L, o, L->top);
copyTV(L, L->base+arg-1, --L->top);
return NULL; /* Buffer may be overwritten, retry. */
}
if (tvisnumber(o)) {
return lj_str_fromnumber(L, o);
@ -775,84 +664,85 @@ static GCstr *string_fmt_tostring(lua_State *L, int arg)
} else if (tvistrue(o)) {
return lj_str_newlit(L, "true");
} else {
if (tvisfunc(o) && isffunc(funcV(o)))
lj_str_pushf(L, "function: builtin#%d", funcV(o)->c.ffid);
else
lj_str_pushf(L, "%s: %p", lj_typename(o), lua_topointer(L, arg));
L->top--;
return strV(L->top);
char buf[8+2+2+16], *p = buf;
if (tvisfunc(o) && isffunc(funcV(o))) {
p = lj_buf_wmem(p, "function: builtin#", 18);
p = lj_str_bufint(p, funcV(o)->c.ffid);
} else {
p = lj_buf_wmem(p, lj_typename(o), strlen(lj_typename(o)));
*p++ = ':'; *p++ = ' ';
p = lj_str_bufptr(p, lua_topointer(L, arg));
}
return lj_str_new(L, buf, (size_t)(p - buf));
}
}
LJLIB_CF(string_format)
{
int arg = 1, top = (int)(L->top - L->base);
GCstr *sfmt = lj_lib_checkstr(L, arg);
const char *fmt = strdata(sfmt);
const char *efmt = fmt + sfmt->len;
SBuf *sb = lj_buf_tmp_(L);
while (fmt < efmt) {
if (*fmt != L_ESC || *++fmt == L_ESC) {
lj_buf_putb(sb, *fmt++);
int arg, top = (int)(L->top - L->base);
GCstr *sfmt;
SBuf *sb;
FormatState fs;
SFormat sf;
int retry = 0;
again:
arg = 1;
sb = lj_buf_tmp_(L);
sfmt = lj_lib_checkstr(L, arg);
lj_strfmt_init(&fs, strdata(sfmt), sfmt->len);
while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
if (sf == STRFMT_LIT) {
lj_buf_putmem(sb, fs.str, fs.len);
} else if (sf == STRFMT_ERR) {
lj_err_callerv(L, LJ_ERR_STRFMT, strdata(lj_str_new(L, fs.str, fs.len)));
} else {
char buf[STRING_FMT_MAXBUF];
char spec[STRING_FMT_MAXSPEC];
MSize len = 0;
if (++arg > top)
luaL_argerror(L, arg, lj_obj_typename[0]);
fmt = string_fmt_scan(L, spec, fmt);
switch (*fmt++) {
case 'c':
len = (MSize)sprintf(buf, spec, lj_lib_checkint(L, arg));
break;
case 'd': case 'i':
string_fmt_intfmt(spec);
len = (MSize)sprintf(buf, spec, string_fmt_intarg(L, arg));
break;
case 'o': case 'u': case 'x': case 'X':
string_fmt_intfmt(spec);
len = (MSize)sprintf(buf, spec, string_fmt_uintarg(L, arg));
break;
case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': {
TValue tv;
tv.n = lj_lib_checknum(L, arg);
if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) {
/* Canonicalize output of non-finite values. */
char nbuf[LJ_STR_NUMBUF];
char *p = lj_str_bufnum(nbuf, &tv);
if (fmt[-1] < 'a') { *(p-3) -= 0x20; *(p-2) -= 0x20; *(p-1) -= 0x20; }
*p = '\0';
for (p = spec; *p < 'A' && *p != '.'; p++) ;
*p++ = 's'; *p = '\0';
len = (MSize)sprintf(buf, spec, nbuf);
break;
switch (STRFMT_TYPE(sf)) {
case STRFMT_INT:
if (tvisint(L->base+arg-1)) {
int32_t k = intV(L->base+arg-1);
if (sf == STRFMT_INT)
lj_buf_putint(sb, k); /* Shortcut for plain %d. */
else
lj_strfmt_putxint(sb, sf, k);
} else {
lj_strfmt_putnum_int(sb, sf, lj_lib_checknum(L, arg));
}
len = (MSize)sprintf(buf, spec, (double)tv.n);
break;
case STRFMT_UINT:
if (tvisint(L->base+arg-1))
lj_strfmt_putxint(sb, sf, intV(L->base+arg-1));
else
lj_strfmt_putnum_uint(sb, sf, lj_lib_checknum(L, arg));
break;
case STRFMT_NUM:
lj_strfmt_putnum(sb, sf, lj_lib_checknum(L, arg));
break;
case STRFMT_STR: {
GCstr *str = string_fmt_tostring(L, arg, retry);
if (str == NULL)
retry = 1;
else if ((sf & STRFMT_T_QUOTED))
lj_strfmt_putquoted(sb, str);
else
lj_strfmt_putstr(sb, sf, str);
break;
}
case 'q':
string_fmt_quoted(sb, lj_lib_checkstr(L, arg));
continue;
case 'p':
case STRFMT_CHAR:
lj_strfmt_putchar(sb, sf, lj_lib_checkint(L, arg));
break;
case STRFMT_PTR: /* No formatting. */
setsbufP(sb, lj_str_bufptr(lj_buf_more(sb, LJ_STR_PTRBUF),
lua_topointer(L, arg)));
continue;
case 's': {
GCstr *str = string_fmt_tostring(L, arg);
if (!strchr(spec, '.') && str->len >= 100) { /* Format overflow? */
lj_buf_putmem(sb, strdata(str), str->len); /* Use orig string. */
continue;
}
len = (MSize)sprintf(buf, spec, strdata(str));
break;
}
default:
lj_err_callerv(L, LJ_ERR_STRFMTO, fmt[-1] ? fmt[-1] : ' ');
lua_assert(0);
break;
}
lj_buf_putmem(sb, buf, len);
}
}
if (retry++ == 1) goto again;
setstrV(L, L->top-1, lj_buf_str(L, sb));
lj_gc_check(L);
return 1;

View File

@ -96,9 +96,7 @@ ERRDEF(STRPATX, "pattern too complex")
ERRDEF(STRCAPI, "invalid capture index")
ERRDEF(STRCAPN, "too many captures")
ERRDEF(STRCAPU, "unfinished capture")
ERRDEF(STRFMTO, "invalid option " LUA_QL("%%%c") " to " LUA_QL("format"))
ERRDEF(STRFMTR, "invalid format (repeated flags)")
ERRDEF(STRFMTW, "invalid format (width or precision too long)")
ERRDEF(STRFMT, "invalid option " LUA_QS " to " LUA_QL("format"))
ERRDEF(STRGSRV, "invalid replacement value (a %s)")
ERRDEF(BADMODN, "name conflict for module " LUA_QS)
#if LJ_HASJIT
@ -117,7 +115,6 @@ ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS)
/* Lexer/parser errors. */
ERRDEF(XMODE, "attempt to load chunk with wrong mode")
ERRDEF(XNEAR, "%s near " LUA_QS)
ERRDEF(XELEM, "lexical element too long")
ERRDEF(XLINES, "chunk has too many lines")
ERRDEF(XLEVELS, "chunk has too many syntax levels")
ERRDEF(XNUMBER, "malformed number")

View File

@ -25,7 +25,7 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
/* Type conversions. */
LJ_FUNC char * LJ_FASTCALL lj_str_bufint(char *buf, int32_t k);
LJ_FUNC char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k);
LJ_FUNC char * LJ_FASTCALL lj_str_bufnum(char *p, cTValue *o);
LJ_FUNC char * LJ_FASTCALL lj_str_bufptr(char *p, const void *v);
LJ_FUNC const char *lj_str_buftv(char *buf, cTValue *o, MSize *lenp);

295
src/lj_strfmt.c Normal file
View File

@ -0,0 +1,295 @@
/*
** String formatting.
** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
*/
#include <stdio.h>
#define lj_strfmt_c
#define LUA_CORE
#include "lj_obj.h"
#include "lj_buf.h"
#include "lj_char.h"
#include "lj_strfmt.h"
/* -- Format parser ------------------------------------------------------- */
/* Lookup table from conversion character to format type and subtype.
** Indexed by (character - 'A') and covers 'A' through 'x'. A zero entry
** marks a character that is not a valid conversion.
*/
static const uint8_t strfmt_map[('x'-'A')+1] = {
/* 'A'..'M': only A, E and G are valid (there is no entry for 'F'). */
STRFMT_A,0,0,0,STRFMT_E,0,STRFMT_G,0,0,0,0,0,0,
/* 'N'..'Z': only X is valid. */
0,0,0,0,0,0,0,0,0,0,STRFMT_X,0,0,
/* The six characters between 'Z' and 'a'. */
0,0,0,0,0,0,
/* 'a'..'m'. */
STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0,
/* 'n'..'x'. */
0,STRFMT_O,STRFMT_P,STRFMT_Q,0,STRFMT_S,0,STRFMT_U,0,0,STRFMT_X
};
/* Parse the next element of the format string held in fs.
**
** Returns:
**   STRFMT_LIT  for a run of literal text; fs->str/fs->len describe it.
**   STRFMT_EOF  when nothing is left to parse.
**   STRFMT_ERR  for an invalid conversion; fs->str/fs->len span the offending
**               spec and fs->p is moved to the end of the format string.
**   otherwise   a format indicator combining type, subtype, flags, width and
**               precision for one conversion.
** fs->p is advanced past whatever has been consumed.
*/
SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs)
{
const uint8_t *p = fs->p, *e = fs->e;
fs->str = (const char *)p;
for (; p < e; p++) {
if (*p == '%') { /* Escape char? */
/* p < e here, so p[1] is at most the byte at e. lj_strfmt_init() asserts
** that this byte is NUL.
*/
if (p[1] == '%') { /* '%%'? */
/* The literal run ends after the first '%'. The second '%' is skipped. */
fs->p = ++p+1;
goto retlit;
} else {
SFormat sf = 0;
uint32_t c;
/* Pending literal text is returned first (fs->p is left at the '%').
** The conversion itself is then parsed by the next call.
*/
if (p != (const uint8_t *)fs->str)
break;
/* The range check restricts the loop to characters from ' ' to '0'. Any
** character in that range which is not a flag ends the flags.
*/
for (p++; (uint32_t)*p - ' ' <= (uint32_t)('0' - ' '); p++) {
/* Parse flags. */
if (*p == '-') sf |= STRFMT_F_LEFT;
else if (*p == '+') sf |= STRFMT_F_PLUS;
else if (*p == '0') sf |= STRFMT_F_ZERO;
else if (*p == ' ') sf |= STRFMT_F_SPACE;
else if (*p == '#') sf |= STRFMT_F_ALT;
else break;
}
/* At most two digits are consumed for the width. */
if ((uint32_t)*p - '0' < 10) { /* Parse width. */
uint32_t width = (uint32_t)*p++ - '0';
if ((uint32_t)*p - '0' < 10)
width = (uint32_t)*p++ - '0' + width*10;
sf |= (width << STRFMT_SH_WIDTH);
}
/* At most two digits are consumed for the precision. A lone '.' gives a
** precision of 0. The field stores prec+1, so a zero field means that no
** precision was given.
*/
if (*p == '.') { /* Parse precision. */
uint32_t prec = 0;
p++;
if ((uint32_t)*p - '0' < 10) {
prec = (uint32_t)*p++ - '0';
if ((uint32_t)*p - '0' < 10)
prec = (uint32_t)*p++ - '0' + prec*10;
}
sf |= ((prec+1) << STRFMT_SH_PREC);
}
/* Parse conversion. */
c = (uint32_t)*p - 'A';
if (LJ_LIKELY(c <= (uint32_t)('x' - 'A'))) {
uint32_t sx = strfmt_map[c];
if (sx) {
fs->p = p+1;
/* 'a'-'A' is 32, so bit 0x20 of c is set for lowercase letters. A valid
** conversion without that bit is an uppercase one.
*/
return (sf | sx | ((c & 0x20) ? 0 : STRFMT_F_UPPER));
}
}
/* Return error location. */
/* The offending character is included in the span, unless its value is
** below 32 (this covers the terminating NUL).
*/
if (*p >= 32) p++;
fs->len = (MSize)(p - (const uint8_t *)fs->str);
/* With fs->p at the end, any further call returns STRFMT_EOF. */
fs->p = fs->e;
return STRFMT_ERR;
}
}
}
fs->p = p;
retlit:
/* An empty run means the end of the format string has been reached. */
fs->len = (MSize)(p - (const uint8_t *)fs->str);
return fs->len ? STRFMT_LIT : STRFMT_EOF;
}
/* -- Format conversions -------------------------------------------------- */
/* Append a single character to the buffer, padded with spaces up to the
** width given in sf. The padding goes after the character if the
** left-justify flag is set, otherwise before it. Returns sb.
*/
SBuf *lj_strfmt_putchar(SBuf *sb, SFormat sf, int32_t c)
{
  MSize width = STRFMT_WIDTH(sf);
  MSize npad = width > 1 ? width - 1 : 0;  /* Spaces next to the char. */
  int left = (sf & STRFMT_F_LEFT) != 0;
  char *w = lj_buf_more(sb, npad + 1);
  if (left)
    *w++ = (char)c;
  for (; npad > 0; npad--)
    *w++ = ' ';
  if (!left)
    *w++ = (char)c;
  setsbufP(sb, w);
  return sb;
}
/* Append a string to the buffer. The precision in sf limits how many bytes
** of the string are copied. The width in sf is the minimum field size and
** is filled with spaces, after the string if the left-justify flag is set,
** otherwise before it. Returns sb.
*/
SBuf *lj_strfmt_putstr(SBuf *sb, SFormat sf, GCstr *str)
{
  MSize len = str->len;
  MSize width = STRFMT_WIDTH(sf);
  MSize npad;
  char *w;
  if (len > STRFMT_PREC(sf))  /* Truncate to the precision. */
    len = STRFMT_PREC(sf);
  npad = width > len ? width - len : 0;
  w = lj_buf_more(sb, len + npad);
  if ((sf & STRFMT_F_LEFT)) {
    w = lj_buf_wmem(w, strdata(str), len);
    for (; npad > 0; npad--)
      *w++ = ' ';
  } else {
    for (; npad > 0; npad--)
      *w++ = ' ';
    w = lj_buf_wmem(w, strdata(str), len);
  }
  setsbufP(sb, w);
  return sb;
}
/* Add quoted string to buffer (no formatting). */
/* The string is wrapped in double quotes. A double quote, a backslash and a
** newline are each preceded by a backslash. Other control characters are
** written as a backslash followed by their decimal code. Returns sb.
*/
SBuf *lj_strfmt_putquoted(SBuf *sb, GCstr *str)
{
const char *s = strdata(str);
MSize len = str->len;
lj_buf_putb(sb, '"');
while (len--) {
uint32_t c = (uint32_t)(uint8_t)*s++;
/* One input character expands to at most 4 bytes: '\' plus 3 digits. */
char *p = lj_buf_more(sb, 4);
if (c == '"' || c == '\\' || c == '\n') {
/* The character itself follows the backslash unchanged (see below). */
*p++ = '\\';
} else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
uint32_t d;
*p++ = '\\';
/* Use the full 3-digit form if the code needs it, or if the next input
** character is a digit that would otherwise read as part of the escape.
** NOTE(review): for the last character *s is the byte after the string
** data -- presumably the NUL terminator, confirm against GCstr layout.
*/
if (c >= 100 || lj_char_isdigit((uint8_t)*s)) {
/* Hundreds digit: '1' for codes >= 100, else a padding '0'. */
*p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
goto tens;
} else if (c >= 10) {
tens:
/* Tens digit: (c * 205) >> 11 is c / 10 for the small values seen here. */
d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d);
}
/* Turn the remaining ones digit into its character. */
c += '0';
}
*p++ = (char)c;
setsbufP(sb, p);
}
lj_buf_putb(sb, '"');
return sb;
}
/* Add formatted signed/unsigned integer to buffer.
**
** sb  Destination buffer.
** sf  Format indicator: type (STRFMT_INT or STRFMT_UINT), subtype (decimal,
**     hex or octal), flags, width and precision.
** k   Value to format. For STRFMT_INT it holds the two's complement bits of
**     the signed value.
**
** Follows the C printf conventions: an explicit precision is the minimum
** number of digits and disables the '0' flag, '#' adds a "0x"/"0X" prefix
** to non-zero hex values and a leading '0' to octal values.
** Returns sb.
*/
SBuf *lj_strfmt_putxint(SBuf *sb, SFormat sf, uint64_t k)
{
  /* Room for 22 octal digits (64 bits) plus the extra '0' added by '#'. */
  char buf[1+22], *q = buf + sizeof(buf), *p;
#ifdef LUA_USE_ASSERT
  char *ps;
#endif
  /* prefix: the low byte is the (last) prefix character, bits 8 and up hold
  ** the number of prefix characters (1 for a sign, 2 for "0x"/"0X").
  */
  MSize prefix = 0, len, prec, pprec, width, need;

  /* Figure out signed prefixes. */
  if (STRFMT_TYPE(sf) == STRFMT_INT) {
    if ((int64_t)k < 0) {
      /* Negate in unsigned arithmetic. This is well-defined even for the
      ** most negative value, where a signed negation would overflow.
      */
      k = (uint64_t)0 - k;
      prefix = 256 + '-';
    } else if ((sf & STRFMT_F_PLUS)) {
      prefix = 256 + '+';
    } else if ((sf & STRFMT_F_SPACE)) {
      prefix = 256 + ' ';
    }
  }

  /* Convert number and store to fixed-size buffer in reverse order. */
  prec = STRFMT_PREC(sf);
  /* An explicit precision overrides zero-padding to the field width. */
  if ((int32_t)prec >= 0) sf &= ~STRFMT_F_ZERO;
  if (k == 0) {  /* Special-case zero argument. */
    /* Zero with an explicit precision of 0 produces no digits, except for
    ** "%#o" which always shows a '0'. No hex prefix is added for zero.
    */
    if (prec != 0 ||
	(sf & (STRFMT_T_OCT|STRFMT_F_ALT)) == (STRFMT_T_OCT|STRFMT_F_ALT))
      *--q = '0';
  } else if (!(sf & (STRFMT_T_HEX|STRFMT_T_OCT))) {  /* Decimal. */
    uint32_t k2;
    /* 64 bit divisions only as long as the value needs more than 32 bits. */
    while ((k >> 32)) { *--q = (char)('0' + k % 10); k /= 10; }
    k2 = (uint32_t)k;
    do { *--q = (char)('0' + k2 % 10); k2 /= 10; } while (k2);
  } else if ((sf & STRFMT_T_HEX)) {  /* Hex. */
    const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEF" :
						 "0123456789abcdef";
    do { *--q = hexdig[(k & 15)]; k >>= 4; } while (k);
    /* The prefix follows the case of the conversion: "0x" or "0X". */
    if ((sf & STRFMT_F_ALT))
      prefix = 512 + ((sf & STRFMT_F_UPPER) ? 'X' : 'x');
  } else {  /* Octal. */
    do { *--q = (char)('0' + (uint32_t)(k & 7)); k >>= 3; } while (k);
    if ((sf & STRFMT_F_ALT)) *--q = '0';
  }

  /* Calculate sizes. */
  len = (MSize)(buf + sizeof(buf) - q);
  /* From here on prec is the digit count including leading zeros. */
  if ((int32_t)len >= (int32_t)prec) prec = len;
  width = STRFMT_WIDTH(sf);
  pprec = prec + (prefix >> 8);
  need = width > pprec ? width : pprec;
  p = lj_buf_more(sb, need);
#ifdef LUA_USE_ASSERT
  ps = p;
#endif

  /* Format number with leading/trailing whitespace and zeros. */
  if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0)
    while (width-- > pprec) *p++ = ' ';
  if (prefix) {
    if ((prefix >> 8) == 2) *p++ = '0';  /* Two-character hex prefix. */
    *p++ = (char)prefix;
  }
  if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO)
    while (width-- > pprec) *p++ = '0';
  while (prec-- > len) *p++ = '0';
  while (q < buf + sizeof(buf)) *p++ = *q++;  /* Add number itself. */
  if ((sf & STRFMT_F_LEFT))
    while (width-- > pprec) *p++ = ' ';

  lua_assert(need == (MSize)(p - ps));
  setsbufP(sb, p);
  return sb;
}
/* Append a number to the buffer, formatted as a signed integer.
** The number is converted to a 64 bit integer first. Returns sb.
*/
SBuf *lj_strfmt_putnum_int(SBuf *sb, SFormat sf, lua_Number n)
{
  int64_t k = (int64_t)n;
  /* Anything but a plain %d of a 32 bit value needs the full formatter. */
  if (!checki32(k) || sf != STRFMT_INT)
    return lj_strfmt_putxint(sb, sf, (uint64_t)k);
  return lj_buf_putint(sb, k);  /* Shortcut for plain %d. */
}
/* Append a number to the buffer, formatted as an unsigned integer.
** Returns sb.
*/
SBuf *lj_strfmt_putnum_uint(SBuf *sb, SFormat sf, lua_Number n)
{
  /* Numbers from 2^63 upwards are shifted down by 2^64 before the signed
  ** conversion. The cast back to uint64_t then restores the original value.
  */
  int64_t k = (n >= 9223372036854775808.0) ?
	      (int64_t)(n - 18446744073709551616.0) :
	      (int64_t)n;
  return lj_strfmt_putxint(sb, sf, (uint64_t)k);
}
/* Buffer space reserved for the sprintf() output of a single number.
** Needs to hold at least #string.format("%.99f", -1e308).
*/
#define STRFMT_FMTNUMBUF 512

/* Append a floating-point number to the buffer, formatted according to sf.
** Non-finite values are written as "nan"/"inf" (upper-case for upper-case
** conversions) without involving the C library. Finite values are formatted
** by sprintf() with a format string rebuilt from sf. Returns sb.
*/
SBuf *lj_strfmt_putnum(SBuf *sb, SFormat sf, lua_Number n)
{
  TValue tv;
  tv.n = n;
  if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) {
    /* Canonicalize output of non-finite values. */
    MSize width = STRFMT_WIDTH(sf);
    MSize len, npad;
    int prefix = 0;
    int ch;  /* The three output characters, packed into 24 bits. */
    char *w;
    if (((tv.u32.hi & 0x000fffff) | tv.u32.lo) != 0) {  /* NaN. */
      ch = ('n' << 16) | ('a' << 8) | 'n';
      if ((sf & STRFMT_F_SPACE)) prefix = ' ';
    } else {  /* Infinity. */
      ch = ('i' << 16) | ('n' << 8) | 'f';
      if ((tv.u32.hi & 0x80000000))
	prefix = '-';
      else if ((sf & STRFMT_F_PLUS))
	prefix = '+';
      else if ((sf & STRFMT_F_SPACE))
	prefix = ' ';
    }
    if ((sf & STRFMT_F_UPPER))
      ch ^= 0x202020;  /* Flip all three characters to upper-case. */
    len = prefix ? 4 : 3;
    npad = width > len ? width - len : 0;
    w = lj_buf_more(sb, len + npad);
    if (!(sf & STRFMT_F_LEFT))
      for (; npad > 0; npad--) *w++ = ' ';
    if (prefix)
      *w++ = (char)prefix;
    *w++ = (char)(ch >> 16);
    *w++ = (char)(ch >> 8);
    *w++ = (char)ch;
    if ((sf & STRFMT_F_LEFT))
      for (; npad > 0; npad--) *w++ = ' ';
    setsbufP(sb, w);
  } else {  /* Delegate to sprintf() for now. */
    uint8_t width = (uint8_t)STRFMT_WIDTH(sf);
    uint8_t prec = (uint8_t)STRFMT_PREC(sf);
    /* '%' + 5 flags + 2 width digits + '.' and 2 precision digits +
    ** conversion character + NUL.
    */
    char fmt[1+5+2+3+1+1];
    char *f = fmt;
    char *out;
    *f++ = '%';
    if ((sf & STRFMT_F_LEFT)) *f++ = '-';
    if ((sf & STRFMT_F_PLUS)) *f++ = '+';
    if ((sf & STRFMT_F_ZERO)) *f++ = '0';
    if ((sf & STRFMT_F_SPACE)) *f++ = ' ';
    if ((sf & STRFMT_F_ALT)) *f++ = '#';
    if (width) {
      uint8_t tens = width / 10, ones = width % 10;
      if (tens) *f++ = '0' + tens;
      *f++ = '0' + ones;
    }
    if (prec != 255) {  /* 255 is the truncated "no precision" value. */
      uint8_t tens = prec / 10, ones = prec % 10;
      *f++ = '.';
      if (tens) *f++ = '0' + tens;
      *f++ = '0' + ones;
    }
    /* The subtype selects the conversion character: a, e, f or g. */
    *f++ = "aefg"[STRFMT_FP(sf)] ^ ((sf & STRFMT_F_UPPER) ? 0x20 : 0);
    *f = '\0';
    out = lj_buf_more(sb, STRFMT_FMTNUMBUF);
    out += sprintf(out, fmt, n);
    setsbufP(sb, out);
  }
  return sb;
}

84
src/lj_strfmt.h Normal file
View File

@ -0,0 +1,84 @@
/*
** String formatting.
** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_STRFMT_H
#define _LJ_STRFMT_H
#include "lj_obj.h"
/* A format indicator packs the format type, subtype, flags, width and
** precision of one conversion into 32 bits. The parser also returns the
** plain STRFMT_EOF, STRFMT_ERR and STRFMT_LIT values through this type.
*/
typedef uint32_t SFormat; /* Format indicator. */
/* Format parser state. */
/* p and e are set up by lj_strfmt_init(). str and len are outputs of
** lj_strfmt_parse(): the literal text for STRFMT_LIT or the offending
** conversion spec for STRFMT_ERR.
*/
typedef struct FormatState {
const uint8_t *p; /* Current format string pointer. */
const uint8_t *e; /* End of format string. */
const char *str; /* Returned literal string. */
MSize len; /* Size of literal string. */
} FormatState;
/* Format types (max. 16). */
/* The type occupies the low 4 bits of a format indicator (see STRFMT_TYPE).
** The first three values are parser results that carry no conversion. The
** remaining values name the kind of conversion to perform.
*/
typedef enum FormatType {
STRFMT_EOF, STRFMT_ERR, STRFMT_LIT,
STRFMT_INT, STRFMT_UINT, STRFMT_NUM, STRFMT_STR, STRFMT_CHAR, STRFMT_PTR
} FormatType;
/* Format subtypes (bits are reused). */
/* The subtype occupies bits 4-5 of a format indicator. Its meaning depends
** on the format type named in the trailing comment of each definition.
** The numeric order of the STRFMT_T_FP_* values matters: STRFMT_FP() is
** used by lj_strfmt_putnum() to pick the conversion character a, e, f or g.
*/
#define STRFMT_T_HEX 0x0010 /* STRFMT_UINT */
#define STRFMT_T_OCT 0x0020 /* STRFMT_UINT */
#define STRFMT_T_FP_A 0x0000 /* STRFMT_NUM */
#define STRFMT_T_FP_E 0x0010 /* STRFMT_NUM */
#define STRFMT_T_FP_F 0x0020 /* STRFMT_NUM */
#define STRFMT_T_FP_G 0x0030 /* STRFMT_NUM */
#define STRFMT_T_QUOTED 0x0010 /* STRFMT_STR */
/* Format flags. */
/* Set by lj_strfmt_parse() from the flag characters of a conversion spec.
** STRFMT_F_UPPER is derived from the case of the conversion character.
*/
#define STRFMT_F_LEFT 0x0100 /* '-' flag. */
#define STRFMT_F_PLUS 0x0200 /* '+' flag. */
#define STRFMT_F_ZERO 0x0400 /* '0' flag. */
#define STRFMT_F_SPACE 0x0800 /* ' ' flag. */
#define STRFMT_F_ALT 0x1000 /* '#' flag. */
#define STRFMT_F_UPPER 0x2000 /* Upper-case conversion character. */
/* Format indicator fields. */
/* The width is stored in bits 16-23. The precision is stored in bits 24-31
** as precision+1, so a zero field means that no precision was given.
** STRFMT_PREC() then yields the all-ones unsigned value (0 - 1u).
*/
#define STRFMT_SH_WIDTH 16
#define STRFMT_SH_PREC 24
#define STRFMT_TYPE(sf) ((FormatType)((sf) & 15))
#define STRFMT_WIDTH(sf) (((sf) >> STRFMT_SH_WIDTH) & 255u)
#define STRFMT_PREC(sf) ((((sf) >> STRFMT_SH_PREC) & 255u) - 1u)
#define STRFMT_FP(sf) (((sf) >> 4) & 3)
/* Formats for conversion characters. */
/* STRFMT_<c> is the format type plus subtype for the conversion character
** <c>. These are the values stored in the parser's lookup table, so each
** of them has to fit into 8 bits.
*/
#define STRFMT_A (STRFMT_NUM|STRFMT_T_FP_A)
#define STRFMT_C (STRFMT_CHAR)
#define STRFMT_D (STRFMT_INT)
#define STRFMT_E (STRFMT_NUM|STRFMT_T_FP_E)
#define STRFMT_F (STRFMT_NUM|STRFMT_T_FP_F)
#define STRFMT_G (STRFMT_NUM|STRFMT_T_FP_G)
#define STRFMT_I STRFMT_D
#define STRFMT_O (STRFMT_UINT|STRFMT_T_OCT)
#define STRFMT_P (STRFMT_PTR)
#define STRFMT_Q (STRFMT_STR|STRFMT_T_QUOTED)
#define STRFMT_S (STRFMT_STR)
#define STRFMT_U (STRFMT_UINT)
#define STRFMT_X (STRFMT_UINT|STRFMT_T_HEX)
/* Set up the parser state for a format string of len bytes starting at p.
** Only the current and end pointers are initialized here.
*/
static LJ_AINLINE void lj_strfmt_init(FormatState *fs, const char *p, MSize len)
{
  const uint8_t *start = (const uint8_t *)p;
  fs->p = start;
  fs->e = start + len;
  /* Must be NUL-terminated (may have NULs inside). */
  lua_assert(*fs->e == 0);
}
/* Format parser: returns the next element of the format string in fs. */
LJ_FUNC SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs);

/* Format conversions: each one appends to sb and returns sb. */
LJ_FUNC SBuf *lj_strfmt_putchar(SBuf *sb, SFormat sf, int32_t c);
LJ_FUNC SBuf *lj_strfmt_putstr(SBuf *sb, SFormat sf, GCstr *str);
LJ_FUNC SBuf *lj_strfmt_putquoted(SBuf *sb, GCstr *str);
LJ_FUNC SBuf *lj_strfmt_putxint(SBuf *sb, SFormat sf, uint64_t k);
LJ_FUNC SBuf *lj_strfmt_putnum_int(SBuf *sb, SFormat sf, lua_Number n);
LJ_FUNC SBuf *lj_strfmt_putnum_uint(SBuf *sb, SFormat sf, lua_Number n);
LJ_FUNC SBuf *lj_strfmt_putnum(SBuf *sb, SFormat sf, lua_Number n);
#endif

View File

@ -45,6 +45,7 @@
#include "lj_vmevent.c"
#include "lj_vmmath.c"
#include "lj_strscan.c"
#include "lj_strfmt.c"
#include "lj_api.c"
#include "lj_lex.c"
#include "lj_parse.c"